In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch.optim as optim
import time
import copy

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

cpu


  return torch._C._cuda_getDeviceCount() > 0


In [2]:
trans = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
train = torchvision.datasets.ImageFolder(root='../SubsetImages', transform=trans)
data = torchvision.datasets.ImageFolder(root='../../Largeset_notebooks/Images', transform=trans)
traindata, testvaldata = torch.utils.data.random_split(data, [12000, 8580])
valdata, testdata = torch.utils.data.random_split(testvaldata, [4290, 4290])
dset = {'train': train, 'val': valdata}

dataloaders = {x: torch.utils.data.DataLoader(dset[x], batch_size=16, shuffle=True) for x in ['train', 'val']}
dataset_sizes = {'train':1800, 'val':4290}
testloader = torch.utils.data.DataLoader(testdata, batch_size=16, shuffle=True)

In [3]:
net = models.resnet50(pretrained=True)
num = net.fc.in_features
net.fc = nn.Linear(num, 120)
net.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [4]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
num_epochs = 30

since = time.time()
best_model_wts = copy.deepcopy(net.state_dict())
best_acc = 0.0

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch + 1, num_epochs))
    print('-' * 10)

    # Each epoch has a training and validation phase
    for phase in ['train', 'val']:
        if phase == 'train':
            net.train()  # Set model to training mode
        else:
            net.eval()   # Set model to evaluate mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data.
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                outputs = net(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        if phase == 'train':
            scheduler.step()

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = (running_corrects.double() / dataset_sizes[phase])*100

        print('{} Loss: {:.4f} Acc: {:.2f}%'.format(
            phase, epoch_loss, epoch_acc))

        # deep copy the model
        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(net.state_dict())

    print()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}%'.format(best_acc))

# load best model weights
net.load_state_dict(best_model_wts)

Epoch 1/30
----------
train Loss: 4.5413 Acc: 7.56%
val Loss: 3.7100 Acc: 37.11%

Epoch 2/30
----------
train Loss: 3.3216 Acc: 39.00%
val Loss: 2.4236 Acc: 54.87%

Epoch 3/30
----------
train Loss: 2.2976 Acc: 58.00%
val Loss: 1.7400 Acc: 64.36%

Epoch 4/30
----------
train Loss: 1.6323 Acc: 70.56%
val Loss: 1.3490 Acc: 69.11%

Epoch 5/30
----------
train Loss: 1.1797 Acc: 81.94%
val Loss: 1.1356 Acc: 70.86%

Epoch 6/30
----------
train Loss: 0.8755 Acc: 86.94%
val Loss: 1.0039 Acc: 74.43%

Epoch 7/30
----------
train Loss: 0.6920 Acc: 90.89%
val Loss: 0.9312 Acc: 75.66%

Epoch 8/30
----------
train Loss: 0.5208 Acc: 94.89%
val Loss: 0.8747 Acc: 77.13%

Epoch 9/30
----------
train Loss: 0.4845 Acc: 96.11%
val Loss: 0.8720 Acc: 77.09%

Epoch 10/30
----------
train Loss: 0.4733 Acc: 95.94%
val Loss: 0.8721 Acc: 76.85%

Epoch 11/30
----------
train Loss: 0.4491 Acc: 96.78%
val Loss: 0.8520 Acc: 77.53%

Epoch 12/30
----------
train Loss: 0.4442 Acc: 96.56%
val Loss: 0.8656 Acc: 76.78%

Ep

<All keys matched successfully>

In [5]:
PATH = './subset_resnet50.pth'
torch.save(net.state_dict(), PATH)

In [10]:
net = models.resnet152(pretrained=True)
num = net.fc.in_features
net.fc = nn.Linear(num, 120)
net.to(device)
PATH = './resnet152.pth'
net.load_state_dict(torch.load(PATH))

In [7]:
f = open("../../classes.txt", "r")
classmap = {}
for i in range(120):
    classs = f.readline().split(" ")
    classmap[i] = classs[0]

class_correct = list(0. for i in range(120))
class_total = list(0. for i in range(120))
correct = 0.0
total = 0.0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        c = (predicted == labels).squeeze()
        for i in range(len(labels)):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
meanacc1 = (100 * correct / total)
newmap = {}
for i in range(120):
    newmap[classmap[i]] = 100 * class_correct[i] / class_total[i]
        
        
i = 1
with open('subset_cnn_accuracies.txt','w') as l:
    for k, v in newmap.items():
        l.write(k + '   {}\n'.format(v))
        i+=1
        
sortedmap = sorted(newmap.items(), key=lambda item: item[1], reverse=True)
for k, v in sortedmap:
    print('Accuracy of %30s : %10d %%' % (k, v))
print('Mean accuracy: %5f %%' % meanacc1)

Accuracy of               Blenheim_spaniel :        100 %
Accuracy of          flat_coated_retriever :        100 %
Accuracy of                       komondor :        100 %
Accuracy of                        Samoyed :        100 %
Accuracy of                       keeshond :        100 %
Accuracy of             Bedlington_terrier :         97 %
Accuracy of               Sealyham_terrier :         97 %
Accuracy of               Japanese_spaniel :         97 %
Accuracy of             Norwegian_elkhound :         97 %
Accuracy of    West_Highland_white_terrier :         97 %
Accuracy of                   bull_mastiff :         96 %
Accuracy of                   Afghan_hound :         95 %
Accuracy of                 Sussex_spaniel :         95 %
Accuracy of           Bernese_mountain_dog :         95 %
Accuracy of               Mexican_hairless :         94 %
Accuracy of               English_springer :         93 %
Accuracy of                    groenendael :         93 %
Accuracy of   

In [None]:
'''
BELOW are other CNNs, PERFORMS POORLY compared to resnet transfer
'''

In [None]:
'''
THIS IS PYTORCH TUTORIAL NET (NOT GOOD FOR FINE GRAIN)
'''

# import torch.nn as nn
# import torch.nn.functional as F


# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 60, 5)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(60, 16, 5)
#         self.fc1 = nn.Linear(16 * 53*53, 1000)
#         self.fc2 = nn.Linear(1000, 120)
# #         self.fc3 = nn.Linear(84, 10)

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = x.view(x.size(0), 16*53*53)
#         x = F.relu(self.fc1(x))
#         #x = F.relu(self.fc2(x))
#         x = self.fc2(x)
#         return x


# net = Net()
# net.to(device)

In [None]:
'''
THIS IS BETTER NETWORK FOUND ON MEDIUM POST FOR FINE GRAINED CLASSIFICATION:
https://medium.com/@uijaz59/dog-breed-classification-using-pytorch-207cf27c2031
'''

# class Net(nn.Module):
    
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 16, 3)
#         self.conv2 = nn.Conv2d(16, 32, 3)
#         self.conv3 = nn.Conv2d(32, 64, 3)
#         self.conv4 = nn.Conv2d(64, 128, 3)
#         self.conv5 = nn.Conv2d(128, 256, 3)
#         self.fc1 = nn.Linear(256 * 6 * 6, 120)
        
#         self.max_pool = nn.MaxPool2d(2, 2,ceil_mode=True)
#         self.dropout = nn.Dropout(0.2)
#         self.conv_bn1 = nn.BatchNorm2d(224,3)
#         self.conv_bn2 = nn.BatchNorm2d(16)
#         self.conv_bn3 = nn.BatchNorm2d(32)
#         self.conv_bn4 = nn.BatchNorm2d(64)
#         self.conv_bn5 = nn.BatchNorm2d(128)
#         self.conv_bn6 = nn.BatchNorm2d(256)
    
#     def forward(self, x):
        
#         x = F.relu(self.conv1(x))
#         x = self.max_pool(x)
#         x = self.conv_bn2(x)
        
#         x = F.relu(self.conv2(x))
#         x = self.max_pool(x)
#         x = self.conv_bn3(x)
        
#         x = F.relu(self.conv3(x))
#         x = self.max_pool(x)
#         x = self.conv_bn4(x)
        
#         x = F.relu(self.conv4(x))
#         x = self.max_pool(x)
#         x = self.conv_bn5(x)
        
#         x = F.relu(self.conv5(x))
#         x = self.max_pool(x)
#         x = self.conv_bn6(x)
        
#         x = x.view(-1, 256 * 6 * 6)
        
#         x = self.dropout(x)
#         x = self.fc1(x)
#         return x

# net = Net()
# net.to(device)

In [6]:
net = models.resnet50(pretrained=True)
num = net.fc.in_features
net.fc = nn.Linear(num, 120)
PATH = './subset_resnet50.pth'
net.load_state_dict(torch.load(PATH, map_location=torch.device('cpu')))

net.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
preds, labels = None, None
def get_all_preds_and_labels(model, loader):
    all_preds = torch.tensor([])
    all_labels = torch.tensor([])
    for batch in loader:
        images, labels = batch

        preds = model(images)
        _, predicted = torch.max(preds, 1)
        all_preds = torch.cat(
            (all_preds, predicted)
            ,dim=0
        )
        
        all_labels = torch.cat(
            (all_labels, labels)
            ,dim=0
        )
    return all_preds.numpy(), all_labels.numpy()

with torch.no_grad():
    preds, labels = get_all_preds_and_labels(net, testloader)

print(preds.shape)
print(labels.shape)

(4290,)
(4290,)


In [9]:
from sklearn.metrics import confusion_matrix
import numpy as np

cm = confusion_matrix(labels, preds)
np.savetxt('subset_cnn_confusion.txt', cm)

print(cm.shape)

(120, 120)


In [10]:
"""
   # Cell purpose : Calculating percision, recall, and F1-score from the confusion matrix"""
cm = np.loadtxt("subset_cnn_confusion.txt", dtype=float)

true_pos = np.diag(cm) # True Positives are on the diagonal position
false_pos = np.sum(cm, axis=0) - true_pos # False positives are column-wise sums. Without the diagonal
false_neg = np.sum(cm, axis=1) - true_pos # False negatives are row-wise sums. Without the diagonal

precision = np.sum(true_pos / (true_pos + false_pos)) / 120
recall = np.sum(true_pos / (true_pos + false_neg)) / 120
f1_array = true_pos / (true_pos + (1/2) * (false_pos + false_neg))
f1_score = (2 * precision * recall) / (precision + recall)

print("precision = {}".format(precision))
print("recall = {}".format(recall))
print("F1-avg = %.2f" % (f1_score))

np.savetxt("f1_subset_cnn.txt", f1_array)

precision = 0.7279041938230224
recall = 0.7247852996932358
F1-avg = 0.73
