In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch.optim as optim
import time
import copy

# Use the first CUDA GPU when one is visible to PyTorch; otherwise run on CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Report the selected device (a CUDA device is expected on a GPU machine).
print(device)

In [None]:
# Fixed seed for the train/val/test split. The split must be identical across
# kernel restarts: a later cell reloads the saved checkpoint and evaluates on
# `testloader`, and with an unseeded split that "test" subset would overlap the
# images the checkpoint was trained on.
SPLIT_SEED = 0
TRAIN_SIZE = 12000
VAL_SIZE = 4000
BATCH_SIZE = 4

# ImageNet channel statistics, matching the pretrained ResNet weights.
_normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training transform: resize + random horizontal flip augmentation.
trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    _normalize,
])

# Evaluation transform: same preprocessing but deterministic (no augmentation),
# so validation/test metrics do not vary randomly between runs.
eval_trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    _normalize,
])

# Two views over the same image folder. Both are built from the same root, so
# the split indices computed on `data` are reused to index `eval_data` below.
data = torchvision.datasets.ImageFolder(root='Images', transform=trans)
eval_data = torchvision.datasets.ImageFolder(root='Images', transform=eval_trans)

if len(data) < TRAIN_SIZE + VAL_SIZE:
    raise ValueError(
        'Need at least {} images for the train/val split, found {}'.format(
            TRAIN_SIZE + VAL_SIZE, len(data)))

split_gen = torch.Generator().manual_seed(SPLIT_SEED)

# First split: training subset vs. everything held out.
traindata, _holdout = torch.utils.data.random_split(
    data, [TRAIN_SIZE, len(data) - TRAIN_SIZE], generator=split_gen)

# Re-wrap the held-out indices around the non-augmented dataset view.
testvaldata = torch.utils.data.Subset(eval_data, _holdout.indices)

# Second split: validation vs. test; the test set takes whatever remains.
valdata, testdata = torch.utils.data.random_split(
    testvaldata, [VAL_SIZE, len(testvaldata) - VAL_SIZE], generator=split_gen)
dset = {'train': traindata, 'val': valdata}

# Only the training loader needs shuffling; evaluation metrics are order-independent.
dataloaders = {
    x: torch.utils.data.DataLoader(dset[x], batch_size=BATCH_SIZE, shuffle=(x == 'train'))
    for x in ['train', 'val']
}
# Derived from the actual subsets so the sizes cannot drift from the split.
dataset_sizes = {x: len(dset[x]) for x in ['train', 'val']}
testloader = torch.utils.data.DataLoader(testdata, batch_size=BATCH_SIZE, shuffle=False)

print('Finished data augmentation')

In [None]:
# Transfer learning: start from an ImageNet-pretrained ResNet-152 and replace
# its final fully connected layer with a fresh 120-way classifier head.
net = models.resnet152(pretrained=True)
num = net.fc.in_features  # width of the features feeding the original head
net.fc = nn.Linear(in_features=num, out_features=120)
# Move the model to the selected device; as the last expression this also
# displays the model summary in the cell output.
net.to(device)

In [7]:
# Fine-tune every parameter of `net` with SGD + momentum, keeping a copy of the
# weights from the epoch with the best validation accuracy.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# Multiply the learning rate by 0.1 every 7 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
num_epochs = 30

since = time.time()
best_model_wts = copy.deepcopy(net.state_dict())
best_acc = 0.0

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch + 1, num_epochs))
    print('-' * 10)

    # Each epoch has a training and validation phase
    for phase in ['train', 'val']:
        is_train = phase == 'train'
        # train(True) enables training mode; train(False) is equivalent to eval().
        net.train(is_train)

        running_loss = 0.0
        # Kept as a plain Python int (via .item()) so the accuracy below is a
        # float, not a device tensor, and an empty loader does not fail on
        # int.double().
        running_corrects = 0

        # Iterate over data.
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward; gradients are tracked only in the training phase
            with torch.set_grad_enabled(is_train):
                outputs = net(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if is_train:
                    loss.backward()
                    optimizer.step()

            # statistics: loss is a batch mean, so weight it by the batch size
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum().item()
        if is_train:
            # Step the LR schedule once per epoch, after the optimizer updates.
            scheduler.step()

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = 100.0 * running_corrects / dataset_sizes[phase]

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(
            phase, epoch_loss, epoch_acc))

        # deep copy the model whenever validation accuracy improves
        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(net.state_dict())

    print()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60))
# '{:.4f}' (precision), not '{:4f}' (minimum width), to match the per-epoch lines.
print('Best val Acc: {:.4f}'.format(best_acc))

# load best model weights
net.load_state_dict(best_model_wts)

Epoch 1/30
----------
train Loss: 1.3438 Acc: 60.8083
val Loss: 1.1506 Acc: 67.0750

Epoch 2/30
----------
train Loss: 1.0584 Acc: 68.5333
val Loss: 1.2308 Acc: 70.7000

Epoch 3/30
----------
train Loss: 0.8609 Acc: 73.3667
val Loss: 1.2182 Acc: 71.1500

Epoch 4/30
----------
train Loss: 0.7172 Acc: 77.5667
val Loss: 1.2343 Acc: 70.8500

Epoch 5/30
----------
train Loss: 0.6091 Acc: 80.8000
val Loss: 1.1445 Acc: 73.0500

Epoch 6/30
----------
train Loss: 0.4879 Acc: 84.7750
val Loss: 1.2420 Acc: 72.2250

Epoch 7/30
----------
train Loss: 0.2509 Acc: 92.5333
val Loss: 0.7975 Acc: 80.0250

Epoch 8/30
----------
train Loss: 0.1778 Acc: 95.0000
val Loss: 0.7545 Acc: 80.9250

Epoch 9/30
----------
train Loss: 0.1451 Acc: 96.1417
val Loss: 0.7493 Acc: 81.4250

Epoch 10/30
----------
train Loss: 0.1262 Acc: 96.7333
val Loss: 0.7394 Acc: 81.7250

Epoch 11/30
----------
train Loss: 0.1129 Acc: 97.3417
val Loss: 0.7357 Acc: 81.4250

Epoch 12/30
----------
train Loss: 0.0993 Acc: 97.9000
val Loss

<All keys matched successfully>

In [13]:
# Persist only the model's state_dict (parameters and buffers) to disk; the
# architecture must be rebuilt in code before these weights can be loaded.
PATH = './resnet152.pth'
torch.save(obj=net.state_dict(), f=PATH)

In [None]:
# Rebuild the fine-tuned architecture and restore the saved weights.
# pretrained=False: the ImageNet weights would be overwritten in full by the
# strict load_state_dict call below, so fetching them is wasted work.
net = models.resnet152(pretrained=False)
num = net.fc.in_features
net.fc = nn.Linear(num, 120)  # same 120-way head the checkpoint was saved with
net.to(device)
PATH = './resnet152.pth'
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine
# (and vice versa) by remapping tensor storages onto the current device.
net.load_state_dict(torch.load(PATH, map_location=device))

In [11]:
# Build class-index -> name map from the first space-separated token of each of
# the first 120 lines of classes.txt. The `with` block closes the file handle.
# NOTE(review): assumes line i of classes.txt corresponds to label index i of
# the dataset — confirm against how classes.txt was generated.
classmap = {}
with open("classes.txt", "r") as f:
    for i in range(120):
        fields = f.readline().split(" ")
        classmap[i] = fields[0]

class_correct = [0.0] * 120
class_total = [0.0] * 120
correct = 0.0
total = 0.0

# Inference mode for BatchNorm/Dropout. A freshly constructed or reloaded model
# starts in training mode, which would normalise with per-batch statistics.
net.eval()
with torch.no_grad():
    # Unpack directly instead of binding the batch to `data`, which would
    # clobber the dataset variable of the same name defined earlier.
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        matches = predicted == labels
        total += labels.size(0)
        correct += matches.sum().item()
        # Iterate over the actual batch contents so any batch size (including
        # a final partial batch or a batch of one) is tallied correctly.
        for label, hit in zip(labels.tolist(), matches.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

# Guard against an empty test loader.
meanacc1 = (100 * correct / total) if total else 0.0

# Per-class accuracy in percent; classes with no test samples are omitted
# rather than dividing by zero.
newmap = {}
for i in range(120):
    if class_total[i] > 0:
        newmap[classmap[i]] = 100 * class_correct[i] / class_total[i]

sortedmap = sorted(newmap.items(), key=lambda item: item[1], reverse=True)
for k, v in sortedmap:
    print('Accuracy of %30s : %10d %%' % (k, v))
print('Mean accuracy: %5f %%' % meanacc1)

Accuracy of             Bedlington_terrier :        100 %
Accuracy of                         vizsla :        100 %
Accuracy of                        Samoyed :        100 %
Accuracy of                          dingo :        100 %
Accuracy of                   Afghan_hound :         98 %
Accuracy of               Japanese_spaniel :         97 %
Accuracy of               Blenheim_spaniel :         97 %
Accuracy of                     Weimaraner :         97 %
Accuracy of                        clumber :         96 %
Accuracy of                            pug :         95 %
Accuracy of             Norwegian_elkhound :         95 %
Accuracy of                           chow :         95 %
Accuracy of             Kerry_blue_terrier :         95 %
Accuracy of                   Ibizan_hound :         94 %
Accuracy of            African_hunting_dog :         94 %
Accuracy of                       keeshond :         94 %
Accuracy of                     schipperke :         93 %
Accuracy of   

In [None]:
'''
BELOW are other CNNs, which PERFORM POORLY compared to ResNet transfer learning
'''

In [None]:
'''
THIS IS THE PYTORCH TUTORIAL NET (NOT GOOD FOR FINE-GRAINED CLASSIFICATION)
'''

# import torch.nn as nn
# import torch.nn.functional as F


# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 60, 5)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(60, 16, 5)
#         self.fc1 = nn.Linear(16 * 53*53, 1000)
#         self.fc2 = nn.Linear(1000, 120)
# #         self.fc3 = nn.Linear(84, 10)

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = x.view(x.size(0), 16*53*53)
#         x = F.relu(self.fc1(x))
#         #x = F.relu(self.fc2(x))
#         x = self.fc2(x)
#         return x


# net = Net()
# net.to(device)

In [None]:
'''
THIS IS A BETTER NETWORK, FOUND IN A MEDIUM POST, FOR FINE-GRAINED CLASSIFICATION:
https://medium.com/@uijaz59/dog-breed-classification-using-pytorch-207cf27c2031
'''

# class Net(nn.Module):
    
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 16, 3)
#         self.conv2 = nn.Conv2d(16, 32, 3)
#         self.conv3 = nn.Conv2d(32, 64, 3)
#         self.conv4 = nn.Conv2d(64, 128, 3)
#         self.conv5 = nn.Conv2d(128, 256, 3)
#         self.fc1 = nn.Linear(256 * 6 * 6, 120)
        
#         self.max_pool = nn.MaxPool2d(2, 2,ceil_mode=True)
#         self.dropout = nn.Dropout(0.2)
#         self.conv_bn1 = nn.BatchNorm2d(224,3)
#         self.conv_bn2 = nn.BatchNorm2d(16)
#         self.conv_bn3 = nn.BatchNorm2d(32)
#         self.conv_bn4 = nn.BatchNorm2d(64)
#         self.conv_bn5 = nn.BatchNorm2d(128)
#         self.conv_bn6 = nn.BatchNorm2d(256)
    
#     def forward(self, x):
        
#         x = F.relu(self.conv1(x))
#         x = self.max_pool(x)
#         x = self.conv_bn2(x)
        
#         x = F.relu(self.conv2(x))
#         x = self.max_pool(x)
#         x = self.conv_bn3(x)
        
#         x = F.relu(self.conv3(x))
#         x = self.max_pool(x)
#         x = self.conv_bn4(x)
        
#         x = F.relu(self.conv4(x))
#         x = self.max_pool(x)
#         x = self.conv_bn5(x)
        
#         x = F.relu(self.conv5(x))
#         x = self.max_pool(x)
#         x = self.conv_bn6(x)
        
#         x = x.view(-1, 256 * 6 * 6)
        
#         x = self.dropout(x)
#         x = self.fc1(x)
#         return x

# net = Net()
# net.to(device)