In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

cuda:0


In [2]:
trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(), # randomly flip and rotate
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
data = torchvision.datasets.ImageFolder(root='Images', transform=trans)
traindata, testdata = torch.utils.data.random_split(data, [12000, 8580])
print(traindata, testdata)

<torch.utils.data.dataset.Subset object at 0x0000028024904E50> <torch.utils.data.dataset.Subset object at 0x00000280249045B0>


In [3]:
trainloader = torch.utils.data.DataLoader(traindata, batch_size=4, shuffle=True)
testloader = torch.utils.data.DataLoader(testdata, batch_size = 4, shuffle=True)
print(trainloader, testloader)

<torch.utils.data.dataloader.DataLoader object at 0x0000028024904CD0> <torch.utils.data.dataloader.DataLoader object at 0x0000028024904FA0>


In [4]:
import torchvision.models as models
net = models.resnet152(pretrained=True)
num = net.fc.in_features
net.fc = nn.Linear(num, 120)
net.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [6]:
for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 1000 == 999:    # print every 1000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[1,  1000] loss: 1.705
[1,  2000] loss: 1.071
[1,  3000] loss: 0.900
[2,  1000] loss: 0.749
[2,  2000] loss: 0.732
[2,  3000] loss: 0.720
[3,  1000] loss: 0.579
[3,  2000] loss: 0.617
[3,  3000] loss: 0.610
[4,  1000] loss: 0.488
[4,  2000] loss: 0.545
[4,  3000] loss: 0.530
[5,  1000] loss: 0.437
[5,  2000] loss: 0.426
[5,  3000] loss: 0.443
[6,  1000] loss: 0.384
[6,  2000] loss: 0.383
[6,  3000] loss: 0.412
[7,  1000] loss: 0.315
[7,  2000] loss: 0.351
[7,  3000] loss: 0.338
[8,  1000] loss: 0.277
[8,  2000] loss: 0.290
[8,  3000] loss: 0.305
[9,  1000] loss: 0.224
[9,  2000] loss: 0.271
[9,  3000] loss: 0.275
[10,  1000] loss: 0.215
[10,  2000] loss: 0.250
[10,  3000] loss: 0.260
[11,  1000] loss: 0.199
[11,  2000] loss: 0.217
[11,  3000] loss: 0.226
[12,  1000] loss: 0.180
[12,  2000] loss: 0.196
[12,  3000] loss: 0.206
[13,  1000] loss: 0.151
[13,  2000] loss: 0.168
[13,  3000] loss: 0.192
[14,  1000] loss: 0.151
[14,  2000] loss: 0.157
[14,  3000] loss: 0.168
[15,  1000] loss: 0

In [16]:
PATH = './resnet.pth'
torch.save(net.state_dict(), PATH)

In [None]:
net = Net()
net.to(device)
PATH = './model2.pth'
net.load_state_dict(torch.load(PATH))

In [7]:
correct = 0.0
total = 0.0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
meanacc1 = (100 * correct / total)
print('Accuracy of the network on the 8580 test images: %.3f %%' % (
    100 * correct / total))

Accuracy of the network on the 8580 test images: 70.653 %


In [8]:
f = open("classes.txt", "r")
classmap = {}
for i in range(120):
    classs = f.readline().split(" ")
    classmap[i] = classs[0]

In [9]:
class_correct = list(0. for i in range(120))
class_total = list(0. for i in range(120))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

newmap = {}
for i in range(120):
    newmap[classmap[i]] = 100 * class_correct[i] / class_total[i]
        
sortedmap = sorted(newmap.items(), key=lambda item: item[1], reverse=True)
for k, v in sortedmap:
    print('Accuracy of %30s : %10d %%' % (k, v))
print('Mean accuracy: %5f %%' % meanacc1)

Accuracy of            African_hunting_dog :         95 %
Accuracy of                       komondor :         93 %
Accuracy of                  Saint_Bernard :         91 %
Accuracy of                        clumber :         91 %
Accuracy of                   Afghan_hound :         90 %
Accuracy of               Blenheim_spaniel :         90 %
Accuracy of             Norwegian_elkhound :         90 %
Accuracy of                         Saluki :         89 %
Accuracy of                           chow :         88 %
Accuracy of          flat_coated_retriever :         88 %
Accuracy of                    groenendael :         87 %
Accuracy of                     Eskimo_dog :         87 %
Accuracy of                   Ibizan_hound :         87 %
Accuracy of         curly_coated_retriever :         86 %
Accuracy of                       bluetick :         86 %
Accuracy of              Brabancon_griffon :         86 %
Accuracy of                       Leonberg :         86 %
Accuracy of   

In [None]:
'''
BELOW are other CNNs, PERFORMS POORLY compared to resnet transfer
'''

In [None]:
'''
THIS IS PYTORCH TUTORIAL NET (NOT GOOD FOR FINE GRAIN)
'''

# import torch.nn as nn
# import torch.nn.functional as F


# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 60, 5)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(60, 16, 5)
#         self.fc1 = nn.Linear(16 * 53*53, 1000)
#         self.fc2 = nn.Linear(1000, 120)
# #         self.fc3 = nn.Linear(84, 10)

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = x.view(x.size(0), 16*53*53)
#         x = F.relu(self.fc1(x))
#         #x = F.relu(self.fc2(x))
#         x = self.fc2(x)
#         return x


# net = Net()
# net.to(device)

In [None]:
'''
THIS IS BETTER NETWORK FOUND ON MEDIUM POST FOR FINE GRAINED CLASSIFICATION:
https://medium.com/@uijaz59/dog-breed-classification-using-pytorch-207cf27c2031
'''

# class Net(nn.Module):
    
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 16, 3)
#         self.conv2 = nn.Conv2d(16, 32, 3)
#         self.conv3 = nn.Conv2d(32, 64, 3)
#         self.conv4 = nn.Conv2d(64, 128, 3)
#         self.conv5 = nn.Conv2d(128, 256, 3)
#         self.fc1 = nn.Linear(256 * 6 * 6, 120)
        
#         self.max_pool = nn.MaxPool2d(2, 2,ceil_mode=True)
#         self.dropout = nn.Dropout(0.2)
#         self.conv_bn1 = nn.BatchNorm2d(224,3)
#         self.conv_bn2 = nn.BatchNorm2d(16)
#         self.conv_bn3 = nn.BatchNorm2d(32)
#         self.conv_bn4 = nn.BatchNorm2d(64)
#         self.conv_bn5 = nn.BatchNorm2d(128)
#         self.conv_bn6 = nn.BatchNorm2d(256)
    
#     def forward(self, x):
        
#         x = F.relu(self.conv1(x))
#         x = self.max_pool(x)
#         x = self.conv_bn2(x)
        
#         x = F.relu(self.conv2(x))
#         x = self.max_pool(x)
#         x = self.conv_bn3(x)
        
#         x = F.relu(self.conv3(x))
#         x = self.max_pool(x)
#         x = self.conv_bn4(x)
        
#         x = F.relu(self.conv4(x))
#         x = self.max_pool(x)
#         x = self.conv_bn5(x)
        
#         x = F.relu(self.conv5(x))
#         x = self.max_pool(x)
#         x = self.conv_bn6(x)
        
#         x = x.view(-1, 256 * 6 * 6)
        
#         x = self.dropout(x)
#         x = self.fc1(x)
#         return x

# net = Net()
# net.to(device)