In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.cluster import KMeans

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

cuda:0


In [2]:
trans = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
data = torchvision.datasets.ImageFolder(root='Images', transform=trans)
traindata, testvaldata = torch.utils.data.random_split(data, [12000, 8580])
valdata, testdata = torch.utils.data.random_split(testvaldata, [4000, 4580])

print(traindata, testdata)

<torch.utils.data.dataset.Subset object at 0x00000167A7D2F640> <torch.utils.data.dataset.Subset object at 0x00000167B4444160>


In [3]:
trainloader = torch.utils.data.DataLoader(traindata, batch_size=1, shuffle=True)
testloader = torch.utils.data.DataLoader(testdata, batch_size=1, shuffle=True)
print(trainloader, testloader)

<torch.utils.data.dataloader.DataLoader object at 0x00000167B4438DF0> <torch.utils.data.dataloader.DataLoader object at 0x00000167B4438DC0>


In [4]:
import torchvision.models as models
net = models.resnet50(pretrained=True)
num = net.fc.in_features
net.fc = nn.Linear(num, 120)

PATH = './resnet50.pth'
net.load_state_dict(torch.load(PATH))

layer = net._modules.get('avgpool')
net.eval()
net.to(device)
# num = net.fc.in_features
# net.fc = nn.Linear(num, 120)
# net.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
"""   K MEANS   """

features = np.empty((0, 2048))
train_labels = np.zeros(0)
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
    # get the inputs; data is a list of [inputs, labels]
    inputs, labels = data[0].to(device), data[1].to(device)
    my_embedding = torch.zeros(2048)
    def copy_data(m, i, o):
        my_embedding.copy_(o.data.reshape(o.data.size(1)))

    h = layer.register_forward_hook(copy_data)

    net(inputs)
    h.remove()

    features = np.append(features, my_embedding.reshape(1, len(my_embedding)), axis=0)
    train_labels = np.append(train_labels, labels.cpu().numpy(), axis=0)
print('Finished Training')
print(features.shape)

Finished Training
(12000, 2048)


In [6]:
testfeatures = np.empty((0, 2048))
testlabels = np.zeros(0)
for i, data in enumerate(testloader, 0):
    # get the inputs; data is a list of [inputs, labels]
    inputs, labels = data[0].to(device), data[1].to(device)
    my_embedding = torch.zeros(2048)
    def copy_data(m, i, o):
        my_embedding.copy_(o.data.reshape(o.data.size(1)))

    h = layer.register_forward_hook(copy_data)
    
    net(inputs)
    h.remove()

    testfeatures = np.append(testfeatures, my_embedding.reshape(1, len(my_embedding)), axis=0)
    testlabels = np.append(testlabels, labels.cpu().numpy(), axis=0)
print(testfeatures.shape)

(4580, 2048)


In [23]:
from sklearn import svm
clf = svm.SVC()
clf.fit(features, train_labels)
preds = clf.predict(testfeatures)

In [24]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(testlabels, preds)
np.savetxt('confusion.txt', cm)

from sklearn.metrics import accuracy_score
meanacc = accuracy_score(testlabels, preds)*100
classacc = 100*(cm.diagonal()/cm.sum(axis=1))
f = open("classes.txt", "r")
classmap = {}
for i in range(120):
    classs = f.readline().split(" ")
    classmap[classs[0]] = classacc[i]
    
sortedmap = sorted(classmap.items(), key=lambda item: item[1], reverse=True)

for k, v in sortedmap:
    print('Accuracy of %30s : %10d %%' % (k, v))
print('Mean accuracy: %5f %%' % meanacc)

Accuracy of                    Maltese_dog :        100 %
Accuracy of               Blenheim_spaniel :        100 %
Accuracy of                   Afghan_hound :        100 %
Accuracy of                         Saluki :        100 %
Accuracy of                     Weimaraner :        100 %
Accuracy of                          cairn :        100 %
Accuracy of                 Scotch_terrier :        100 %
Accuracy of                         vizsla :        100 %
Accuracy of                   Irish_setter :        100 %
Accuracy of                        clumber :        100 %
Accuracy of                 cocker_spaniel :        100 %
Accuracy of                 Sussex_spaniel :        100 %
Accuracy of           Old_English_sheepdog :        100 %
Accuracy of             miniature_pinscher :        100 %
Accuracy of                  Saint_Bernard :        100 %
Accuracy of                        basenji :        100 %
Accuracy of                        Samoyed :        100 %
Accuracy of   

In [7]:
kmeans = KMeans(n_clusters=120)
kmeans.fit(features)
trainl = kmeans.labels_

testl = kmeans.predict(testfeatures)

In [9]:
from sklearn import svm
s = svm.SVC()
s.fit(features, trainl)
preds = s.predict(testfeatures)

In [13]:
import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(preds, testlabels)
np.savetxt('confusion.txt', cm)
m = np.zeros(120)
max = 0
for i in range(len(cm)):
    max = 0
    for j in range(len(cm.T)):
        curr = cm[i][j]
        if curr > max:
            max = curr
            m[i] = j
newpreds = np.zeros(len(preds))
for i in range(len(preds)):
    newpreds[i] = m[preds[i]]

In [14]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(testlabels, newpreds)
np.savetxt('confusion.txt', cm)

meanacc = accuracy_score(testlabels, newpreds)*100
classacc = 100*(cm.diagonal()/(cm.sum(axis=1)))
f = open("classes.txt", "r")
classmap = {}
for i in range(120):
    classs = f.readline().split(" ")
    classmap[classs[0]] = classacc[i]
    
sortedmap = sorted(classmap.items(), key=lambda item: item[1], reverse=True)

for k, v in sortedmap:
    print('Accuracy of %30s : %10d %%' % (k, v))
print('Mean accuracy: %5f %%' % meanacc)

Accuracy of                         basset :        100 %
Accuracy of                         borzoi :        100 %
Accuracy of             Bedlington_terrier :        100 %
Accuracy of               Sealyham_terrier :        100 %
Accuracy of          flat_coated_retriever :        100 %
Accuracy of             Labrador_retriever :        100 %
Accuracy of                        clumber :        100 %
Accuracy of               English_springer :        100 %
Accuracy of         Welsh_springer_spaniel :        100 %
Accuracy of                       komondor :        100 %
Accuracy of                     Rottweiler :        100 %
Accuracy of                     Great_Dane :        100 %
Accuracy of                  Saint_Bernard :        100 %
Accuracy of                  affenpinscher :        100 %
Accuracy of                       Leonberg :        100 %
Accuracy of                           chow :        100 %
Accuracy of            African_hunting_dog :        100 %
Accuracy of   