In [53]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.cluster import KMeans

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

cuda:0


In [54]:
trans = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
data = torchvision.datasets.ImageFolder(root='Images', transform=trans)
traindata, testdata = torch.utils.data.random_split(data, [12000, 8580])
print(traindata, testdata)

<torch.utils.data.dataset.Subset object at 0x0000023A158794C0> <torch.utils.data.dataset.Subset object at 0x0000023A15879CA0>


In [55]:
trainloader = torch.utils.data.DataLoader(traindata, batch_size=1, shuffle=True)
testloader = torch.utils.data.DataLoader(testdata, batch_size=1, shuffle=True)
print(trainloader, testloader)

<torch.utils.data.dataloader.DataLoader object at 0x0000023A2462F970> <torch.utils.data.dataloader.DataLoader object at 0x0000023A2462FA30>


In [56]:
import torchvision.models as models
net = models.resnet50(pretrained=True)

layer = net._modules.get('avgpool')
net.eval()
net.to(device)
# num = net.fc.in_features
# net.fc = nn.Linear(num, 120)
# net.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [59]:
"""   K MEANS   """

features = np.empty((0, 2048))
train_labels = np.zeros(0)
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
    # get the inputs; data is a list of [inputs, labels]
    inputs, labels = data[0].to(device), data[1].to(device)
    my_embedding = torch.zeros(2048)
    def copy_data(m, i, o):
        my_embedding.copy_(o.data.reshape(o.data.size(1)))

    h = layer.register_forward_hook(copy_data)

    net(inputs)
    h.remove()

    features = np.append(features, my_embedding.reshape(1, len(my_embedding)), axis=0)
    train_labels = np.append(train_labels, labels.cpu().numpy(), axis=0)
print('Finished Training')
print(features.shape)

Finished Training
(12000, 2048)


In [61]:
testfeatures = np.empty((0, 2048))
testlabels = np.zeros(0)
for i, data in enumerate(testloader, 0):
    # get the inputs; data is a list of [inputs, labels]
    inputs, labels = data[0].to(device), data[1].to(device)
    my_embedding = torch.zeros(2048)
    def copy_data(m, i, o):
        my_embedding.copy_(o.data.reshape(o.data.size(1)))

    h = layer.register_forward_hook(copy_data)
    
    net(inputs)
    h.remove()

    testfeatures = np.append(testfeatures, my_embedding.reshape(1, len(my_embedding)), axis=0)
    testlabels = np.append(testlabels, labels.cpu().numpy(), axis=0)
print(testfeatures.shape)

(8580, 2048)


In [62]:
from sklearn import svm
clf = svm.SVC()
clf.fit(features, train_labels)
preds = clf.predict(testfeatures)

In [63]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(testlabels, preds)
np.savetxt('confusion.txt', cm)

from sklearn.metrics import accuracy_score
meanacc = accuracy_score(testlabels, preds)*100
classacc = 100*(cm.diagonal()/cm.sum(axis=1))
f = open("classes.txt", "r")
classmap = {}
for i in range(120):
    classs = f.readline().split(" ")
    classmap[classs[0]] = classacc[i]
    
sortedmap = sorted(classmap.items(), key=lambda item: item[1], reverse=True)

for k, v in sortedmap:
    print('Accuracy of %30s : %10d %%' % (k, v))
print('Mean accuracy: %5f %%' % meanacc)

Accuracy of                        Samoyed :         99 %
Accuracy of                       keeshond :         98 %
Accuracy of           Bernese_mountain_dog :         97 %
Accuracy of                            pug :         97 %
Accuracy of                  Saint_Bernard :         97 %
Accuracy of    West_Highland_white_terrier :         97 %
Accuracy of               Sealyham_terrier :         96 %
Accuracy of             Bedlington_terrier :         96 %
Accuracy of                       Leonberg :         94 %
Accuracy of                 Border_terrier :         93 %
Accuracy of                   Afghan_hound :         93 %
Accuracy of                           chow :         93 %
Accuracy of                    Boston_bull :         93 %
Accuracy of            African_hunting_dog :         93 %
Accuracy of         curly_coated_retriever :         92 %
Accuracy of                  Border_collie :         92 %
Accuracy of                   bull_mastiff :         92 %
Accuracy of   

In [64]:
kmeans = KMeans(n_clusters=120)
kmeans.fit(features)
preds = kmeans.predict(testfeatures)
print(testlabels.shape)

(8580,)


In [65]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(testlabels, preds)
np.savetxt('confusion.txt', cm)

meanacc = accuracy_score(testlabels, preds)*100
classacc = 100*(cm.diagonal()/(cm.sum(axis=1)+0.01))
f = open("classes.txt", "r")
classmap = {}
for i in range(120):
    classs = f.readline().split(" ")
    classmap[classs[0]] = classacc[i]
    
sortedmap = sorted(classmap.items(), key=lambda item: item[1], reverse=True)

for k, v in sortedmap:
    print('Accuracy of %30s : %10d %%' % (k, v))
print('Mean accuracy: %5f %%' % meanacc)

Accuracy of                  affenpinscher :         49 %
Accuracy of                       Shih_Tzu :          8 %
Accuracy of               golden_retriever :          7 %
Accuracy of                     schipperke :          3 %
Accuracy of                   Irish_setter :          3 %
Accuracy of      Staffordshire_bullterrier :          1 %
Accuracy of                     bloodhound :          1 %
Accuracy of                      Chihuahua :          0 %
Accuracy of               Japanese_spaniel :          0 %
Accuracy of                    Maltese_dog :          0 %
Accuracy of                       Pekinese :          0 %
Accuracy of               Blenheim_spaniel :          0 %
Accuracy of                       papillon :          0 %
Accuracy of                    toy_terrier :          0 %
Accuracy of            Rhodesian_ridgeback :          0 %
Accuracy of                   Afghan_hound :          0 %
Accuracy of                         basset :          0 %
Accuracy of   