<a href="https://colab.research.google.com/github/maxwellreynolds/ComputerVision_Notebooks/blob/master/CS_2770_HW2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [68]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Part A: Loading and Using a Pretrained Network as a Feature Extractor 

In [25]:
print("GPU Model: %s" % torch.cuda.get_device_name(0))

GPU Model: Tesla T4


In [26]:
data_transforms = {
    'train': transforms.Compose([
      transforms.Resize((224,224)),
      transforms.ToTensor(),
      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
  ]),
  'val': transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
  ]),
  'test': transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
  ]),
}

In [27]:
data_dir = 'hw2_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
  for x in ['train', 'val', 'test']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=8, shuffle=True, num_workers=4)
  for x in ['train', 'val' , 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes



  cpuset_checked))


In [28]:
class VGG16_Feature_Extraction(torch.nn.Module):
  def __init__(self):
    super(VGG16_Feature_Extraction, self).__init__()
    VGG16_Pretrained = models.vgg16(pretrained=True)
    self.features = VGG16_Pretrained.features
    self.avgpool = VGG16_Pretrained.avgpool
    self.feature_extractor = nn.Sequential(*[VGG16_Pretrained.classifier[i] for i in range(6)])
  def forward(self, x):
    x = self.features(x)
    x = self.avgpool(x)
    x = torch.flatten(x, 1)
    x = self.feature_extractor(x)
    return x




In [29]:
# models.vgg16(pretrained=True).classifier

In [30]:
model=VGG16_Feature_Extraction()
device = 'cuda:0'
model=model.to(device)

In [32]:
image_features = {}
image_labels = {}
for phase in ['train', 'test']:
  for inputs, labels in dataloaders[phase]:
    inputs=inputs.to(device)
    model_prediction = model(inputs)
    model_prediction_numpy=model_prediction.cpu().detach().numpy()
    if phase not in image_features:
      image_features[phase]=model_prediction_numpy
      image_labels[phase]=labels.numpy()
    else:
      image_features[phase] = np.concatenate((image_features[phase], model_prediction_numpy), axis=0)
      image_labels[phase] = np.concatenate((image_labels[phase], labels.numpy()), axis=0)


  cpuset_checked))


In [46]:
#Transform data to mean 0 std 1
scaler=StandardScaler().fit(image_features['train'])
train_features_scaled=scaler.transform(image_features['train'])
scaler=StandardScaler().fit(image_features['test'])
test_features_scaled=scaler.transform(image_features['test'])



In [67]:
# clf = make_pipeline(StandardScaler(), svm.LinearSVC(random_state=0, tol=1e-5))
# clf.fit(train_features_scaled, image_labels['train'])
# y_pred=clf.predict(train_features_scaled)
# y_true=image_labels['train']
# print("Sanity check: ",accuracy_score(y_true,y_pred), " train accuracy")
# y_pred=clf.predict(test_features_scaled)
# y_true=image_labels['test']
# print("Test accuracy: ",accuracy_score(y_true,y_pred))


Sanity check:  1.0  train accuracy
Test accuracy:  0.5168067226890757


In [69]:
model=svm.LinearSVC(random_state=0, tol=1e-5).fit(train_features_scaled, image_labels['train'])

In [70]:
y_pred=model.predict(train_features_scaled)
y_true=image_labels['train']
print("Sanity check: ",accuracy_score(y_true,y_pred), " train accuracy")
y_pred=model.predict(test_features_scaled)
y_true=image_labels['test']
print("Test accuracy: ",accuracy_score(y_true,y_pred))

Sanity check:  1.0  train accuracy
Test accuracy:  0.5168067226890757


In [75]:
confusion_matrix(y_true, y_pred)

array([[19,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  1,
         0,  0,  1,  1],
       [ 0, 14,  0,  1,  1,  0,  2,  0,  0,  1,  0,  0,  0,  3,  1,  0,
         0,  0,  0,  2],
       [ 0,  0, 19,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,
         0,  0,  0,  2],
       [ 0,  1,  0, 20,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  0,
         0,  1,  0,  1],
       [ 0,  1,  0,  1,  5,  0,  0,  0,  2,  0,  6,  0,  0,  0,  1,  3,
         0,  1,  0,  5],
       [ 0,  0,  0,  0,  0, 16,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  1,  2,  0],
       [ 2,  4,  0,  2,  0,  1,  9,  0,  0,  0,  1,  1,  0,  2,  0,  1,
         0,  0,  1,  1],
       [ 0,  0,  0,  0,  1,  0,  0, 17,  0,  0,  0,  3,  0,  0,  0,  1,
         0,  3,  0,  0],
       [ 0,  0,  1,  0,  3,  0,  0,  0,  5,  0,  7,  0,  1,  0,  1,  3,
         0,  1,  0,  3],
       [ 0,  0,  0,  1,  0,  0,  1,  0,  1,  5,  0,  2,  2,  0,  2,  0,
         1,  0,  0,  0],
       [ 0,  1,  0,  0,  6,  0

In [88]:
conf_mat=confusion_matrix(y_true, y_pred)
for i in range(conf_mat.shape[0]):
  class_accuracy=(conf_mat[i][i])/(conf_mat[i].sum())
  print(class_names[i], class_accuracy)

aeroplane 0.76
bicycle 0.56
bird 0.76
boat 0.8
bottle 0.2
bus 0.7619047619047619
car 0.36
cat 0.68
chair 0.2
cow 0.3333333333333333
diningtable 0.32
dog 0.36
horse 0.5217391304347826
motorbike 0.72
person 0.24
pottedplant 0.28
sheep 0.35294117647058826
sofa 0.44
train 0.84
tvmonitor 0.76


The model was able to generalize to some degree. Although the accuracy was only around 51%, this is still significantly better than random chance given that there are 20 classes. Accuracy was highest for train, tvmonitor, aeroplane, and bus while lowest for bottle, chair, person. One odd miscslassification was water bottles classified more often as dining tables than water bottles.

# Part B: Train and Test the CNN on Our Dataset