In [1]:
import os
from os import listdir
from os.path import isfile, join

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from PIL import Image, ImageFile
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

In [5]:
parent_dir = './dataset'
sub_dirs = next(os.walk(parent_dir))[1]
print (sub_dirs)

['motorbike', 'aeroplane', 'dog', 'bottle', 'boat', 'pottedplant', 'cow', 'horse', 'person', 'car', 'cat', 'diningtable', 'bus', 'bird', 'bicycle', 'chair']


We have 16 folders containing images from the above mentioned categories.  
Now we will create a dataframe containing category, file_name, and encoding of the catgeory.

In [7]:
files_details = []
for sub_dir in sub_dirs:
    dir_path = join(parent_dir, sub_dir)
    files = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
    for file in files:
        file_detail = {}
        file_detail['class'] = sub_dir
        file_detail['path'] = join(parent_dir, sub_dir, file)
        if os.path.getsize(file_detail['path'])>0:
            files_details.append(file_detail)
        else:
            pass
        
lbl = LabelEncoder()
df = pd.DataFrame(files_details)
df['class_encoded'] = lbl.fit_transform(df['class'])

In [9]:
df.head()

Unnamed: 0,class,path,class_encoded
0,motorbike,./dataset/motorbike/2008_008246.jpg,13
1,motorbike,./dataset/motorbike/2008_007739.jpg,13
2,motorbike,./dataset/motorbike/2008_002926.jpg,13
3,motorbike,./dataset/motorbike/2008_007485.jpg,13
4,motorbike,./dataset/motorbike/2008_007054.jpg,13


In [10]:
df.shape, df['class'].nunique()

((793, 3), 16)

In [11]:
df['class'].value_counts()

aeroplane      50
bicycle        50
cow            50
car            50
dog            50
chair          50
person         50
bus            50
motorbike      50
cat            50
bottle         50
diningtable    50
horse          50
bird           50
boat           50
pottedplant    43
Name: class, dtype: int64

In [12]:
class ImageDataset(Dataset):

    def __init__(self, df, transform):
        self.data = df
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = self.data.loc[idx, 'path']
        image = Image.open(img_name)
        image = self.transform(image)
        label = torch.tensor(self.data.loc[idx, 'class_encoded'])
        return {'image': image, 'label': label}

In [13]:
num_epochs = 10
num_classes = 16
batch_size = 16
learning_rate = 0.001

In [14]:
image_transforms = { 
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
}

In [15]:
df = df.sample(frac=1, random_state=12).reset_index(drop=True)

In [16]:
df.head()

Unnamed: 0,class,path,class_encoded
0,bottle,./dataset/bottle/2008_002864.jpg,4
1,chair,./dataset/chair/2008_001841.jpg,8
2,chair,./dataset/chair/2008_007742.jpg,8
3,car,./dataset/car/2008_001208.jpg,6
4,pottedplant,./dataset/pottedplant/2008_005214.jpg,15


In [17]:
train, valid = train_test_split(df, test_size=0.2)
train.reset_index(drop = True, inplace = True)
valid.reset_index(drop = True, inplace = True)
train.shape, valid.shape

((634, 3), (159, 3))

In [18]:
train.head()

Unnamed: 0,class,path,class_encoded
0,pottedplant,./dataset/pottedplant/2008_000275.jpg,15
1,chair,./dataset/chair/2008_001563.jpg,8
2,bus,./dataset/bus/2008_008343.jpg,5
3,person,./dataset/person/2008_004307.jpg,14
4,pottedplant,./dataset/pottedplant/2008_005111.jpg,15


In [19]:
valid.head()

Unnamed: 0,class,path,class_encoded
0,chair,./dataset/chair/2008_005616.jpg,8
1,cow,./dataset/cow/2008_000905.jpg,9
2,bird,./dataset/bird/2008_004973.jpg,2
3,motorbike,./dataset/motorbike/2008_006944.jpg,13
4,dog,./dataset/dog/2008_002536.jpg,11


In [20]:
train_dataset = ImageDataset(train, transform=image_transforms['train'])
valid_dataset = ImageDataset(valid, transform=image_transforms['valid'])

train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=4, shuffle=True)
val_data_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, num_workers=4)

In [21]:
len(train_data_loader), len(val_data_loader)

(40, 10)

### Resnet50

In [42]:
model = torchvision.models.resnet50(pretrained=True)

In [46]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [23]:
for param in model.parameters():
    param.requires_grad = False

In [24]:
fc_inputs = model.fc.in_features
 
model.fc = nn.Sequential(
    nn.Linear(fc_inputs, 256),
    nn.Dropout(0.4),
    nn.Linear(256, 64),
    nn.Dropout(0.4),
    nn.Linear(64, 16)
)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [25]:
total_step = len(train_data_loader)
loss_list = []
acc_list = []
for epoch in range(num_epochs):
    total_correct_preds = 0
    total_samples = 0
    total_loss = 0
    correct_labels = []
    predicted_labels = []
    for i, image_label in enumerate(train_data_loader):
        image, label = image_label['image'], image_label['label']
        #image = image.cuda(), label.cuda()
        optimizer.zero_grad()
        outputs = model(image)
        loss = criterion(outputs, label)
        loss_list.append(loss.item())
        loss.backward()
        optimizer.step()
        total = label.size(0)
        predicted = outputs.max(1)[1]
        correct = (predicted == label).sum().item()
        acc_list.append(correct / total)
        total_correct_preds += correct
        total_samples += total
        total_loss += loss.item() * total
        
        correct_labels.extend(label.data.cpu().numpy())
        predicted_labels.extend(outputs.max(1)[1].cpu().numpy())
        
    print('Epoch [{}/{}]'.format(epoch + 1, num_epochs))
    print('Loss: {:.4f}, Accuracy: {:.2f}'.format(loss.item(),(total_correct_preds / total_samples)))
    print('Cohen Kappa: {:.2f}'.format(cohen_kappa_score(correct_labels, predicted_labels)))

Epoch [1/10]
Loss: 1.7937, Accuracy: 0.22
Cohen Kappa: 0.16
Epoch [2/10]
Loss: 1.3470, Accuracy: 0.48
Cohen Kappa: 0.44
Epoch [3/10]
Loss: 1.5588, Accuracy: 0.56
Cohen Kappa: 0.53
Epoch [4/10]
Loss: 0.9333, Accuracy: 0.61
Cohen Kappa: 0.58
Epoch [5/10]
Loss: 1.3213, Accuracy: 0.59
Cohen Kappa: 0.56
Epoch [6/10]
Loss: 2.3609, Accuracy: 0.64
Cohen Kappa: 0.61
Epoch [7/10]
Loss: 1.5542, Accuracy: 0.65
Cohen Kappa: 0.62
Epoch [8/10]
Loss: 0.8756, Accuracy: 0.68
Cohen Kappa: 0.66
Epoch [9/10]
Loss: 2.0108, Accuracy: 0.66
Cohen Kappa: 0.63
Epoch [10/10]
Loss: 1.7019, Accuracy: 0.66
Cohen Kappa: 0.64


In [26]:
with torch.no_grad():
    model.eval()
    total_correct_preds = 0
    total_samples = 0
    correct_labels = []
    predicted_labels = []
    for image_label in val_data_loader:
        image, label = image_label['image'], image_label['label']
        outputs = model(image)

        # Class with the highest probability is our predicted class
        equality = (label.data == outputs.max(1)[1])
        # Accuracy is number of correct predictions divided by all predictions
        correct_preds = equality.type_as(torch.FloatTensor()).sum()
        samples = len(equality)
        total_correct_preds += correct_preds
        total_samples += samples

        correct_labels.extend(label.data.cpu().numpy())
        predicted_labels.extend(outputs.max(1)[1].cpu().numpy())

    print("Test accuracy: {:.3f}".format(total_correct_preds/total_samples))
    print("Test cohen score: {:.3f}".format(cohen_kappa_score(correct_labels, predicted_labels)))

Test accuracy: 0.629
Test cohen score: 0.602


In [40]:
model = torchvision.models.resnet18(pretrained=True)

In [41]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [34]:
for param in model.parameters():
    param.requires_grad = False

In [35]:
fc_inputs = model.fc.in_features
 
model.fc = nn.Sequential(
    nn.Linear(fc_inputs, 256),
    nn.Dropout(0.4),
    nn.Linear(256, 64),
    nn.Dropout(0.4),
    nn.Linear(64, 16)
)

In [36]:
total_step = len(train_data_loader)
loss_list = []
acc_list = []
for epoch in range(num_epochs):
    total_correct_preds = 0
    total_samples = 0
    total_loss = 0
    correct_labels = []
    predicted_labels = []
    for i, image_label in enumerate(train_data_loader):
        image, label = image_label['image'], image_label['label']
        #image = image.cuda(), label.cuda()
        optimizer.zero_grad()
        outputs = model(image)
        loss = criterion(outputs, label)
        loss_list.append(loss.item())
        loss.backward()
        optimizer.step()
        total = label.size(0)
        predicted = outputs.max(1)[1]
        correct = (predicted == label).sum().item()
        acc_list.append(correct / total)
        total_correct_preds += correct
        total_samples += total
        total_loss += loss.item() * total
        
        correct_labels.extend(label.data.cpu().numpy())
        predicted_labels.extend(outputs.max(1)[1].cpu().numpy())
        
    print('Epoch [{}/{}]'.format(epoch + 1, num_epochs))
    print('Loss: {:.4f}, Accuracy: {:.2f}'.format(loss.item(),(total_correct_preds / total_samples)))
    print('Cohen Kappa: {:.2f}'.format(cohen_kappa_score(correct_labels, predicted_labels)))

Epoch [1/10]
Loss: 3.0182, Accuracy: 0.09
Cohen Kappa: 0.03
Epoch [2/10]
Loss: 2.9316, Accuracy: 0.09
Cohen Kappa: 0.02
Epoch [3/10]
Loss: 3.0758, Accuracy: 0.07
Cohen Kappa: 0.00
Epoch [4/10]
Loss: 2.9017, Accuracy: 0.08
Cohen Kappa: 0.02
Epoch [5/10]
Loss: 2.6756, Accuracy: 0.08
Cohen Kappa: 0.02
Epoch [6/10]
Loss: 2.9589, Accuracy: 0.09
Cohen Kappa: 0.02
Epoch [7/10]
Loss: 2.8108, Accuracy: 0.07
Cohen Kappa: 0.01
Epoch [8/10]
Loss: 2.9570, Accuracy: 0.07
Cohen Kappa: 0.01
Epoch [9/10]
Loss: 2.8201, Accuracy: 0.07
Cohen Kappa: 0.01
Epoch [10/10]
Loss: 2.9215, Accuracy: 0.09
Cohen Kappa: 0.03


In [37]:
with torch.no_grad():
    model.eval()
    total_correct_preds = 0
    total_samples = 0
    correct_labels = []
    predicted_labels = []
    for image_label in val_data_loader:
        image, label = image_label['image'], image_label['label']
        outputs = model(image)

        # Class with the highest probability is our predicted class
        equality = (label.data == outputs.max(1)[1])
        # Accuracy is number of correct predictions divided by all predictions
        correct_preds = equality.type_as(torch.FloatTensor()).sum()
        samples = len(equality)
        total_correct_preds += correct_preds
        total_samples += samples

        correct_labels.extend(label.data.cpu().numpy())
        predicted_labels.extend(outputs.max(1)[1].cpu().numpy())

    print("Test accuracy: {:.3f}".format(total_correct_preds/total_samples))
    print("Test cohen score: {:.3f}".format(cohen_kappa_score(correct_labels, predicted_labels)))

Test accuracy: 0.113
Test cohen score: 0.050


### Alexnet

In [38]:
model = torchvision.models.alexnet(pretrained='imagenet')

In [39]:
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p

In [29]:
num_ftrs = model.classifier[6].in_features

In [30]:
model.classifier[6] = nn.Sequential(
    nn.Linear(num_ftrs, 256),
    nn.Dropout(0.4),
    nn.Linear(256, 64),
    nn.Dropout(0.4),
    nn.Linear(64, 16)
)

In [31]:
total_step = len(train_data_loader)
loss_list = []
acc_list = []
for epoch in range(num_epochs):
    total_correct_preds = 0
    total_samples = 0
    total_loss = 0
    correct_labels = []
    predicted_labels = []
    for i, image_label in enumerate(train_data_loader):
        image, label = image_label['image'], image_label['label']
        #image = image.cuda(), label.cuda()
        optimizer.zero_grad()
        outputs = model(image)
        loss = criterion(outputs, label)
        loss_list.append(loss.item())
        loss.backward()
        optimizer.step()
        total = label.size(0)
        predicted = outputs.max(1)[1]
        correct = (predicted == label).sum().item()
        acc_list.append(correct / total)
        total_correct_preds += correct
        total_samples += total
        total_loss += loss.item() * total
        
        correct_labels.extend(label.data.cpu().numpy())
        predicted_labels.extend(outputs.max(1)[1].cpu().numpy())
        
    print('Epoch [{}/{}]'.format(epoch + 1, num_epochs))
    print('Loss: {:.4f}, Accuracy: {:.2f}'.format(loss.item(),(total_correct_preds / total_samples)))
    print('Cohen Kappa: {:.2f}'.format(cohen_kappa_score(correct_labels, predicted_labels)))

Epoch [1/10]
Loss: 2.8005, Accuracy: 0.07
Cohen Kappa: 0.00
Epoch [2/10]
Loss: 2.7743, Accuracy: 0.05
Cohen Kappa: -0.02
Epoch [3/10]
Loss: 2.8031, Accuracy: 0.07
Cohen Kappa: 0.01
Epoch [4/10]
Loss: 2.7830, Accuracy: 0.05
Cohen Kappa: -0.02
Epoch [5/10]
Loss: 2.7813, Accuracy: 0.05
Cohen Kappa: -0.01
Epoch [6/10]
Loss: 2.7885, Accuracy: 0.07
Cohen Kappa: 0.00
Epoch [7/10]
Loss: 2.7673, Accuracy: 0.06
Cohen Kappa: -0.01
Epoch [8/10]
Loss: 2.8003, Accuracy: 0.06
Cohen Kappa: -0.01
Epoch [9/10]
Loss: 2.7799, Accuracy: 0.08
Cohen Kappa: 0.01
Epoch [10/10]
Loss: 2.7988, Accuracy: 0.06
Cohen Kappa: -0.01


In [32]:
with torch.no_grad():
    model.eval()
    total_correct_preds = 0
    total_samples = 0
    correct_labels = []
    predicted_labels = []
    for image_label in val_data_loader:
        image, label = image_label['image'], image_label['label']
        outputs = model(image)

        # Class with the highest probability is our predicted class
        equality = (label.data == outputs.max(1)[1])
        # Accuracy is number of correct predictions divided by all predictions
        correct_preds = equality.type_as(torch.FloatTensor()).sum()
        samples = len(equality)
        total_correct_preds += correct_preds
        total_samples += samples

        correct_labels.extend(label.data.cpu().numpy())
        predicted_labels.extend(outputs.max(1)[1].cpu().numpy())

    print("Test accuracy: {:.3f}".format(total_correct_preds/total_samples))
    print("Test cohen score: {:.3f}".format(cohen_kappa_score(correct_labels, predicted_labels)))

Test accuracy: 0.031
Test cohen score: 0.000
