In [1]:
import math
import numpy as np
import os
%matplotlib inline
import matplotlib.pyplot as plt

cover_dir = 'data/covers'
cover_paths = os.listdir(cover_dir)
len(cover_paths)


3967

In [2]:
cover_paths.sort()
print(cover_paths[:10])


['000714685X_Religion & Spirituality.jpg', '002346450X_Politics & Social Sciences.jpg', '006095289X_Health, Fitness & Dieting.jpg', '006167012X_Religion & Spirituality.jpg', '006176678X_Biographies & Memoirs.jpg', '006198583X_Health, Fitness & Dieting.jpg', '006250407X_Christian Books & Bibles.jpg', '007166470X_Business & Money.jpg', '007786171X_Business & Money.jpg', '007877800X_Science & Math.jpg']


In [3]:
from PIL import Image
from utils_labels import category_string_to_number

def load_images(data_root, image_paths, target_size):
    images = []
    labels = []
    for img_path in image_paths:
        path = os.path.join(data_root, img_path)
        img = Image.open(path)
        img = img.resize(target_size)
        images.append(np.array(img))
        img_label = img_path[img_path.find("_")+1:-4]
        img_label = category_string_to_number(img_label)
        labels.append(img_label)
    return images, labels


In [7]:
n_imgs = 4000
target_size = (227, 227)

# we can't load many images this way
cover_images, cover_labels = load_images(cover_dir, cover_paths[:n_imgs], target_size)
len(cover_images)


3967

In [8]:
# shuffle
np.random.shuffle(cover_images)

# set aside 20% for testing
n_test = math.floor(len(cover_images) * 0.02)
covers_train = cover_images[n_test:]
labels_train =  cover_labels[n_test:]
covers_test = cover_images[:n_test]
labels_test =  cover_labels[:n_test]
print(len(labels_train))
print(labels_train[20])

3888
10


### Dataset modifications 

In [9]:
from torchvision.transforms import ToTensor, Normalize, Compose, Resize
transform = Compose([   #Resize(224,224),
                        ToTensor(),
                        Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
                    ])

from torch.utils.data import Dataset

class CoverDataset(Dataset):
    def __init__(self, data, target, transform):
        self.data = data
        self.target = target
        self.transform=transform
    def __getitem__(self, index):
        image = transform(self.data[index])
        target = torch.tensor(self.target[index]).long() # needs floa
        return image, target
    def __len__(self):
        return len(self.data)
    
    
# define the datasets
train_dataset = CoverDataset(covers_train, labels_train, transform)
test_dataset = CoverDataset(covers_test, labels_test, transform)


print("Amount in the training set: " + str(len(train_dataset)))
print("Amount in the validation set: " + str(len(test_dataset)))


# define the dataloaders, batch_size 128
from torch.utils.data import DataLoader
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
#val_loader = DataLoader(val_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=True)

Amount in the training set: 3888
Amount in the validation set: 79


## PYTORCH: IMPORTING PRETRAINED ALEXNET

##### Please note that 
##### i) The download may take a while
##### ii) The pretrained models from the pytorch models have very specific requirements regarding input, see https://pytorch.org/docs/stable/torchvision/models.html

In [15]:
import torch
from torchvision import *
import torchvision.models as models
import torch.nn as nn

alexnet = models.alexnet(pretrained=True)

#### Just as in the paper [], only retrain the last (fc) layer:

In [18]:
for param in alexnet.parameters():
    param.requires_grad = False # == do not change weights, do not re-train

# fixed, pre-trained alexnet. Now, replace the last layer:
alexnet.classifier._modules['6'] = nn.Linear(4096, 30)
print(*list(alexnet.children()))

Sequential(
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(inplace)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(inplace)
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace)
  (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
) AdaptiveAvgPool2d(output_size=(6, 6)) Sequential(
  (0): Dropout(p=0.5)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace)
  (3): Dropout(p=0.5)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace)

In [19]:
import torch.nn as nn
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(alexnet.parameters(), lr=0.003)

from utils_train import train, test, fit
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
alexnet = alexnet.to(device)

n_epochs = 20
alexnet_retrain = fit(train_loader, test_loader, model=alexnet, optimizer=optimizer, loss_fn=loss_fn, n_epochs=n_epochs)


Epoch 1/20: train_loss: 3.3801, train_accuracy: 21.2449, val_loss: 3.3710, val_accuracy: 16.4557
Epoch 2/20: train_loss: 2.0797, train_accuracy: 34.0278, val_loss: 3.4963, val_accuracy: 21.5190
Epoch 3/20: train_loss: 1.7054, train_accuracy: 43.3385, val_loss: 4.0301, val_accuracy: 11.3924
Epoch 4/20: train_loss: nan, train_accuracy: 46.6307, val_loss: 3.9739, val_accuracy: 13.9241
Epoch 5/20: train_loss: nan, train_accuracy: 49.5885, val_loss: 4.0978, val_accuracy: 15.1899
Epoch 6/20: train_loss: 1.4223, train_accuracy: 52.1091, val_loss: 4.2184, val_accuracy: 22.7848
Epoch 7/20: train_loss: 1.4340, train_accuracy: 52.8292, val_loss: 4.2763, val_accuracy: 12.6582
Epoch 8/20: train_loss: nan, train_accuracy: 56.4815, val_loss: 4.6584, val_accuracy: 11.3924
Epoch 9/20: train_loss: nan, train_accuracy: 56.8673, val_loss: 4.8274, val_accuracy: 18.9873
Epoch 10/20: train_loss: 1.3638, train_accuracy: 57.5874, val_loss: 4.8384, val_accuracy: 15.1899
Epoch 11/20: train_loss: 1.3022, train_ac