In [1]:
import math
import numpy as np
import os
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor, Normalize, Compose, Resize
from torch.utils.data import DataLoader
from utils_labels import folder_to_cat_dict
import matplotlib.pyplot as plt



['000714685X_Religion & Spirituality.jpg', '002346450X_Politics & Social Sciences.jpg', '006095289X_Health, Fitness & Dieting.jpg', '006167012X_Religion & Spirituality.jpg', '006176678X_Biographies & Memoirs.jpg', '006198583X_Health, Fitness & Dieting.jpg', '006250407X_Christian Books & Bibles.jpg', '007166470X_Business & Money.jpg', '007786171X_Business & Money.jpg', '007877800X_Science & Math.jpg']


### Dataset modifications 

In [6]:

# Preprocessing pipeline: convert each image to a tensor, then normalize it
# with the per-channel mean/std below. The original note attributes these
# statistics to PyTorch's pretrained models (AlexNet, possibly DenseNet).
# NOTE(review): `target_size` is not used in this cell because the Resize step
# is commented out.
target_size = (32, 32)
transform = Compose([   # Resize((224, 224)),  # disabled; if re-enabled, pass the size as ONE tuple
                        ToTensor(),
                        Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
                    ])


Amount in the training set: 2857
Amount in the validation set: 714
Amount in the test set: 396


### Define Dataloaders

In [None]:

# Directories holding the book-cover images, one per split.
cover_dir_train = 'data/covers/train'
cover_dir_test = 'data/covers/test'
cover_dir_val = 'data/covers/valid'

# Preprocessing for the pretrained torchvision models used below.
# The mean/std values are the normalization statistics those models expect.
# The input must be at least 224x224: with 32x32 images AlexNet's feature maps
# shrink to 1x1 before its last 3x3 max-pool, which then fails.
target_size = (224, 224)
transforms = Compose([Resize(target_size),
                    ToTensor(),
                    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                    ])

# Every split goes through the same preprocessing pipeline.
train_dataset = ImageFolder(cover_dir_train, transform=transforms)
test_dataset = ImageFolder(cover_dir_test, transform=transforms)
val_dataset = ImageFolder(cover_dir_val, transform=transforms)

# class is the folder name and idx a number generated by ImageFolder
class_to_idx = train_dataset.class_to_idx
# mapping of folder to category name
folder_cat_path = 'data/meta/folder_to_cat.json'
folder_to_category = folder_to_cat_dict(folder_cat_path)

print("Amount in the training set: " + str(len(train_dataset)))
print("Amount in the test set: " + str(len(test_dataset)))
# Report the validation split itself (previously the test-set size was printed here).
print("Amount in the validation set: " + str(len(val_dataset)))

# Dataloaders. Batch size (128, 64, 32?) still needs to be adjusted for the cluster.
# Only the training data is shuffled; validation/test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


## CLASSIFICATION: ALEXNET

##### Please note that 
##### i) The download may take a while
##### ii) The pretrained models provided by PyTorch (torchvision) have very specific input requirements; see https://pytorch.org/docs/stable/torchvision/models.html

In [7]:
# PyTorch setup: framework imports, project training helpers, device selection.
import torch
from torchvision import *  # NOTE(review): wildcard import kept as-is; other cells may rely on names it provides
import torchvision.models as models
import torch.nn as nn
from utils_train import train, test, fit

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# AlexNet with pretrained weights (downloaded if not already cached locally).
alexnet = models.alexnet(pretrained=True)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /home/jonas/.torch/models/alexnet-owt-4df8aa71.pth
244418560.0 bytes


#### Just as in the paper "Judging a Book by its Cover", only retrain the last (fc) layer:

In [10]:
# Freeze every pretrained weight: parameters with requires_grad=False receive
# no gradients and are therefore not re-trained.
for param in alexnet.parameters():
    param.requires_grad = False


num_classes = 30  # might change to 32? see paper for category exclusions

# Pretrained AlexNet is now fixed; replace its last classifier layer with a
# fresh one that has `num_classes` outputs. The layer is addressed through the
# public nn.Sequential indexing API (not the private `_modules` dict) and the
# input width is read from the layer being replaced instead of hard-coding it.
# A newly created nn.Linear has requires_grad=True, so it is the trainable part.
alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, num_classes)
print(*list(alexnet.children()))  # show the model (optional)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(inplace)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(inplace)
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace)
  (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
) AdaptiveAvgPool2d(output_size=(6, 6)) Sequential(
  (0): Dropout(p=0.5)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace)
  (3): Dropout(p=0.5)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace)

In [11]:
# needs to be defined on the cluster training procedure
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(alexnet.parameters(), lr=0.003)


alexnet = alexnet.to(device)
n_epochs = 1
# retrain (only that last replaced layer)

#### heavily recommended NOT to run this in the IPNYB, unless run on GPU in script form
#alexnet_retrain = fit(train_loader, val_loader, model=alexnet, optimizer=optimizer, loss_fn=loss_fn, n_epochs=n_epochs)

Epoch 1/1: train_loss: 4.1197, train_accuracy: 20.4425, val_loss: 4.3839, val_accuracy: 21.4646


## CLASSIFICATION: Densenet (161)

##### Please note that 
##### i) The download may take a while
##### ii) The pretrained models provided by PyTorch (torchvision) have very specific input requirements; see https://pytorch.org/docs/stable/torchvision/models.html

#### iii) The "Judging a Book by its Cover" paper implements a simple "LeNet" variant as its second classification model (an old concept that, unsurprisingly, performs worse). We should instead try a more recent, conceptually different classification model such as DenseNet.

In [12]:
# DenseNet-161 from torchvision, created with pretrained=True so that the
# pretrained weights are loaded.
densenet = models.densenet161(pretrained=True)

In [13]:
# Freeze all pretrained weights so that they are not updated during training.
for param in densenet.parameters():
    param.requires_grad = False

# Replace the output layer with a fresh one that has `num_classes` outputs.
# The input width is read from the existing classifier rather than hard-coded,
# so the line stays correct if a different DenseNet variant is loaded.
densenet.classifier = nn.Linear(densenet.classifier.in_features, num_classes)
print(*list(densenet.children()))

Sequential(
  (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu0): ReLU(inplace)
  (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (denseblock1): _DenseBlock(
    (denselayer1): _DenseLayer(
      (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace)
      (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu2): ReLU(inplace)
      (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (denselayer2): _DenseLayer(
      (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace)
      (conv1): Conv2d(144, 192, kernel_size=(1, 1), 

In [15]:
# Training configuration for retraining DenseNet.

# Move the model to the target device BEFORE constructing the optimizer, as
# recommended by the torch.optim documentation.
densenet = densenet.to(device)

loss_fn = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters that are still trainable (the
# replaced classifier); frozen parameters never receive gradients.
optimizer = torch.optim.Adam(
    [p for p in densenet.parameters() if p.requires_grad], lr=0.003)

n_epochs = 1
#### heavily recommended NOT to run this, unless run on GPU in script form

#densenet_retrain = fit(train_loader, val_loader, densenet, optimizer, loss_fn, n_epochs)