# Dependencies

In [None]:
! pip install -q numpy pandas sklearn torch torchvision torch-summary

In [None]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import torch
import torchvision
from torch import nn, cuda
import torch.nn.functional as F
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset, random_split, Subset, ConcatDataset
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor, Lambda, Compose
from torchsummary import summary
import matplotlib.pyplot as plt
import os
import shutil
import time
from itertools import product
from PIL import Image

# Model

In [None]:
class Simple_CNN(nn.Module):
  def __init__(self):
    super(Simple_CNN, self).__init__()
    
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3,3)).to(device)
    nn.init.xavier_uniform_(self.conv1.weight)
    self.max_pool1 = nn.MaxPool2d(kernel_size=(2,2)).to(device)
    self.dropout1 = nn.Dropout2d(p=0.25).to(device)

    self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(4,4)).to(device)
    nn.init.xavier_uniform_(self.conv2.weight)
    self.max_pool2 = nn.MaxPool2d(kernel_size=(2,2)).to(device)
    self.dropout2 = nn.Dropout2d(p=0.25).to(device)

    self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3,3)).to(device)
    nn.init.xavier_uniform_(self.conv3.weight)
    self.max_pool3 = nn.MaxPool2d(kernel_size=(2,2)).to(device)
    self.dropout3 = nn.Dropout2d(p=0.25).to(device)

    self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3,3)).to(device)
    nn.init.xavier_uniform_(self.conv4.weight)
    self.max_pool4 = nn.MaxPool2d(kernel_size=(2,2)).to(device)
    self.dropout4 = nn.Dropout2d(p=0.25).to(device)

    self.fc1 = nn.Linear(128*12*12, 128).to(device)
    nn.init.xavier_uniform_(self.fc1.weight)
    self.dropout5 = nn.Dropout(p=0.5).to(device)

    self.fc2 = torch.nn.Linear(128, 64).to(device)
    nn.init.xavier_uniform_(self.fc2.weight)
    self.dropout6 = nn.Dropout(p=0.5).to(device)

    self.fc3 = torch.nn.Linear(64, 1).to(device)
    nn.init.xavier_uniform_(self.fc3.weight)
  
  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = self.max_pool1(x)
    x = self.dropout1(x)

    x = F.relu(self.conv2(x))  
    x = self.max_pool2(x)  
    x = self.dropout2(x)             

    x = F.relu(self.conv3(x))
    x = self.max_pool3(x)
    x = self.dropout3(x)

    x = F.relu(self.conv4(x))
    x = self.max_pool4(x)
    x = self.dropout4(x) 

    x = x.view(-1, 128*12*12)
    x = self.fc1(x)
    x = self.dropout5(x) 
    x = self.fc2(x)
    x = self.dropout6(x) 
    x = self.fc3(x)

    return x

In [None]:
class Model:
  def __init__(self, params, train_dataloader, val_dataloader, test_dataloader, 
               model_type='pretrained', model_class='resnet50'):
    # parameters for the model
    self.params = params
    # dataloaders
    self.train_dataloader = train_dataloader
    self.val_dataloader = val_dataloader
    self.test_dataloader = test_dataloader
    # model to use for transfer learning
    self.model_class = model_class
    if model_type == 'pretrained':
      self.model = self.pretrained_model(self.model_class)
    else:
      self.model = Simple_CNN()
    print(self.model)
    # loss function to use
    self.loss_fn = nn.MSELoss()
    # optimizer to use; we use an Adam optimizer here
    self.optimizer = torch.optim.Adam(self.model.parameters(), 
                                      lr=self.params['lr'],
                                      weight_decay=self.params['weight_decay'])
  
  def pretrained_model(self, model_class):
    '''
    Function to obtain the desired model and load weights pretrained on ImageNet
    Returns the model with the pretrained weights frozen and the top layers 
    replaced with new layers for fine-tuning
    '''
    # Resnet50 Model
    if model_class == 'resnet50':
      model = models.resnet50(pretrained=True)

      # freeze all layers
      for param in model.parameters():
        param.requires_grad = False
      
      # replace the only FC layer with a new FC layer for regression
      # creating a new layer automatically sets requires_grad=True
      n_inputs = model.fc.in_features

      # layers = []
      # layers.append(nn.Linear(in_features=n_inputs, out_features=256, 
      #                         bias=True))
      # layers.append(nn.ReLU(inplace=True))
      # layers.append(nn.Dropout(p=0.5, inplace=False))
      # layers.append(nn.Linear(in_features=256, 
      #                         out_features=1, 
      #                         bias=True))
      
      # unpack the list into a sequence of layers
      #model.fc = nn.Sequential(*layers)
      
      model.fc = nn.Linear(in_features=n_inputs, out_features=1, bias=True)
    
    # move model to GPU if present
    model = model.to(self.params['device'])

    # if multiple GPUs are being used, enable parallel processing
    if self.params['device'] == 'cuda':
      if self.params['multi_gpu']:
        model = nn.DataParallel(model)
    
    return model
  
  def unfreeze_all_layers(self):
    # unfreeze all layers
    for param in self.model.parameters():
      param.requires_grad = True
  
  def print_model_summary(self):
    # print model summary for verification
    if self.params['device'] == 'cuda':
      if self.params['multi_gpu']:
        summary(
            self.model.module,
            input_size=(3, 224, 224),
            batch_size=self.params['batch_size'],
            device=self.params['device'])
      else:
        summary(
            self.model, input_size=(3, 224, 224), 
            batch_size=self.params['batch_size'], device=self.params['device'])
    else:
        summary(
            self.model, input_size=(3, 224, 224), 
            batch_size=self.params['batch_size'], device=self.params['device'])
  
  def train(self):
    '''
    Function to train the network using the training data for one epoch
    Returns the training loss and accuracy for this epoch
    '''
    # number of training samples
    size = len(self.train_dataloader.dataset)
    # number of batches in set
    num_batches = len(self.train_dataloader)

    # initialize variables
    train_loss = 0.0

    # indicates intention to train; sets behavior for BatchNorm, Dropout, etc.
    self.model.train()

    tracker = 1

    # train model using mini-batches for each iteration
    for batch, (X, y) in enumerate(self.train_dataloader):
      # move data to GPU if available
      if self.params['device'] == 'cuda':
        X, y = X.to(self.params['device']), y.to(self.params['device'])
    
      # clears gradients w.r.t. parameters
      self.optimizer.zero_grad()

      # forward propagation to get outputs from the network for the batch
      outputs = self.model(X)

      #new_shape = (len(y), 1)
      #y = y.view(new_shape)
      # y = y.unsqueeze(-1)
      # calculate loss
      loss = self.loss_fn(outputs.squeeze(), y)

      # add loss to training loss for epoch
      train_loss += loss.item()

      # computes gradients w.r.t. parameters (tensors)
      loss.backward()
      # updates gradients w.r.t. parameters using internally stored grad values
      # only updates layers that are not frozen
      self.optimizer.step()

      # track training progress
      if ((100 * (batch + 1)) / len(self.train_dataloader)) / 20 >= tracker:
        tracker += 1
        print(f'Batch:{batch+1}\tMSE: {loss.item():>7f}')
        if tracker > 5:
          print(f'{(100 * (batch + 1) / len(self.train_dataloader)):.0f}% Complete\t' +
              f'[{size:>4d}/{size:>4d}]')
        else:
          print(f'{(100 * (batch + 1) / len(self.train_dataloader)):.0f}% Complete\t' +
                f'[{(batch+1)*len(X):>4d}/{size:>4d}]')
    
    train_loss /= num_batches

    return train_loss
  
  def validate(self):
    '''
    Function to check network performance using the validation data
    Returns the validation loss and accuracy
    '''
    # number of images in validation set
    size = len(self.val_dataloader.dataset)
    # number of batches in set
    num_batches = len(self.val_dataloader)

    # initialize variables
    val_loss = 0.0

    # indicates intention to test; sets behavior of BatchNorm, Dropout, etc.
    self.model.eval()

    # disable gradient calculation
    with torch.no_grad():
      # iterates through data in the dataloader
      for X, y in self.val_dataloader:
        # moves data to GPU if available
        if self.params['device'] == 'cuda':
          X, y = X.to(self.params['device']), y.to(self.params['device'])
        
        # forward propagation to get outputs from the network for the batch
        outputs = self.model(X)

        # calculate loss
        loss = self.loss_fn(outputs.squeeze(), y)
        
        # add loss to validation loss for epoch
        val_loss += loss.item()

    val_loss /= num_batches

    return val_loss
  
  def test(self):
    '''
    Function to check network performance on the test data
    Returns the test accuracy and predictions
    '''
    # number of images in validation set
    size = len(self.test_dataloader.dataset)
    # number of batches in set
    num_batches = len(self.test_dataloader)

    # initialize variables
    test_loss = 0.0
    predictions = []

    # indicates intention to test; sets behavior of BatchNorm, Dropout, etc.
    self.model.eval()

    # disable gradient calculation
    with torch.no_grad():
      # iterates through data in the dataloader
      for X, y in self.test_dataloader:
        # moves data to GPU if available
        if self.params['device'] == 'cuda':
          X, y = X.to(self.params['device']), y.to(self.params['device'])
        
        # forward propagation to get outputs from the network for the batch
        outputs = self.model(X)

        predictions.append(outputs)

        # calculate loss
        loss = self.loss_fn(outputs.squeeze(), y)

        # add loss to validation loss for epoch
        test_loss += loss.item()

    test_loss /= num_batches

    return test_loss, predictions
  
  def fit(self, path):
    '''
    Function to train the network, implementing early stopping
    path: path for location for best model
    Returns the best model and a list of results for each epoch
    '''
    # variable to hold best epoch number
    best_epoch = 0
    # variable for early stopping
    early_stop_counter = 0
    # variable to track minimum validation loss for saving best model
    min_val_loss = np.Inf
    # list to hold results for each epoch
    results = []

    # number of epochs already trained (if using loaded in model weights)
    try:
      print(f'Model has previously been trained for: {self.model.epochs}' + 
            ' epochs\n')
    except:
      self.model.epochs = 0
      print(f'Starting Training from Scratch\n')
    
    main_start = time.time()

    # train the model for the set number of epochs
    for epoch in range(self.params['num_epochs']):
      # variables to track losses and accuracies for the epoch
      train_loss = 0.0

      val_loss = 0.0

      val_losses = []

      start = time.time()

      print(f"\nEpoch {epoch+1}\n----------------------------------------------")

      # train the network with the training data
      train_loss = self.train()

      # increase early stopping counter
      early_stop_counter += 1
      
      # increase the number of epochs run on the model by 1
      self.model.epochs += 1

      # validate the network
      val_loss = self.validate()

      # add results for this epoch to results list
      results.append([train_loss, val_loss])
      
      print(f'Training: \n MSE: {train_loss:.4f}\t')
      print(f'Validation: \n MSE: {val_loss:.4f}\t')
      
      # save best model if validation loss improves
      if val_loss < min_val_loss:
        print(f'\nValidation loss decreased ({min_val_loss:.4f} -> ' + 
              f'{val_loss:.4f})\nSaving model...\n')
        torch.save(self.model.state_dict(), path)
        # reset counter for early stopping
        early_stop_counter = 0
        # save values for the best model
        min_val_loss = val_loss
        best_epoch = epoch+1

      #if early_stop_counter >= self.params['early_stop']:
        # if the threshold for early stopping is met (no improvement in
        # val loss for specified continuous num of epochs), trigger early stopping
      #  print(f'\nEarly Stopping! Total epochs: {epoch+1}\n')
      #  break
      
      val_losses.append(val_loss)

      print(f'\n{(time.time() - start)/60:.2f} minutes elapsed in epoch\n')
    
    # load the best state dict
    self.model.load_state_dict(torch.load(path))
    # attach the optimizer
    self.model.optimizer = self.optimizer
    # record overall time and print out stats
    total_time = time.time() - main_start
    print(f'Best Epoch: {best_epoch} \n MSE: {min_val_loss:.4f}')
    print(f'\nTotal time: {total_time/60:.2f} minutes elapsed; ' + 
          f'{(total_time / (epoch+1))/60:.2f} minutes per epoch')

    # format results
    results = pd.DataFrame(results, columns=['train_loss', 'val_loss'])
    
    # return results
    return min_val_loss

  def test_performance(self, path):
    '''
    Function to test the performance of the network
    path: path for location for best model
    Returns the predictions for the test set
    '''
    self.load_best_model(path)
    print('Test Set Performance\n\n')
    self.test_loss, self.predictions = self.test()
    print(f'Test MSE: {self.test_loss:.2f}')
    # predictions is populated in batches, so we are concatenating
    # the results
    last_batch_preds = self.predictions[-1]
    self.predictions = self.predictions[:-1]
    self.predictions = torch.stack(self.predictions)
    # flatten stacked results into a 1D array
    self.predictions = self.predictions.reshape(1, -1).squeeze().tolist()
    last_batch_preds = last_batch_preds.reshape(1, -1).squeeze().tolist()
    self.predictions.extend(last_batch_preds)
    self.predictions = torch.Tensor(self.predictions)
    self.predictions.to(self.params['device'])

    return self.predictions
  
  def save_best_model(self, path):
    '''
    Function to save a PyTorch model checkpoint
    path: location to save model
    '''

    # details
    checkpoint = {
        'epochs': self.model.epochs
    }

    # extract the final classifier and the state dictionary
    if self.model_class == 'resnet50':
      if self.params['multi_gpu']:
        checkpoint['fc'] = self.model.module.fc
        checkpoint['state_dict'] = self.model.module.state_dict()
      else:
        checkpoint['fc'] = self.model.fc
        checkpoint['state_dict'] = self.model.state_dict()
    elif self.model_class == 'densenet121':
      if self.params['multi_gpu']:
        checkpoint['classifier'] = self.model.module.classifier
        checkpoint['state_dict'] = self.model.module.state_dict()
      else:
        checkpoint['classifier'] = self.model.classifier
        checkpoint['state_dict'] = self.model.state_dict()
      

    # add the optimizer
    checkpoint['optimizer'] = self.optimizer
    checkpoint['optimizer_state_dict'] = self.optimizer.state_dict()

    # save the data to the path
    torch.save(checkpoint, path)
  
  def load_best_model(self, path):
    '''
    Function to load a PyTorch model checkpoint
    path: saved model path
    '''

    # load in checkpoint
    checkpoint = torch.load(path)

    if self.model_class == 'resnet50':
      self.model = models.resnet50(pretrained=True)
      # set parameters as not trainable
      for param in self.model.parameters():
          param.requires_grad = False
      self.model.fc = checkpoint['fc']
      # load in the state dict
      self.model.load_state_dict(checkpoint['state_dict'])
    elif self.model_class == 'densenet121':
      self.model = models.densenet121(pretrained=True)
      # set parameters as not trainable
      for param in self.model.parameters():
        param.requires_grad = False
      self.model.classifier = checkpoint['classifier']
      # load in the state dict
      self.model.load_state_dict(checkpoint['state_dict'])

    # move model to GPU if available
    if self.params['device'] == 'cuda':
      self.model = self.model.to(self.params['device'])
      if self.params['multi_gpu']:
        self.model = nn.DataParallel(self.model)

    # model basics
    self.model.epochs = checkpoint['epochs']

    # optimizer
    self.optimizer = checkpoint['optimizer']
    self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Device

I am training the model on Google Colab using a GPU. If you have multiple GPUs in parallel, the multi_gpu settings come into play. The code is setup so as to work with most device configurations.

In [None]:
# which device the model will use for training
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Training on {}".format(device))

random_seed = 1

# configurations for GPUs
if device == 'cuda':
  torch.cuda.manual_seed_all(random_seed)
  num_gpus = cuda.device_count()
  print(f'{num_gpus} GPUs detected')
  if num_gpus > 1:
    multi_gpu = True
  else:
    multi_gpu = False
else:
  multi_gpu = False

Training on cuda
1 GPUs detected


# Data

In [None]:
!mkdir data

## Unzip Images

In [None]:
! unzip /content/data/posters.zip -d data

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: data/posters/train/118698.jpg  
  inflating: data/__MACOSX/posters/train/._118698.jpg  
  inflating: data/posters/train/102818.jpg  
  inflating: data/__MACOSX/posters/train/._102818.jpg  
  inflating: data/posters/train/93185.jpg  
  inflating: data/__MACOSX/posters/train/._93185.jpg  
  inflating: data/posters/train/758781.jpg  
  inflating: data/__MACOSX/posters/train/._758781.jpg  
  inflating: data/posters/train/105811.jpg  
  inflating: data/__MACOSX/posters/train/._105811.jpg  
  inflating: data/posters/train/499603.jpg  
  inflating: data/__MACOSX/posters/train/._499603.jpg  
  inflating: data/posters/train/299458.jpg  
  inflating: data/__MACOSX/posters/train/._299458.jpg  
  inflating: data/posters/train/91786.jpg  
  inflating: data/__MACOSX/posters/train/._91786.jpg  
  inflating: data/posters/train/397313.jpg  
  inflating: data/__MACOSX/posters/train/._397313.jpg  
  inflating: data/posters/trai

## Create DataFrames

In [None]:
train_posters_df = pd.read_csv('/content/data/train.csv')
train_posters_df = train_posters_df.astype({'imdb_id': int, 'release_date': str})
train_files_list = train_posters_df['imdb_id'].tolist()
for f in train_files_list:
  try:
    source = str(f) + '.jpg'
    destination = './train/' + str(f) + '.jpg'
    shutil.move(source, destination)
  except FileNotFoundError as fnfe:
    continue

In [None]:
test_posters_df = pd.read_csv('/content/data/test.csv')
test_posters_df = test_posters_df.astype({'imdb_id': int, 'release_date': str})
test_files_list = test_posters_df['imdb_id'].tolist()
for f in test_files_list:
  try:
    source = str(f) + '.jpg'
    destination = './test/' + str(f) + '.jpg'
    shutil.move(source, destination)
  except FileNotFoundError as fnfe:
    continue

In [None]:
val_posters_df = pd.read_csv('/content/data/val.csv')
val_posters_df = val_posters_df.astype({'imdb_id': int, 'release_date': str})
val_files_list = val_posters_df['imdb_id'].tolist()
for f in val_files_list:
  try:
    source = str(f) + '.jpg'
    destination = './val/' + str(f) + '.jpg'
    shutil.move(source, destination)
  except FileNotFoundError as fnfe:
    continue

In [None]:
movie_data_df = pd.read_csv('/content/data/df.csv')
ratings_df = movie_data_df[['imdb_id', 'averageRating']].copy()
ratings_df = ratings_df.astype({'imdb_id': int, 'averageRating': float})
ratings_df

Unnamed: 0,imdb_id,averageRating
0,35423,6.4
1,79285,5.1
2,79302,5.1
3,80339,7.7
4,80360,6.9
...,...,...
11689,6921996,6.2
11690,6932874,5.9
11691,6977442,7.4
11692,6980792,6.4


In [None]:
ratings_dict = {}

temp_dict = ratings_df.set_index('imdb_id').T.to_dict('list')

for key in temp_dict:
  v = temp_dict[key]
  ratings_dict[key] = v[0]
  
print(ratings_dict)

{35423: 6.4, 79285: 5.1, 79302: 5.1, 80339: 7.7, 80360: 6.9, 80365: 6.3, 80377: 6.1, 80388: 7.3, 80402: 4.8, 80406: 7.4, 80436: 5.7, 80437: 7.1, 80442: 6.2, 80453: 5.8, 80455: 7.9, 80461: 7.2, 80469: 6.9, 80472: 6.1, 80474: 7.1, 80487: 7.2, 80500: 6.4, 80520: 6.0, 80549: 7.5, 80550: 5.2, 80556: 6.6, 80569: 6.5, 80634: 7.0, 80641: 6.3, 80661: 7.1, 80678: 8.2, 80684: 8.7, 80731: 4.9, 80739: 4.2, 80745: 6.5, 80749: 6.8, 80754: 5.6, 80756: 6.1, 80761: 6.4, 80762: 7.6, 80790: 6.9, 80798: 7.1, 80808: 4.3, 80850: 5.0, 80854: 5.7, 80855: 6.7, 80863: 5.9, 80868: 6.3, 80881: 6.2, 80888: 6.3, 80895: 6.0, 80907: 6.2, 80913: 6.9, 80917: 5.4, 80934: 5.3, 80948: 5.8, 80975: 5.5, 81031: 5.7, 81032: 5.3, 81060: 6.5, 81063: 6.3, 81071: 6.9, 81080: 5.1, 81150: 6.8, 81159: 6.3, 81163: 6.2, 81184: 6.0, 81207: 7.1, 81235: 6.6, 81249: 5.1, 81259: 5.0, 81268: 5.2, 81269: 5.4, 81280: 6.0, 81283: 7.8, 81334: 4.1, 81353: 5.3, 81366: 4.5, 81375: 6.2, 81383: 5.3, 81398: 8.2, 81414: 7.2, 81433: 5.2, 81439: 5.6, 814

# Parameters

This sets the parameters that will be used to build and train the model. They may be changed as desired.

In [None]:
params = {
    # location of training images
    'train_dir': '/content/data/posters/train',
    # location of validation images
    'val_dir': '/content/data/posters/val',
    # location of test images
    'test_dir': '/content/data/posters/test',
    
    # pseudo-random splits of datasets
    'random_seed': 1,
    # learning_rate
    'lr': 1e-3,
    # number of times the network will see the entire training set
    'num_epochs': 10,
    # number of epochs with no improvement to trigger early stopping
    'early_stop': 3,
    # number of training points in each iteration
    'batch_size': 32,
    # weight decay value
    'weight_decay': 0.1,

    # which device the model will use for training
    'device': device,
    'multi_gpu': multi_gpu
}

# DictWriter

In [None]:
import csv
class DictWriter:
    
    def __init__(self,file_path,field_names):
        self.field_names = field_names
        self.file_path = file_path
        self.create_file() #create file if it doesnt exist
        
    def create_file(self):
        if not os.path.exists(self.file_path):
            print('Creating file')
            f = open(self.file_path, 'w')
            w = csv.DictWriter(f, field_names)
            w.writeheader()
            f.close()
        else:
            print('File already exists; appending rows to it')
            
    def add_rows(self,rows):  
        with open(self.file_path, 'a') as f:
            w = csv.DictWriter(f,self.field_names)
            for r in rows:    
                w.writerow(r) 

# Create Datasets

These cells will load the data from the image files and create the datasets that are used in PyTorch for model training and evaluation.

In [None]:
# list of training image files
train_files = os.listdir(params['train_dir'])
# list of validation image files
val_files = os.listdir(params['val_dir'])
# list of test image files
test_files = os.listdir(params['test_dir'])

In [None]:
train_files_ratings = []
val_files_ratings = []
test_files_ratings = []

for f in train_files:
  imdb_id = ''
  for c in f:
    if c.isdigit():
      imdb_id += c
  imdb_id = int(imdb_id)
  train_files_ratings.append((f, ratings_dict[imdb_id]))

for f in val_files:
  imdb_id = ''
  for c in f:
    if c.isdigit():
      imdb_id += c
  imdb_id = int(imdb_id)
  val_files_ratings.append((f, ratings_dict[imdb_id]))

for f in test_files:
  imdb_id = ''
  for c in f:
    if c.isdigit():
      imdb_id += c
  imdb_id = int(imdb_id)
  test_files_ratings.append((f, ratings_dict[imdb_id]))

In [None]:
class CreateDatasets(Dataset):
  '''
  Class to create datasets from given image files and their mode, including
  applying any desired transformations
  '''
  def __init__(self, file_list, dir, mode='train', transform=None):
    self.file_list = file_list # list of image files
    self.dir = dir # location of image files
    self.mode = mode # whether the images belong to the training or test set
    self.transform = transform # transformations to apply to the images
  
  def __len__(self):
    return len(self.file_list) # number of images passed in
  
  def __getitem__(self, idx):
    # get image from file
    img = Image.open(os.path.join(self.dir, self.file_list[idx][0])).convert('RGB')
    # apply the specified transformations
    if self.transform:
      img = self.transform(img)
    # add image labels if the images are in the training set; dog - 1, cat - 0
    if self.mode == 'train':
      self.label = self.file_list[idx][1]
      img = img.numpy()
      return img.astype('float32'), np.float32(self.label)
    else:
      img = img.numpy()
      return img.astype('float32'), self.file_list[idx][0]

In [None]:
# image transformations for the different datasets
image_transforms = {
    # training set uses data augmentation
    'train':
    transforms.Compose([
        #transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        #transforms.ColorJitter(),
        #transforms.RandomHorizontalFlip(),
        transforms.Resize(size=(224,224)),
        #transforms.CenterCrop(size=224), # image net standard
        transforms.ToTensor(),
    ]),
    # validation set does not use augmentation
    'val':
    transforms.Compose([
        transforms.Resize(size=(224,224)),
        #transforms.CenterCrop(size=224),
        transforms.ToTensor(),
    ]),
    # test does not use augmentation
    'test':
    transforms.Compose([
        transforms.Resize(size=(224,224)),
        #transforms.CenterCrop(size=224),
        transforms.ToTensor(),
    ])
}

# create training, validation, and test sets with balanced classes and 
# transformations applied
train_set = CreateDatasets(train_files_ratings, params['train_dir'], 
                           transform=image_transforms['train'])
val_set = CreateDatasets(val_files_ratings, params['val_dir'], 
                         transform=image_transforms['val'])
test_set = CreateDatasets(test_files_ratings, params['test_dir'],
                          transform=image_transforms['test'])

# Baseline (Average Prediction)

In [None]:
train_rating_avg = 0
for movie in train_files_ratings:
  train_rating_avg += movie[1]
train_rating_avg /= len(train_files_ratings)
print("Rating Average in Training Data: " + str(train_rating_avg))

Rating Average in Training Data: 6.178983009111038


In [None]:
true_rating_avg = [train_rating_avg]*len(test_files_ratings)
pred_rating_avg = [movie[1] for movie in test_files_ratings]
rating_avg_mae = mean_absolute_error(true_rating_avg, pred_rating_avg)
rating_avg_mse = mean_squared_error(true_rating_avg, pred_rating_avg)
print("MAE for Average Prediction: " + str(rating_avg_mae))
print("MSE for Average Prediction: " + str(rating_avg_mse))

MAE for Average Prediction: 0.8585805727781036
MSE for Average Prediction: 1.1671864758824264


In [None]:
rating_avg_errors = {'MAE':rating_avg_mae,'MSE':rating_avg_mse,'RMSE':np.sqrt(rating_avg_mse)}
pd.DataFrame([rating_avg_errors]).to_csv('/content/data/ratings_with_posters_baseline.csv', index=False)

# Train Model

## Hyperparameters

In [None]:
hyperparameters = {'batch_size' : [8,16,32],
                   'lr' : [1e-5,1e-4,1e-3],
                   'weight_decay' : [0.1,0.01]}

combinations = list(product(*[range(len(i)) for i in list(hyperparameters.values())]))
scores = np.zeros([len(i) for i in list(hyperparameters.values())])
best_setting = None

field_names = list(hyperparameters.keys()) + ['score']
dw = DictWriter('/content/data/ratings_with_posters_hyperparameters_training_results.csv',field_names)

Creating file


## Configs

In [None]:
current_trials_df = pd.read_csv('/content/data/ratings_with_posters_hyperparameters_training_results.csv')
if 'score' in current_trials_df:
  completed_trials = current_trials_df.drop('score',axis=1).to_dict(orient='records')
  best_score = min(float('inf'), current_trials_df['score'].min())
else:
  completed_trials = []
  best_score = float('inf')

print(f'Current best val score = {best_score}')

Current best val score = inf


## Train

In [None]:
for idx,comb_indexes in enumerate(combinations):
  comb_values = {name:val[idx] for name,val,idx in zip(hyperparameters.keys(),hyperparameters.values(),comb_indexes)}
  
  if comb_values not in completed_trials:
    print('Training with the following hyperparameters:')
    print(comb_values)

    params['lr'] = comb_values['lr']
    params['batch_size'] = comb_values['batch_size']
    params['weight_decay'] = comb_values['weight_decay']

    # create dataloaders for the three sets, which wrap iterables over the sets and 
    # support automatic batching, sampling, shuffling and multiprocess data loading
    train_dataloader = DataLoader(train_set, batch_size=params['batch_size'], 
                                  shuffle=True)
    val_dataloader = DataLoader(val_set, batch_size=params['batch_size'], 
                                shuffle=True)
    test_dataloader = DataLoader(test_set, batch_size=params['batch_size'], 
                                shuffle=False)
    
    # create instance of Model class to fine tune ResNet50 on posters dataset
    resnet_model = Model(params, train_dataloader, val_dataloader, 
                     test_dataloader, model_class='resnet50')

    # unfreeze all layers to allow for training
    resnet_model.unfreeze_all_layers()
    
    # train model
    score = resnet_model.fit(path='/content/data/resnet_unfrozen_posters_ratings.pt')

    scores[tuple(comb_indexes)] = score

    comb_values['score'] = score
    
    dw.add_rows([comb_values])
    
    if score < best_score:
      print(f'Better model found with a score of {np.round(score,4)}. Saving...')
      best_score = score
      best_setting = comb_values
      # save best model
      resnet_model.save_best_model(path='/content/data/resnet_unfrozen_posters_ratings.pth')
      
      print('Saved')
  else:
    print('Trial already exists; skipping...')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): Bottleneck(
      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps

# Test Model

In [None]:
    params['lr'] = best_setting['lr']
    params['batch_size'] = best_setting['batch_size']
    params['weight_decay'] = best_setting['weight_decay']

In [None]:
# create dataloaders for the three sets, which wrap iterables over the sets and 
# support automatic batching, sampling, shuffling and multiprocess data loading
train_dataloader = DataLoader(train_set, batch_size=params['batch_size'], 
                              shuffle=True)
val_dataloader = DataLoader(val_set, batch_size=params['batch_size'], 
                            shuffle=True)
test_dataloader = DataLoader(test_set, batch_size=params['batch_size'], 
                             shuffle=False)

In [None]:
# create instance of Model class to fine tune ResNet50 on posters dataset
resnet_model = Model(params, train_dataloader, val_dataloader, 
                     test_dataloader, model_class='resnet50')

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# calculate test accuracy
preds = resnet_model.test_performance(path='/content/data/resnet_unfrozen_posters_ratings.pth')
preds = preds.to(device)

Test Set Performance


Test MSE: 1.10


In [None]:
true_ratings = [movie[1] for movie in test_files_ratings]
pred_ratings = preds.tolist()
ratings_mae = mean_absolute_error(true_ratings, pred_ratings)
ratings_mse = mean_squared_error(true_ratings, pred_ratings)
print('Test MAE: ' + str(ratings_mae))
print('Test MSE: ' + str(ratings_mse))

Test MAE: 0.8346236874253331
Test MSE: 1.1057657113003005


In [None]:
ratings_test_errors = {'MAE':ratings_mae,'MSE':ratings_mse,'RMSE':np.sqrt(ratings_mse)}
pd.DataFrame([ratings_test_errors]).to_csv('/content/data/ratings_with_posters_test_results.csv', index=False)

# Simple CNN

## Parameters

In [54]:
params = {
    # location of training images
    'train_dir': '/content/data/posters/train',
    # location of validation images
    'val_dir': '/content/data/posters/val',
    # location of test images
    'test_dir': '/content/data/posters/test',
    
    # learning_rate
    'lr': 1e-3,
    # number of times the network will see the entire training set
    'num_epochs': 10,
    # number of epochs with no improvement to trigger early stopping
    'early_stop': 3,
    # number of training points in each iteration
    'batch_size': 64,
    # pseudo-random splits of datasets
    'random_seed': 1,
    'weight_decay': 0.1,
    
    # which device the model will use for training
    'device': device,
    'multi_gpu': multi_gpu
}

## Train CNN

In [55]:
# create instance of Model class to fine tune ResNet50 on posters dataset
simple_cnn = Model(params, train_dataloader, val_dataloader, 
                   test_dataloader, model_type='simple_cnn', model_class='simple_cnn')

Simple_CNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (max_pool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (dropout1): Dropout2d(p=0.25, inplace=False)
  (conv2): Conv2d(16, 32, kernel_size=(4, 4), stride=(1, 1))
  (max_pool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (dropout2): Dropout2d(p=0.25, inplace=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (max_pool3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (dropout3): Dropout2d(p=0.25, inplace=False)
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (max_pool4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (dropout4): Dropout2d(p=0.25, inplace=False)
  (fc1): Linear(in_features=18432, out_features=128, bias=True)
  (dropout5): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=128, out_features=64

In [56]:
# train model
results = simple_cnn.fit(path='/content/data/simple_cnn_posters_ratings.pt')

Starting Training from Scratch


Epoch 1
----------------------------------------------
Batch:102	MSE: 3.724036
20% Complete	[1632/8122]
Batch:204	MSE: 3.331830
40% Complete	[3264/8122]
Batch:305	MSE: 3.504815
60% Complete	[4880/8122]
Batch:407	MSE: 2.172356
80% Complete	[6512/8122]
Batch:508	MSE: 2.289930
100% Complete	[8122/8122]
Training: 
 MSE: 3.2851	
Validation: 
 MSE: 1.2867	

Validation loss decreased (inf -> 1.2867)
Saving model...


0.43 minutes elapsed in epoch


Epoch 2
----------------------------------------------
Batch:102	MSE: 4.601550
20% Complete	[1632/8122]
Batch:204	MSE: 1.321391
40% Complete	[3264/8122]
Batch:305	MSE: 3.045782
60% Complete	[4880/8122]
Batch:407	MSE: 3.255392
80% Complete	[6512/8122]
Batch:508	MSE: 0.884776
100% Complete	[8122/8122]
Training: 
 MSE: 2.1988	
Validation: 
 MSE: 1.5594	

0.38 minutes elapsed in epoch


Epoch 3
----------------------------------------------
Batch:102	MSE: 1.558598
20% Complete	[1632/8122]
Batch:204	MSE: 1.476300
40% Com

In [57]:
# save best model
simple_cnn.save_best_model(path='/content/data/simple_cnn_posters_ratings.pth')

In [58]:
# calculate test MSE
preds = simple_cnn.test_performance(path='/content/data/simple_cnn_posters_ratings.pth')

Test Set Performance


Test MSE: 1.16


In [61]:
true_ratings = [movie[1] for movie in test_files_ratings]
pred_ratings = preds.tolist()
ratings_mae = mean_absolute_error(true_ratings, pred_ratings)
ratings_mse = mean_squared_error(true_ratings, pred_ratings)
print('Test MAE: ' + str(ratings_mae))
print('Test MSE: ' + str(ratings_mse))

Test MAE: 0.8642677182354227
Test MSE: 1.1680686455450362


In [62]:
ratings_test_errors = {'MAE': ratings_mae, 'MSE':ratings_mse,'RMSE':np.sqrt(ratings_mse)}
pd.DataFrame([ratings_test_errors]).to_csv('/content/data/ratings_with_posters_simple_cnn_test_results.csv', index=False)