### Setup

This involves the installation of required libraries and importing them. The colab version also includes mounting the drive and passing the paths to where the files have been stored.

In [1]:
!pip install pytorch-pretrained-bert



In [3]:
### Google-Colab Version ###

# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
# Import Libraries
import os
import getpass
import pandas as pd
import re
from io import StringIO
from sklearn.model_selection import train_test_split

import time
import sklearn
import copy
import random
from datetime import datetime

import torch
import torch.utils.data as data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import numpy as np
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

from pytorch_pretrained_bert import BertTokenizer, BertModel
import torch.nn.functional as F
import math

In [5]:
## Code snippet to check if any unused variable still occupies GPU Memory 

### RUN JUST FOR THE COLAB VERSION ###
def pretty_size(size):
    """Render a ``torch.Size`` as its dimensions joined by " × " (e.g. "2 × 3")."""
    assert isinstance(size, torch.Size)
    dims = [str(dim) for dim in size]
    return " × ".join(dims)

def dump_tensors(gpu_only=True):
    """Print every tensor currently tracked by the garbage collector.

    Args:
        gpu_only: when True (default) only CUDA tensors are reported; when
            False, tensors on any device are listed.

    Prints one line per reported tensor (type, device / pinned / grad flags,
    shape) followed by the total number of elements of all reported tensors.
    """
    import gc

    def _pinned_flag(tensor):
        # ``is_pinned`` is a method; testing the bound method itself is always
        # truthy and would label every tensor as pinned.
        return " pinned" if tensor.is_pinned() else ""

    total_size = 0
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                if not gpu_only or obj.is_cuda:
                    print("%s:%s%s %s" % (type(obj).__name__,
                                          " GPU" if obj.is_cuda else "",
                                          _pinned_flag(obj),
                                          pretty_size(obj.size())))
                    total_size += obj.numel()
            elif hasattr(obj, "data") and torch.is_tensor(obj.data):
                # Wrapper objects that carry a tensor in ``.data``.
                if not gpu_only or obj.is_cuda:
                    print("%s → %s:%s%s%s%s %s" % (type(obj).__name__,
                                                   type(obj.data).__name__,
                                                   " GPU" if obj.is_cuda else "",
                                                   _pinned_flag(obj.data),
                                                   " grad" if getattr(obj, "requires_grad", False) else "",
                                                   " volatile" if getattr(obj, "volatile", False) else "",
                                                   pretty_size(obj.data.size())))
                    total_size += obj.data.numel()
        except Exception:
            # Deliberate best effort: arbitrary gc-tracked objects may raise on
            # attribute access; such objects are skipped rather than aborting
            # the whole report.
            continue
    print("Total size:", total_size)

import gc

# Report tensors that are still alive on the GPU, then release cached blocks.
dump_tensors()
torch.cuda.empty_cache()

# Drop references left over from a previous run so their memory can be
# reclaimed; the collector's return value is the cell output.
model = learn = None
gc.collect()


Total size: 0


37

In [6]:
### RUN FOR THE JUPYTER VERSION ###

## Directories
# Project roots of the known users / environments ('root' is the Colab runtime).
_KNOWN_HOMES = {
    'scgst': "C:\\Users\\scgst\\Documents\\Git\\COMP5329\\Assignment_2\\Code\\",
    'mgup6878': "C:\\Users\\mgup6878\\Desktop\\Deep Learning\\COMP5329 Assignment 2-20200513T155933Z-001\\COMP5329 Assignment 2\\Code\\",
    'root': '/content/drive/My Drive/COMP5329 Assignment 2-20200513T155933Z-001.zip (Unzipped Files)/COMP5329 Assignment 2/Code/',
}

try:
    user = getpass.getuser()
except (KeyError, OSError, ImportError):
    # No resolvable user name (e.g. a container without a passwd entry).
    user = ''

# Any other user falls back to the current working directory instead of
# failing later with a NameError on ``dir_home``.
dir_home = _KNOWN_HOMES.get(user, os.getcwd())

dir_input = os.path.join(dir_home, 'Input')
dir_output = os.path.join(dir_home, 'Output')

# The 'root' (Colab) layout keeps the images in 'Data2' instead of 'data'.
dir_data = os.path.join(dir_input, 'Data2' if user == 'root' else 'data')

train_csv = os.path.join(dir_input,'train.csv')
test_csv = os.path.join(dir_input,'test.csv')

In [7]:
def seed_all(seed = 27):
    """Seed the Python, NumPy and PyTorch random number generators.

    Follows https://pytorch.org/docs/stable/notes/randomness.html

    Args:
        seed: integer seed applied to every generator.
    """
    random.seed(seed)
    # Exported for child processes; the hash seed of the already-running
    # interpreter is not changed by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # cover every GPU, not only the current one
    torch.backends.cudnn.deterministic = True
    # Disable cuDNN auto-tuning so the selected kernels do not vary between runs.
    torch.backends.cudnn.benchmark = False

seed_all(28)

In [8]:
# Release GPU memory that PyTorch's caching allocator holds but no longer uses.
torch.cuda.empty_cache()

In [9]:
# Parameters
# Mini-batch size shared by all image data loaders.
BATCH_SIZE = 50 # 200 if google colab #30
# Number of passes over the training set.
NUM_EPOCHS = 10
# Step size handed to the optimizer.
LEARNING_RATE = 0.05

# When True, BERT caption embeddings are prepared (otherwise TEXT_LENGTH is 0).
USE_BERT = False
# When True, rows without label 1 are re-sampled to reduce class imbalance.
USE_OVER_SAMPLING = False

# When True (and USE_BERT is True) the embeddings are recomputed and saved;
# when False they are loaded from the output directory.
TRAIN_TEXT = False

# GPU or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Loading Data csv

In [10]:
## Read in train and test tables
def _read_caption_csv(path):
    """Read a caption CSV whose text may contain unescaped double quotes.

    Each line is rewritten so that a double quote which follows a non-comma
    character and is followed by more text on the same line gets prefixed
    with '/', and '/' is then passed to pandas as the escape character.

    Args:
        path: location of the CSV file.

    Returns:
        The parsed pandas DataFrame.
    """
    with open(path) as file:
        lines = [re.sub(r'([^,])"(\s*[^\n])', r'\1/"\2', line) for line in file]
    return pd.read_csv(StringIO(''.join(lines)), escapechar="/")


train_df_full = _read_caption_csv(train_csv)

print(train_df_full.head())
print(train_df_full.shape)
print("")

test_df = _read_caption_csv(test_csv)

print(test_df.head())
print(test_df.shape)

  ImageID  Labels                                            Caption
0   0.jpg       1   Woman in swim suit holding parasol on sunny day.
1   1.jpg    1 19  A couple of men riding horses on top of a gree...
2   2.jpg       1  They are brave for riding in the jungle on tho...
3   3.jpg  8 3 13  a black and silver clock tower at an intersect...
4   4.jpg   8 3 7   A train coming to a stop on the tracks out side.
(29996, 3)

     ImageID                                            Caption
0  30000.jpg  A little girl waring a krispy kreme hat holdin...
1  30001.jpg  A beautiful young woman holding an orange fris...
2  30002.jpg  A group of people sitting on couch next to a c...
3  30003.jpg         A person on a snowboard rides on the hill.
4  30004.jpg  A man riding a skateboard with a helmet on in ...
(10000, 2)


### Encoding
There are 18 distinct labels: [1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19] (label 12 does not occur).
The labels in the training data are stored as space-separated strings. They are split into lists and then encoded as 0/1 vectors with one position per label, which is the format the model accepts. The cell below defines three helper functions, *get_encoding()*, *encode_target()* and *revert_encoding()*; what each one does is explained in its docstring.

In [11]:
def get_encoding(labels):
    '''
    This function aims to get 2 dictionaries to encode and decode the labels.
    Input: Labels column of data; every entry is a whitespace-separated string
           of integer labels (e.g. "8 3 13")
    Returns the no. of classes, the encoding dictionary 'label_dict'
    (label -> position) and decoding dictionary 'label_dict_revert'
    (position -> label). Positions follow the ascending order of the labels.
    '''
    # split() without an argument tolerates repeated, leading and trailing
    # whitespace, which split(' ') turns into empty tokens that int() rejects.
    unique_labels = sorted({
        int(token)
        for entry in labels
        for token in str(entry).split()
    })
    n_classes = len(unique_labels)

    label_dict = {l:i for i,l in enumerate(unique_labels)}
    label_dict_revert = {i:l for i,l in enumerate(unique_labels)}

    return(n_classes, label_dict, label_dict_revert)

def encode_target(labels, label_dict, n_classes):
    '''
    This function aims to encode the labels column before training the models
    Input: Labels column of data (whitespace-separated integer labels per entry),
           encoding dictionary (label -> position), no. of classes
    Returns 'labels_expanded': one list of length n_classes per entry, holding 1
    at the position of every label of that entry and 0 elsewhere
    Raises KeyError if an entry contains a label missing from the dictionary
    '''
    labels_expanded = []
    for entry in labels:
        label_arr = [0] * n_classes
        # split() tolerates repeated / surrounding whitespace in the entry
        for token in str(entry).split():
            label_arr[label_dict[int(token)]] = 1
        labels_expanded.append(label_arr)

    return labels_expanded
# labels_expanded = encode_target(labels, label_dict, n_classes)

def revert_encoding(labels_expanded, label_dict_revert):
    '''
    Turn encoded label rows back into the original space-separated strings.
    Input: Encoded labels (one row of 0/1 flags per sample), decoding dictionary
           (position -> label)
    Returns a list with one string per row containing the labels whose flag
    equals 1, in position order; a row without any flag yields ""
    '''
    return [
        " ".join(
            str(label_dict_revert[position])
            for position, flag in enumerate(row)
            if flag == 1
        )
        for row in labels_expanded
    ]

# encode_reverted = revert_encoding(labels_expanded, label_dict_revert)

In [12]:
# Raw label strings of the training table.
labels = train_df_full['Labels']
# Number of distinct labels plus the label <-> position lookup tables.
n_classes, label_dict, label_dict_revert = get_encoding(labels)
print(n_classes)
print(label_dict)
print(label_dict_revert)

18
{1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 13: 11, 14: 12, 15: 13, 16: 14, 17: 15, 18: 16, 19: 17}
{0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 13, 12: 14, 13: 15, 14: 16, 15: 17, 16: 18, 17: 19}


In [13]:
# Encode labels
# One 0/1 list of length n_classes per training row.
labels_expanded = encode_target(labels, label_dict, n_classes)

# Add encoded labels to train table
# The new column is appended after ImageID, Labels and Caption.
train_df_full['Expanded_Labels'] = labels_expanded
train_df_full.head()

Unnamed: 0,ImageID,Labels,Caption,Expanded_Labels
0,0.jpg,1,Woman in swim suit holding parasol on sunny day.,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,1.jpg,1 19,A couple of men riding horses on top of a gree...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,2.jpg,1,They are brave for riding in the jungle on tho...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,3.jpg,8 3 13,a black and silver clock tower at an intersect...,"[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, ..."
4,4.jpg,8 3 7,A train coming to a stop on the tracks out side.,"[0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ..."


### BERT Embeddings
The following code computes a sentence embedding for each image caption in both the train and test datasets. For this we simply use the pre-trained BERT model 'bert-base-uncased', which is run in *eval()* mode to obtain the embeddings. The *get_bert_embeddings()* function preprocesses the captions (tokenisation, then truncation or zero-padding to a fixed length), takes the token embeddings of each caption from the encoder layer at index 11 (`encoded_layers[11]`) and averages them into a single sentence embedding. As the dataset is quite large, to avoid memory issues the captions are processed batch by batch and the per-batch embeddings are then concatenated into a single tensor.

#### Getting the BERT embeddings from pre-trained model

In [14]:
import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM

# Load the tokenizer that matches the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load pre-trained model (weights)
text_model = BertModel.from_pretrained('bert-base-uncased')


In [15]:
def get_bert_embeddings(X_captions, MAX_LEN, tokenizer, model):
    """Return one sentence embedding per caption by averaging BERT token vectors.

    Args:
        X_captions: iterable of caption strings.
        MAX_LEN: fixed sequence length; token lists are truncated to it and
            shorter ones are padded with id 0.
        tokenizer: object providing ``tokenize`` and ``convert_tokens_to_ids``.
        model: encoder called as ``model(token_ids, segment_ids)``; its first
            return value must be indexable by layer.

    Returns:
        Tensor with one row per caption: the mean over the sequence dimension
        of ``encoded_layers[11]``.
    """
    ids_list = []
    for sent in X_captions:
        tokens = tokenizer.tokenize('[CLS] ' + sent + ' [SEP]')
        tokens = tokens[:MAX_LEN]
        ids = tokenizer.convert_tokens_to_ids(tokens)
        ids_list.append(torch.tensor(ids + [0] * (MAX_LEN - len(ids))))

    tokens_tensor = torch.stack(ids_list)
    # Every position gets segment id 1, as in the original implementation.
    segments_tensors = torch.ones_like(tokens_tensor)
    model.to(device)

    # Put the model in "evaluation" mode, meaning feed-forward operation.
    model.eval()
    # Predict hidden states features for each layer
    with torch.no_grad():
        encoded_layers, _ = model(tokens_tensor.to(device), segments_tensors.to(device))

    # The per-layer outputs are deliberately NOT stacked into one tensor: that
    # copy was never used and duplicated every layer's activations in memory.
    token_vecs = encoded_layers[11]

    # NOTE(review): no attention mask is passed and padded positions are part
    # of the mean below — confirm this is intended before changing it, since
    # previously saved embeddings were produced this way.
    # Calculate the average of all MAX_LEN token vectors.
    sentence_embeddings = torch.mean(token_vecs, dim=1)

    return sentence_embeddings

In [16]:
## Code to concat list of tensors to one tensor
#### Code cell 3
def all_dataset_embeddings(X_captions, BATCH_SIZE, MAX_LEN, tokenizer, model,mode = 'training'):
    """Embed all captions in chunks of BATCH_SIZE and concatenate the results.

    Args:
        X_captions: sliceable sequence of caption strings.
        BATCH_SIZE: number of captions embedded per call.
        MAX_LEN, tokenizer, model: forwarded to ``get_bert_embeddings``.
        mode: label used only in the progress message.

    Returns:
        The per-chunk embeddings concatenated along dimension 0.
    """
    n_batches = math.ceil(len(X_captions)/BATCH_SIZE)
    chunk_embeddings = []
    for batch_no in range(n_batches):
        lower = batch_no * BATCH_SIZE
        upper = lower + BATCH_SIZE
        chunk_embeddings.append(
            get_bert_embeddings(X_captions[lower:upper], MAX_LEN, tokenizer, model)
        )

        # Progress message on every tenth chunk.
        if batch_no % 10 == 0:
            print("Text Batch Process for {} set: {}/{} | Time: {}".format(
                mode, str(batch_no), str(n_batches), datetime.now()
            ))
    return torch.cat(chunk_embeddings, dim = 0 )

#### Load the sentence embeddings
Once the embeddings have been computed and saved, there is no need to run the embedding code again to get the same results. With `TRAIN_TEXT = False` the cell below simply loads the saved embeddings from the output directory.

In [17]:
TEXT_BATCH_SIZE = 1000   # captions embedded per BERT forward pass
MAX_LEN = 59             # fixed token length every caption is truncated / padded to

if USE_BERT:
    train_emb_path = os.path.join(dir_output, 'train_emb.pt')
    test_emb_path = os.path.join(dir_output, 'test_emb.pt')

    if TRAIN_TEXT:
        # The tokenizer and model are only needed when the embeddings are
        # recomputed, so they are loaded here rather than unconditionally.
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        # Load pre-trained BERT model
        text_model = BertModel.from_pretrained('bert-base-uncased')

        train_emb_list = all_dataset_embeddings(
            train_df_full['Caption'], TEXT_BATCH_SIZE, MAX_LEN, tokenizer, text_model, 'training'
        )
        test_emb_list = all_dataset_embeddings(
            test_df['Caption'], TEXT_BATCH_SIZE, MAX_LEN, tokenizer, text_model, 'testing'
        )

        ## Saving the embeddings for later use
        torch.save(train_emb_list, train_emb_path)
        torch.save(test_emb_list, test_emb_path)

    else:
        # map_location lets embeddings saved from a GPU session be loaded on a
        # CPU-only machine (and the other way round).
        train_emb_list = torch.load(train_emb_path, map_location = device)
        test_emb_list = torch.load(test_emb_path, map_location = device)

    # Width of one caption embedding; added to the classifier input size.
    TEXT_LENGTH = train_emb_list.shape[1]
    print(train_emb_list.shape, test_emb_list.shape)

else:
    # No text features: the classifier input is the image features only.
    TEXT_LENGTH = 0

### Class Exploratory

Exploratory data analysis reveals that the dataset is highly imbalanced, with a strong bias towards label '1'. Based on manual analysis, the dataset appears to be a computer-vision challenge in which multiple entities have to be identified in an image, for example humans, cats, trains, bikes etc. Some images carry several labels while others carry just a single label.

In [18]:
# Class distribution before sampling: number of training rows per label position
print(np.sum(labels_expanded, axis = 0))

# Classes are unbalanced
# Label one is far more frequent than the others, so it is more likely to be predicted
# Up-sampling cases with data augmentation to potentially resolve the imbalance

[22794  1162  4364  1272  1130  1394  1221  2210  1042  1471   604   605
   251  1934  1099  1430  1525  1020]


In [19]:
## Assigning weights to each class such that dominant class is weighed less and less dominant are weighted more
## These weights can be used to manage the data imbalance.
## This technique wasn't found to be much useful for our dataset. Hence Data Augmentation was used for our dataset.
# Number of rows per label position.
tot_list = list(np.sum(labels_expanded, axis = 0))
# Count of the most frequent label.
max_sample = max(tot_list)
# Weight = most frequent count / own count, so the most frequent label gets 1.0.
weights_per_label = [max_sample/n for n in tot_list]
print(max_sample)
print(weights_per_label)

22794
[1.0, 19.61617900172117, 5.223189734188818, 17.919811320754718, 20.171681415929203, 16.351506456241033, 18.668304668304668, 10.314027149321268, 21.87523992322457, 15.49558123725357, 37.73841059602649, 37.67603305785124, 90.81274900398407, 11.785935884177869, 20.740673339399454, 15.93986013986014, 14.946885245901639, 22.347058823529412]


### Over-sampling

In [20]:
if USE_OVER_SAMPLING:
    # Row positions of samples without / with label one (position 0 of the encoding)
    NO_LABEL_ONE_INDEX = [i for i, el in enumerate(labels_expanded) if el[0] == 0]
    LABEL_ONE_INDEX = [i for i, el in enumerate(labels_expanded) if el[0] != 0]

    # Sampling (with replacement) cases not having label one
    TIMES_TO_EXTRACT_UNBALANCE_CALSS = 10
    SAMPLE_SIZE = 5000
    SAMPLE_NON_LABEL_ONE_INDEX = np.concatenate([
        np.random.choice(NO_LABEL_ONE_INDEX, SAMPLE_SIZE)
        for _ in range(TIMES_TO_EXTRACT_UNBALANCE_CALSS)
    ])
    # Every original row once, followed by the extra draws; cast to integers
    # so the result is usable as an index even if one of the lists is empty.
    FULL_DATA_INDEX = np.concatenate(
        [LABEL_ONE_INDEX, NO_LABEL_ONE_INDEX, SAMPLE_NON_LABEL_ONE_INDEX]
    ).astype(np.int64)

    # Class distribution after sampling
    CLASS_AFTER_SAMPLE = [labels_expanded[i] for i in FULL_DATA_INDEX]
    print(np.sum(CLASS_AFTER_SAMPLE, axis = 0))

    train_df_full_new = pd.DataFrame(
        train_df_full,
        columns = ['ImageID' , 'Labels', 'Caption', 'Expanded_Labels'],
        index = FULL_DATA_INDEX
    )

    # drop = True: keeping the old index would insert an 'index' column in
    # front of ImageID and shift the positional columns read by ImageData.
    train_df_full = train_df_full_new.reset_index(drop = True)

    if USE_BERT:
        # Keep the caption embeddings aligned row-for-row with the re-sampled table.
        train_emb_list = train_emb_list[torch.tensor(FULL_DATA_INDEX, dtype = torch.long)]

### Data Partition

The  data was split into train and validation sets in the ratio 70:30. Hence, the no. of examples in train set were 20,997 and that in validation set 8,999. The trained BERT embeddings were split in the same ratio.

In [21]:
# Row positions of the full training table.
ALL_INDEX = range(0, len(train_df_full))
# Random 70/30 split of the positions into training and validation parts.
TRAIN_INDEX, VAL_INDEX = train_test_split(ALL_INDEX, test_size = 0.30)

In [22]:
def _rows_of_full_table(row_index):
    """Select `row_index` rows (four model columns) from train_df_full, renumbered from 0."""
    selected = pd.DataFrame(
        train_df_full,
        columns = ['ImageID' , 'Labels', 'Caption', 'Expanded_Labels'],
        index = row_index
    )
    return selected.reset_index(drop = True)


# Training and validation tables, in the order given by the split indices.
train_df = _rows_of_full_table(TRAIN_INDEX)
val_df = _rows_of_full_table(VAL_INDEX)

In [23]:
# Split the caption embeddings with the same indices as the tables, so that
# row i of train_emb / val_emb belongs to row i of train_df / val_df.
if USE_BERT:
    train_emb = train_emb_list[TRAIN_INDEX]
    val_emb = train_emb_list[VAL_INDEX]
    test_emb = test_emb_list

### Data Extraction

The following *ImageData* class aims to build a custom dataset in a format that can be used by the dataloader. It takes the dataframe as an input which can be the train/validation/test sets containing the ImageId column containing image file names and the one-hot-encoded labels(only for the train/validation sets), the path to the folder containing the images, and a parameter 'test' which if true indicates it is a test set.  

In [24]:
# Extract Data
class ImageData(data.Dataset):
    """Dataset yielding transformed images and, unless `test` is set, their labels.

    Args:
        df: table whose first column holds the image file names and, for
            non-test data, whose fourth column holds the encoded label lists.
        dirpath: directory containing the image files.
        transform: callable applied to every loaded image.
        test: when True no labels are read and items are the image tensor only.
    """

    def __init__(self, df, dirpath, transform, test = False):
        self.df = df
        self.test = test
        self.dirpath = dirpath
        self.transform = transform

        # image data: full path of every image, in row order
        self.image_arr = np.asarray(str(self.dirpath) + '/' + self.df.iloc[:, 0])

        # labels data
        if not self.test:
            self.label_df = self.df.iloc[:, 3]

        # Calculate length of df
        self.data_len = len(self.df.index)

    def __len__(self):
        return self.data_len

    def __getitem__(self, idx):
        """Return `(image, labels)` for row position `idx` (image only in test mode)."""
        image_name = self.image_arr[idx]
        # Close the file once the pixels are read, and force three channels so
        # images stored in another mode do not break the 3-channel
        # normalisation applied by the transforms.
        with Image.open(image_name) as img:
            img_tensor = self.transform(img.convert('RGB'))
        if not self.test:
            # Positional lookup: idx is a row position (matching image_arr),
            # which stays correct even if df does not carry a 0..n-1 index.
            image_labels = self.label_df.iloc[idx]
            image_label = torch.tensor(image_labels, dtype= torch.float32)
            return (img_tensor, image_label.squeeze())

        return (img_tensor)

In [25]:
# Image transformation 
### The following code mentions the data augmentation transformations applied to images in each of the datasets.
### Each dataset is the loaded onto respective dataloaders to be used while training

# ImageNet channel statistics shared by every pipeline.
_normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])

data_transforms = {
    # Training: random crop + horizontal flip as augmentation.
    'train': transforms.Compose([
        transforms.Resize(255),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(p = 0.5),
#         transforms.RandomVerticalFlip(p = 0.5),
#         transforms.RandomRotation(degrees = [-45, 45]),
        transforms.ToTensor(),
        _normalize
    ]),
    # Validation: deterministic, identical to the test pipeline, so that the
    # validation loss / F1 are repeatable and measure what is applied at test
    # time instead of varying with random crops and flips.
    'val': transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        _normalize
    ]),
    'test': transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        _normalize
    ])
}

# Loading data
# shuffle stays False for every loader: later cells slice the caption
# embeddings by batch position, so the row order must be preserved.
train_dataset = ImageData(train_df, dir_data, data_transforms['train'])
train_loader = data.DataLoader(
    dataset = train_dataset,
    batch_size = BATCH_SIZE,
    shuffle = False
)
# First batch, fetched only to report tensor shapes.
features_train, labels_train = next(iter(train_loader))

val_dataset = ImageData(val_df, dir_data, data_transforms['val'])
val_loader = data.DataLoader(
    dataset = val_dataset,
    batch_size = BATCH_SIZE,
    shuffle = False
)
features_val, labels_val = next(iter(val_loader))

train_full_dataset = ImageData(train_df_full, dir_data, data_transforms['train'])
train_full_loader = data.DataLoader(
    dataset = train_full_dataset,
    batch_size = BATCH_SIZE,
    shuffle = False
)
features_train_full, labels_train_full = next(iter(train_full_loader))

test_dataset = ImageData(test_df, dir_data, data_transforms['test'], test = True)
test_loader = data.DataLoader(
    dataset = test_dataset,
    batch_size = BATCH_SIZE,
    shuffle = False
)
features_test = next(iter(test_loader))

In [26]:
# Summary per split: row count, batch size, number of batches and the shapes of the first batch.
print(f"Train Data Length: {len(train_df)}\nMini Batch Size: {BATCH_SIZE}\nBatch Numbers: {len(train_loader)}\nTrain Features: {features_train.shape}\nTrain Labels: {labels_train.shape}")
print()
print(f"Validation Data Length: {len(val_df)}\nMini Batch Size: {BATCH_SIZE}\nBatch Numbers: {len(val_loader)}\nValidation Features: {features_val.shape}\nValidation Labels: {labels_val.shape}")
print()
print(f"Full Train Data Length: {len(train_df_full)}\nMini Batch Size: {BATCH_SIZE}\nBatch Numbers: {len(train_full_loader)}\nFull Train Features: {features_train_full.shape}\nFull Train Labels: {labels_train_full.shape}")
print()
# The test split has no labels, so only the feature shape is shown.
print(f"Test Data Length: {len(test_df)}\nMini Batch Size: {BATCH_SIZE}\nBatch Numbers: {len(test_loader)}\nTest Features: {features_test.shape}")

Train Data Length: 20997
Mini Batch Size: 50
Batch Numbers: 420
Train Features: torch.Size([50, 3, 224, 224])
Train Labels: torch.Size([50, 18])

Validation Data Length: 8999
Mini Batch Size: 50
Batch Numbers: 180
Validation Features: torch.Size([50, 3, 224, 224])
Validation Labels: torch.Size([50, 18])

Full Train Data Length: 29996
Mini Batch Size: 50
Batch Numbers: 600
Full Train Features: torch.Size([50, 3, 224, 224])
Full Train Labels: torch.Size([50, 18])

Test Data Length: 10000
Mini Batch Size: 50
Batch Numbers: 200
Test Features: torch.Size([50, 3, 224, 224])


### Model Development

In [27]:
## Empty the GPU cache before building the model ; Alternatively, you may restart the session and clear all the outputs
torch.cuda.empty_cache()

In [28]:
# Get pretrained model using torchvision.models as models library
## We use the densenet161 pre-trained model

model = models.densenet161(pretrained = True)
# Freeze the pre-trained weights; only the new classifier below is trained.
for param in model.parameters():
    param.requires_grad = False

total_params = sum(p.numel() for p in model.parameters())
print(f'{total_params:,} total parameters.')
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'{total_trainable_params:,} training parameters.')
print()

# Create new classifier for model using torch.nn as nn library
classifier_input = model.classifier.in_features
print('Number of Outputs from densenet161 features: ' + str(classifier_input))
print()

#PUT IN THE NUMBER OF LABELS IN THE DATA ; in our case 18
num_labels = n_classes

#The input to the classifier model will be the concatenated image tensor and the corresponding BERT embedding.
# (TEXT_LENGTH is 0 when no text embeddings are used.)
classifier = nn.Sequential(
    nn.Linear(classifier_input + TEXT_LENGTH, 1024),
    nn.ReLU(),
    nn.Linear(1024, 512),
    nn.ReLU(),
    nn.Linear(512, 300),
    nn.ReLU(),
    nn.Linear(300, 200),
    nn.ReLU(),
    nn.Linear(200, 100),
    nn.ReLU(),
    nn.Linear(100, num_labels),
    nn.Sigmoid() # LogSoftmax(dim = 1)
)
# Replace default classifier with new classifier
model.classifier = classifier

total_params = sum(p.numel() for p in model.parameters())
print(f'{total_params:,} total parameters.')
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'{total_trainable_params:,} training parameters.')

# Move model to the device specified above
model.to(device)

# Set the error function using torch.nn as nn library
# The classifier ends in nn.Sigmoid and therefore emits probabilities, so the
# matching criterion is BCELoss. BCEWithLogitsLoss expects raw logits and would
# apply a further sigmoid to values that are already squashed.
criterion = nn.BCELoss()

# Set the optimizer function using torch.optim as optim library
# Only the classifier parameters are optimised; the feature extractor is frozen.
optimizer = optim.Adam(model.classifier.parameters(), lr = LEARNING_RATE)



28,681,000 total parameters.
0 training parameters.

Number of Outputs from densenet161 features: 2208

29,494,834 total parameters.
3,022,834 training parameters.


In [29]:
# Convert the per-label weights (a Python list) into a float tensor on the compute device.
weights_per_label = torch.FloatTensor(weights_per_label).to(device)

### Training Model

In [None]:
def _forward_batch(inputs, text_batch = None):
    """Image features -> ReLU -> global average pool -> (optional text concat) -> classifier."""
    features = model.features(inputs)
    out = F.relu(features, inplace = True)
    out = F.adaptive_avg_pool2d(out, (1, 1))
    out = torch.flatten(out, 1)
    if text_batch is not None:
        out = torch.cat((out, text_batch.to(device)), 1)
    return model.classifier(out)


def _text_slice(embeddings, batch_idx):
    """Rows of `embeddings` belonging to loader batch `batch_idx` (None without text features)."""
    if embeddings is None:
        return None
    return embeddings[batch_idx * BATCH_SIZE : (batch_idx + 1) * BATCH_SIZE]


# The caption embeddings only exist when USE_BERT is enabled; without them the
# classifier consumes the image features alone (TEXT_LENGTH is 0 in that case).
train_text = train_emb if USE_BERT else None
val_text = val_emb if USE_BERT else None

running_train_loss = []
running_val_loss = []
best_loss = np.inf
# Defined up front so the final restore works even if no epoch ever improves
# on best_loss (e.g. the loss is NaN) or NUM_EPOCHS is 0.
best_epoch = None
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(NUM_EPOCHS):
    start_time = time.time()
    train_loss = 0
    val_loss = 0

    # Training the model
    # NOTE(review): model.train() also switches the frozen feature extractor to
    # training mode — confirm that is intended for its normalisation layers.
    model.train()
    for mini_batch_counter, (inputs, labels) in enumerate(train_loader):
        # Print the progress of our training
        if (mini_batch_counter % 50) == 0:
            print("Epoch: {}/{} | Phase: 'Train' | Batch: {}/{} | Time: {}".format(
              epoch + 1,
              NUM_EPOCHS,
              mini_batch_counter + 1,
              len(train_loader),
              datetime.now()
            ))

        # Move to device
        inputs, labels = inputs.to(device), labels.to(device)
        # Clear optimizers
        optimizer.zero_grad()
        # Forward pass (the loader is not shuffled, so batch i lines up with
        # rows i*BATCH_SIZE.. of the text embeddings)
        output = _forward_batch(inputs, _text_slice(train_text, mini_batch_counter))
        # Loss: the classifier already ends in nn.Sigmoid, so its output is
        # passed on as it is instead of being squashed by another sigmoid.
        loss = criterion(output, labels)
        # Calculate gradients (backpropagation)
        loss.backward()
        # Adjust parameters based on gradients
        optimizer.step()
        # Add the loss to the training set's running loss (weighted by batch size)
        train_loss += loss.item() * inputs.size(0)

    # Get the average loss for the entire epoch
    train_loss = train_loss / len(train_loader.dataset)
    running_train_loss.append(train_loss)
    elapsed_train_time = time.time() - start_time

    print('Epoch: {} / {} \tTraining Loss: {:.6f} \tTrain Time: {:.6f}mins'.format(
        epoch + 1, NUM_EPOCHS, train_loss, elapsed_train_time / 60
    ))

    # Evaluating the model
    model.eval()
    # Tell torch not to calculate gradients
    with torch.no_grad():
        for mini_batch_counter, (inputs, labels) in enumerate(val_loader):
            # Print the progress of our validation
            if (mini_batch_counter % 50) == 0:
                print("Epoch: {}/{} | Phase: 'Test' | Batch: {}/{} | Time: {}".format(
                  epoch + 1,
                  NUM_EPOCHS,
                  mini_batch_counter + 1,
                  len(val_loader),
                  datetime.now()
                ))

            # Move to device
            inputs, labels = inputs.to(device), labels.to(device)
            # Forward pass
            output = _forward_batch(inputs, _text_slice(val_text, mini_batch_counter))
            # Calculate Loss (same convention as in training)
            valloss = criterion(output, labels)
            # Add loss to the validation set's running loss
            val_loss += valloss.item()*inputs.size(0)

    # Get the average loss for the entire epoch
    valid_loss = val_loss/len(val_loader.dataset)
    running_val_loss.append(valid_loss)
    elapsed_test_time = time.time() - start_time - elapsed_train_time

    # Keep a copy of the weights of the epoch with the lowest validation loss.
    if valid_loss < best_loss:
        best_loss = valid_loss
        best_epoch = epoch
        best_model_wts = copy.deepcopy(model.state_dict())

    # Print out the information
    print('Epoch: {} / {} \tValidation Loss: {:.6f} \tValidation Time: {:.6f}mins'.format(
        epoch + 1, NUM_EPOCHS, valid_loss, elapsed_test_time/60
    ))

    # plot the cost
    plt.plot(running_val_loss)
    plt.ylabel('cost')
    plt.xlabel('epochs')
    plt.show()

# best_epoch is zero-based (it is the loop variable, not epoch + 1).
print('Best Epoch is ' + str(best_epoch))
model.load_state_dict(best_model_wts)

### Loss v.s. Epochs

In [None]:
# plot the cost: average training loss per epoch
plt.plot(running_train_loss)
plt.ylabel('cost')
plt.xlabel('epochs')
plt.show()

In [None]:
# plot the cost: average validation loss per epoch
plt.plot(running_val_loss)
plt.ylabel('cost')
plt.xlabel('epochs')
plt.show()

### Prediction and Scoring on Validation Set

In [None]:
model.eval()

# Get output
start_time = time.time()
whole_val_outputs = np.zeros((len(val_dataset), n_classes))
whole_val_labels = np.zeros((len(val_dataset), n_classes))

# Inference only: no gradients are needed, which avoids building a graph.
with torch.no_grad():
    for mini_batch_counter, (val_batch_input, val_batch_labels) in enumerate(val_loader):
        if ((mini_batch_counter) % 50 == 0):
            print(str(mini_batch_counter + 1) + '/' + str(len(val_loader)))

        # Rows of the output arrays (and of the text embeddings) for this batch
        batch_rows = slice(mini_batch_counter * BATCH_SIZE, (mini_batch_counter + 1) * BATCH_SIZE)

        # Move to device
        val_batch_input = val_batch_input.to(device)
        # Forward pass
        features = model.features(val_batch_input)
        out = F.relu(features, inplace = True)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = torch.flatten(out, 1)
        # Text embeddings exist only when USE_BERT is enabled.
        if USE_BERT:
            out = torch.cat((out, val_emb[batch_rows].to(device)), 1)

        val_batch_output = model.classifier(out).detach().cpu().numpy()
        val_batch_labels = val_batch_labels.detach().cpu().numpy()

        # The classifier ends in nn.Sigmoid, so its outputs already are
        # per-label probabilities; they are stored unchanged (the former
        # np.exp belonged to a LogSoftmax output layer).
        whole_val_outputs[batch_rows, :] = val_batch_output
        whole_val_labels[batch_rows, :] = val_batch_labels

elapsed_time = time.time() - start_time
print(elapsed_time)

In [None]:
# Get Prediction on Validation

# # Calculate F1 Score on validation set
# whole_val_predictions = np.round(whole_val_outputs)
# print(sklearn.metrics.f1_score(
#    y_true = whole_val_labels, y_pred = whole_val_predictions, average = 'weighted'
# ))

PERCENTILE = 99.7
whole_val_predictions = np.zeros(whole_val_outputs.shape)
# A label is predicted when its score lies strictly above the row's own
# PERCENTILE-th percentile.
for row_no, row_scores in enumerate(whole_val_outputs):
    row_threshold = np.percentile(row_scores, PERCENTILE)
    whole_val_predictions[row_no, row_scores > row_threshold] = 1

# Calculate F1 Score on validation set
val_f1 = sklearn.metrics.f1_score(
    y_true = whole_val_labels, y_pred = whole_val_predictions, average = 'weighted'
)
print(val_f1)

In [None]:
# Inspect the predicted label vector of the first validation sample.
whole_val_predictions[0]

### Prediction on Test Set

In [None]:
# Final Prediction
# Make sure dropout / normalisation layers are in inference mode.
model.eval()

# Get output
start_time = time.time()
whole_test_outputs = np.zeros((len(test_dataset), n_classes))

# Inference only: no gradients are needed.
with torch.no_grad():
    for mini_batch_counter, test_batch_input in enumerate(test_loader):
        if ((mini_batch_counter) % 50 == 0):
            print(str(mini_batch_counter + 1) + '/' + str(len(test_loader)))

        # Rows of the output array (and of the text embeddings) for this batch
        batch_rows = slice(mini_batch_counter * BATCH_SIZE, (mini_batch_counter + 1) * BATCH_SIZE)

        # Forward
        features = model.features(test_batch_input.to(device))
        out = F.relu(features, inplace = True)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = torch.flatten(out, 1)
        # Text embeddings exist only when USE_BERT is enabled.
        if USE_BERT:
            out = torch.cat((out, test_emb[batch_rows].to(device)), 1)
        test_batch_output = model.classifier(out).detach().cpu().numpy()

        # The classifier ends in nn.Sigmoid, so its outputs already are
        # per-label probabilities; they are stored unchanged (the former
        # np.exp belonged to a LogSoftmax output layer).
        whole_test_outputs[batch_rows, :] = test_batch_output

elapsed_time = time.time() - start_time
print(elapsed_time)

In [None]:
# Get Prediction on Test
# whole_test_outputs is a NumPy array, so it has to be rounded with NumPy
# (torch.round only accepts tensors and raises a TypeError here).
whole_test_predictions = np.round(whole_test_outputs)

# # Get Prediction on Test
# PERCENTILE = 99.7
# whole_test_predictions = np.zeros(whole_test_outputs.shape)
# for i in range(len(whole_test_predictions)):
#     whole_test_predictions[i, whole_test_outputs[i] > np.percentile(whole_test_outputs[i], PERCENTILE)] = 1

### Submission

In [None]:
# Submission
# Convert every 0/1 prediction row back into a space-separated label string.
submission = revert_encoding(whole_test_predictions, label_dict_revert)

In [None]:
# Sanity check: number of label strings produced.
np.array(submission).shape

In [None]:
# Attach the predicted label strings to the test table.
test_df['Labels'] = submission
# The caption is not part of the submission file.
# NOTE(review): this rebinds test_df, so running the cell a second time fails
# because 'Caption' has already been dropped.
test_df = test_df.drop(columns = 'Caption')
test_df

In [None]:
# Write the submission table to the output directory, without the row index.
test_df.to_csv(os.path.join(dir_output, 'Submission_Model_Final_Pipeline.csv'), index = False)

### Save the Model

In [None]:
# Save the model
# Only the state dict (parameters and buffers) is stored, not the model object.
PATH = os.path.join(dir_output, 'Model_Final_Pipeline.pth')
torch.save(model.state_dict(), PATH)