# RESNET PRETRAINING ON VISUAL SENTIMENT ANALYSIS
Pretraining of ResNet 50 on the task of Visual Sentiment Analysis on [Disaster Images in Social Media](https://arxiv.org/pdf/2009.03051.pdf).

### PACKAGES INSTALLATION

In [None]:
! pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


### DRIVE LINKING

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### IMPORTS

In [None]:
import numpy as np

import pandas as pd
import json

from torch.utils.data import DataLoader

import torch 
from torch import nn
from torch.optim import Adam
import torch.nn as nn

from tqdm.notebook import tqdm

from sklearn.metrics import f1_score

import matplotlib.pyplot as plt

import os
from datetime import datetime

from tabulate import tabulate
import seaborn as sn

from sklearn.model_selection import train_test_split

from transformers import ResNetForImageClassification
import cv2

### DATASET

####  DATASET LOADING

In [None]:
def process_img(img):

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (240, 240), interpolation=cv2.INTER_AREA)
    img = np.array(img, dtype=np.float32) / 255.0
    img = np.reshape(img, (3, 240, 240))

    return img

def load_dataset():
    
    folder = "/content/drive/MyDrive/DeepLearning/Dataset/visual_sentiment/"
    img_folder = folder + "images/"
    f_name = folder + "annotations.csv"

    df = pd.read_csv(f_name, sep=';')
    score_cols = ["A{}.Q1".format(i) for i in range(1, 6)]

    imgs = []
    labels = []
    for i in df.index:
        img_name = df["filename"][i]
        scores = np.array(df[score_cols].iloc[i])

        score = np.mean(scores) / 10
        label = 0 if score < 0.5 else 1

        try:
            img = cv2.imread(img_folder + img_name)
            img = process_img(img)
        except:
            continue

        imgs.append(img)
        labels.append(label)

    X_train, X_test, y_train, y_test = train_test_split(imgs, labels)

    train = pd.DataFrame({'img': X_train, 'label': y_train})
    test = pd.DataFrame({'img': X_test, 'label': y_test})

    return train, test

#### DATASET

In [None]:
class VSADataset():


    def __init__(self, labels, imgs):

        self.labels = labels
        self.imgs = imgs
    
    def __len__(self):

        return len(self.imgs)

    def __getitem__(self, index):

        label = self.labels[index]
        img = self.imgs[index]
        
        return {
            "img": torch.tensor(img, dtype=torch.float),
            "labels": torch.tensor(label, dtype=torch.float)
        }

#### WRAPPERS

In [None]:
def build_datasets(train, test):

    train_ds = VSADataset(train.label, train.img)
    test_ds = VSADataset(test.label, test.img)
    return train_ds, test_ds

def build_dataloaders(train_ds, test_ds):
    
    train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=2)
    test_dl = DataLoader(test_ds, batch_size=64, shuffle=True, num_workers=2)
    return train_dl, test_dl

### MODEL

In [None]:
class VSAClassifier(nn.Module):


    def __init__(self, do_prob=0.2):

        super(VSAClassifier, self).__init__()
        self.resnet = ResNetForImageClassification.from_pretrained(
            "microsoft/resnet-50")
        self.classifier = nn.Linear(1000, 100)
        self.linear = nn.Linear(100, 1)
        #self.sigmoid = torch.sigmoid

    def forward(self, input):

        output = self.resnet(input)
        output = self.classifier(output.logits)
        output = self.linear(output)
        #output = self.sigmoid(output)
        output = torch.flatten(output)
        return output

In [None]:
def save_model(model, path):

    torch.save({'model_state_dict': model.state_dict()}, path)

### PLOTTER

In [None]:
class Plotter():
    """Keeps track of the metrics and create plots."""

    def __init__(self):

        self.data = {}

    def add(self, d):

        for metric in d:
            if metric in self.data: 
                self.data[metric].append(d[metric])
            else: 
                self.data[metric] = [d[metric]]

    def plot_metric(self, metric):

        y = self.data[metric]
        x = range(1, len(y) + 1)

        plt.figure()
        plt.title(metric)
        plt.xlabel('epoch')
        plt.plot(x, y) 

    def plot(self):

        for metric in self.data:
            self.plot_metric(metric)
            plt.show()

    def save(self, dir_name):

        if not os.path.isdir(dir_name): return
            
        for metric in self.data:
            self.plot_metric(metric)

            plt.savefig("".join([dir_name, metric]))
            plt.close()

### TRAINER

In [None]:
class Trainer() :


    def __init__(self, model, optimizer, device, loss_fn):

        self.net = model
        self.opt = optimizer 
        self.device = device
        self.plotter = Plotter()
        self.loss_fn = loss_fn

    def eval(self, dataloader):

        eval_loss = 0.0
        self.net.eval()
        fin_targets = []
        fin_outputs = []
        with torch.no_grad():
            for bi, d in tqdm(enumerate(dataloader), total=len(dataloader)):
                img = d["img"]
                targets = d["labels"]

                img = img.to(self.device, dtype=torch.float)
                targets = targets.to(self.device, dtype=torch.float)

                outputs = self.net(img)
                loss = self.loss_fn(outputs, targets)
                eval_loss += loss.item()
                fin_targets.extend(targets)
                fin_outputs.extend(outputs)

        fin_outputs = torch.stack(fin_outputs)
        fin_targets = torch.stack(fin_targets)
        fin_outputs = fin_outputs.cpu().detach().numpy()
        fin_targets = fin_targets.cpu().detach().numpy()
            
        return eval_loss, fin_outputs, fin_targets

    def train(self, train_dl, val_dl, epochs, model_id, save_dir=None):

        def train_fn():
            
            train_loss = 0.0
            self.net.train()
            for bi, d in tqdm(enumerate(train_dl), total=len(train_dl)):
                targets = d["labels"]
                img = d["img"]

                img = img.to(self.device, dtype=torch.float)
                targets = targets.to(self.device, dtype=torch.float)

                outputs = self.net(img)
                #print(outputs)
                #print(targets)
                self.opt.zero_grad()
                loss = loss_fn(outputs, targets)
                loss.backward()
                train_loss += loss.item()
                self.opt.step()
                
            return train_loss
        
        if save_dir is not None and os.path.isdir(save_dir):
            datestring = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            save_dir = save_dir + model_id + "_" + datestring + "/"
            os.mkdir(save_dir)

        best_val_loss = 100

        for epoch in tqdm(range(epochs)):
            train_loss = train_fn()
            eval_loss, outputs, labels = self.eval(val_dl)

            avg_train_loss = train_loss / len(train_dl)
            avg_val_loss = eval_loss / len(val_dl)

            self.plotter.add(
                {"train_loss": avg_train_loss, "val_loss": avg_val_loss})

            print("Epoch {}:".format(epoch))
            print("Average Train loss: ", avg_train_loss)
            print("Average Valid loss: ", avg_val_loss)

            if save_dir is not None:
                if avg_val_loss < best_val_loss :
                    best_val_loss = avg_val_loss
                    save_model(self.net, save_dir + "best_model_loss.pt")
                    print("Model saved as current val_loss is: ", best_val_loss)

                self.plotter.save(save_dir) 

    def plot_metrics(self):

        self.plotter.plot()

### TRAINING

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.manual_seed(1)

<torch._C.Generator at 0x7f1e6c003570>

In [None]:
train, test = load_dataset()

In [None]:
train_ds, test_ds = build_datasets(train, test)
train_dl, test_dl = build_dataloaders(train_ds, test_ds)

In [None]:
model = VSAClassifier()
optimizer = Adam(model.parameters(), lr=1e-5)

loss_fn = nn.CrossEntropyLoss()

model.to(device)
model = nn.DataParallel(model)

In [None]:
save_dir = "drive/MyDrive/DeepLearning/visual_sentiment/"
epochs = 40

trainer = Trainer(model, optimizer, device, loss_fn)

trainer.train(train_dl, test_dl, epochs, "resnet-50", save_dir=save_dir)

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 0:
Average Train loss:  80.56529619477011
Average Valid loss:  76.02640329996744
Model saved as current val_loss is:  76.02640329996744


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 1:
Average Train loss:  79.64178823341022
Average Valid loss:  75.57819964090983
Model saved as current val_loss is:  75.57819964090983


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 2:
Average Train loss:  79.17155654863878
Average Valid loss:  75.49836463928223
Model saved as current val_loss is:  75.49836463928223


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 3:
Average Train loss:  78.90296137332916
Average Valid loss:  75.43850784301758
Model saved as current val_loss is:  75.43850784301758


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 4:
Average Train loss:  78.7354178645394
Average Valid loss:  75.106179300944
Model saved as current val_loss is:  75.106179300944


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 5:
Average Train loss:  78.45917039567775
Average Valid loss:  75.51694132486979


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 6:
Average Train loss:  78.18354811451651
Average Valid loss:  75.17223103841145


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 7:
Average Train loss:  77.81297744404186
Average Valid loss:  75.13201115926107


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 8:
Average Train loss:  77.59916706518693
Average Valid loss:  75.07327880859376
Model saved as current val_loss is:  75.07327880859376


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 9:
Average Train loss:  77.4164868593216
Average Valid loss:  75.31134847005208


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 10:
Average Train loss:  77.16627202250741
Average Valid loss:  75.09735463460287


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 11:
Average Train loss:  76.75791596282612
Average Valid loss:  75.2972484588623


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 12:
Average Train loss:  76.74558518149637
Average Valid loss:  75.27195688883464


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 13:
Average Train loss:  76.39693000099875
Average Valid loss:  75.47574996948242


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 14:
Average Train loss:  75.85042469068007
Average Valid loss:  75.19986966451009


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 15:
Average Train loss:  75.7602054422552
Average Valid loss:  75.85274098714193


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 16:
Average Train loss:  75.43415637449785
Average Valid loss:  75.68938000996907


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 17:
Average Train loss:  74.9962136853825
Average Valid loss:  75.15806833902995


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 18:
Average Train loss:  74.74622199752115
Average Valid loss:  75.71143112182617


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 19:
Average Train loss:  74.61199794032358
Average Valid loss:  75.89860687255859


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 20:
Average Train loss:  74.46675851128317
Average Valid loss:  75.77393175760905


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 21:
Average Train loss:  73.99306056716226
Average Valid loss:  75.58239339192708


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 22:
Average Train loss:  73.86149854009801
Average Valid loss:  75.56378987630208


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 23:
Average Train loss:  73.74360625310378
Average Valid loss:  75.53240292867025


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 24:
Average Train loss:  73.18561989339915
Average Valid loss:  75.60595982869467


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 25:
Average Train loss:  72.7559425722469
Average Valid loss:  75.86176884969076


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 26:
Average Train loss:  72.49537027424032
Average Valid loss:  76.30747578938802


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 27:
Average Train loss:  72.24359525333752
Average Valid loss:  76.24672966003418


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 28:
Average Train loss:  71.91455253687772
Average Valid loss:  76.5241527557373


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 29:
Average Train loss:  71.71351272409612
Average Valid loss:  76.18237342834473


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 30:
Average Train loss:  71.35994345491582
Average Valid loss:  76.18411401112874


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 31:
Average Train loss:  70.70046557079662
Average Valid loss:  76.3429620107015


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 32:
Average Train loss:  70.69934693249789
Average Valid loss:  76.64180323282878


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 33:
Average Train loss:  70.41635467789389
Average Valid loss:  76.6783873240153


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 34:
Average Train loss:  69.84362440759486
Average Valid loss:  76.99329071044922


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 35:
Average Train loss:  69.98749512975866
Average Valid loss:  76.77198638916016


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 36:
Average Train loss:  69.78907353227788
Average Valid loss:  76.7579896291097


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 37:
Average Train loss:  68.71223783493042
Average Valid loss:  77.24584833780925


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 38:
Average Train loss:  68.8811466368762
Average Valid loss:  77.7579470316569


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 39:
Average Train loss:  68.61341918598522
Average Valid loss:  77.57442245483398
