In [1]:
# !wget https://archive.org/download/mmimdb/multimodal_imdb.hdf5

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# !cp multimodal_imdb.hdf5 /content/drive/MyDrive/

In [3]:
# !cp /content/drive/MyDrive/multimodal_imdb.hdf5 ./

In [4]:
!git clone https://www.github.com/pliang279/MultiBench.git

fatal: destination path 'MultiBench' already exists and is not an empty directory.


In [5]:
!pip install memory_profiler



In [6]:
%%writefile /content/MultiBench/datasets/imdb/get_data.py

import os
import sys
from typing import *
import numpy as np

# sys.path.append('/content/datasets/imdb/')
# from robustness.visual_robust import visual_robustness
# from robustness.text_robust import text_robustness

# from .vgg import VGGClassifier
# from gensim.models import KeyedVectors

import h5py
from typing import *
from torch.utils.data import Dataset, DataLoader

import json
from PIL import Image
from typing import *
import os
from tqdm import tqdm


class IMDBDataset(Dataset):
    
    def __init__(self, file:h5py.File, start_ind:int, end_ind:int, vggfeature:bool=False) -> None:
        self.file = file
        self.start_ind = start_ind
        self.size = end_ind-start_ind
        self.vggfeature = vggfeature

    def __getitem__(self, ind):
        if not hasattr(self, 'dataset'):
            self.dataset = h5py.File(self.file, 'r')
        text = self.dataset["features"][ind+self.start_ind]
        image = self.dataset["images"][ind+self.start_ind] if not self.vggfeature else \
            self.dataset["vgg_features"][ind+self.start_ind]
        label = self.dataset["genres"][ind+self.start_ind]

        return text, image, label

    def __len__(self):
        return self.size


class IMDBDataset_robust(Dataset):
    
    def __init__(self, dataset, start_ind:int, end_ind:int) -> None:
        self.dataset = dataset
        self.start_ind = start_ind
        self.size = end_ind-start_ind

    def __getitem__(self, ind):
        text = self.dataset[ind+self.start_ind][0]
        image = self.dataset[ind+self.start_ind][1]
        label = self.dataset[ind+self.start_ind][2]

        return text, image, label

    def __len__(self):
        return self.size

def process_data(filename, path):
    data = {}
    filepath = os.path.join(path, filename)

    with Image.open(filepath+".jpeg") as f:
        image = np.array(f.convert("RGB"))
        data["image"] = image
    
    with open(filepath+".json", "r") as f:
        info = json.load(f)
        
        plot = info["plot"]
        data["plot"] = plot

    return data

def get_dataloader(path:str,num_workers:int=8, train_shuffle:bool=True, batch_size:int=40, vgg:bool=False, skip_process=False)->Tuple[Dict]:
    train_dataloader = DataLoader(IMDBDataset(path, 0, 15552, vgg), shuffle=train_shuffle, num_workers=num_workers, batch_size=batch_size)
    val_dataloader = DataLoader(IMDBDataset(path, 15552, 18160, vgg), shuffle=False, num_workers=num_workers, batch_size=batch_size)

    # test_dataset = h5py.File(path, 'r')
    # test_text = test_dataset['features'][18160:25959]
    # test_vision = test_dataset['vgg_features'][18160:25959]
    # labels = test_dataset["genres"][18160:25959]
    # names = test_dataset["imdb_ids"][18160:25959]
    
    # dataset = os.path.join(test_path, "dataset")

    # if not skip_process:
    #     clsf = VGGClassifier(model_path='/home/pliang/multibench/MultiBench/datasets/imdb/vgg16.tar', synset_words='synset_words.txt')
    #     googleword2vec = KeyedVectors.load_word2vec_format('/home/pliang/multibench/MultiBench/datasets/imdb/GoogleNews-vectors-negative300.bin.gz', binary=True)
        
    #     images = []
    #     texts = []
    #     for name in tqdm(names):
    #         name = name.decode("utf-8")
    #         data = process_data(name, dataset)
    #         images.append(data['image'])
    #         plot_id = np.array([len(p) for p in data['plot']]).argmax()
    #         texts.append(data['plot'][plot_id])
    
    # Add visual noises
    # robust_vision = []
    # for noise_level in range(11):
    #     vgg_filename = os.path.join(os.getcwd(), 'vgg_features_{}.npy'.format(noise_level))
    #     if not skip_process:
    #         vgg_features = []
    #         images_robust = visual_robustness(images, noise_level=noise_level/10)
    #         for im in tqdm(images_robust):
    #             vgg_features.append(clsf.get_features(Image.fromarray(im)).reshape((-1,)))
    #         np.save(vgg_filename, vgg_features)
    #     else:
    #         assert os.path.exists(vgg_filename) == True
    #         vgg_features = np.load(vgg_filename, allow_pickle=True)
    #     robust_vision.append([(test_text[i], vgg_features[i], labels[i]) for i in range(len(vgg_features))])
    
    # test_dataloader = dict()
    # test_dataloader['image'] = []
    # for test in robust_vision:
    #     test_dataloader['image'].append(DataLoader(IMDBDataset_robust(test, 0, len(test)), shuffle=False, num_workers=num_workers, batch_size=batch_size))

    # Add text noises
    # robust_text = []
    # for noise_level in range(11):
    #     text_filename = os.path.join(os.getcwd(), 'text_features_{}.npy'.format(noise_level)) 
    #     if not skip_process:
    #         text_features = []
    #         texts_robust = text_robustness(texts, noise_level=noise_level/10)    
    #         for words in tqdm(texts_robust):
    #             words = words.split()
    #             if len([googleword2vec[w] for w in words if w in googleword2vec]) == 0:
    #                 text_features.append(np.zeros((300,)))
    #             else:
    #                 text_features.append(np.array([googleword2vec[w] for w in words if w in googleword2vec]).mean(axis=0))
    #         np.save(text_filename, text_features)
    #     else:
    #         assert os.path.exists(text_filename) == True
    #         text_features = np.load(text_filename, allow_pickle=True)
    #     robust_text.append([(text_features[i], test_vision[i], labels[i]) for i in range(len(text_features))])
    # test_dataloader['text'] = []
    # for test in robust_text:
    #     test_dataloader['text'].append(DataLoader(IMDBDataset_robust(test, 0, len(test)), shuffle=False, num_workers=num_workers, batch_size=batch_size))
    return train_dataloader, val_dataloader

Overwriting /content/MultiBench/datasets/imdb/get_data.py


In [7]:
%cd MultiBench

/content/MultiBench


In [8]:
import h5py
test_dataset = h5py.File("/content/drive/MyDrive/multimodal_imdb.hdf5", 'r')

In [9]:
test_dataset.keys()

<KeysViewHDF5 ['features', 'genres', 'images', 'imdb_ids', 'sequences', 'three_grams', 'vgg_features', 'word_grams']>

In [10]:
import sys
import os
import torch

from training_structures.Supervised_Learning import train, test
from fusions.common_fusions import Concat
from datasets.imdb.get_data import get_dataloader
from unimodals.common_models import Linear, MaxOut_MLP

traindata, validdata = get_dataloader("/content/drive/MyDrive/multimodal_imdb.hdf5", skip_process=True, vgg=True, batch_size=128)

  cpuset_checked))


In [11]:
encoders=[MaxOut_MLP(512, 512, 300, linear_layer=False), MaxOut_MLP(512, 1024, 4096, 512, False)]
head= Linear(1024, 23).cuda()
fusion=Concat().cuda()

filename = "best_lf.pt"

train(encoders,fusion,head,traindata,validdata,1000, early_stop=True,task="multilabel",\
    save=filename, optimtype=torch.optim.AdamW,lr=8e-3,weight_decay=0.01, objective=torch.nn.BCEWithLogitsLoss())

# print("Testing:")
# test(model,testdata,method_name="lf",dataset="imdb",criterion=torch.nn.BCEWithLogitsLoss(),task="multilabel")

  cpuset_checked))


Epoch 0 train loss: tensor(0.2807, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 0 valid loss: tensor(0.2068, device='cuda:0') f1_micro: 0.5760802604945877 f1_macro: 0.4424341473151621
Saving Best


  cpuset_checked))


Epoch 1 train loss: tensor(0.1960, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 1 valid loss: tensor(0.1950, device='cuda:0') f1_micro: 0.5746980292434839 f1_macro: 0.4534234583888036
Saving Best


  cpuset_checked))


Epoch 2 train loss: tensor(0.1737, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 2 valid loss: tensor(0.1937, device='cuda:0') f1_micro: 0.5915075420699276 f1_macro: 0.49470319411848096
Saving Best


  cpuset_checked))


Epoch 3 train loss: tensor(0.1557, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 3 valid loss: tensor(0.1971, device='cuda:0') f1_micro: 0.5883193427397749 f1_macro: 0.4973879100331768
Saving Best


  cpuset_checked))


Epoch 4 train loss: tensor(0.1360, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 4 valid loss: tensor(0.2077, device='cuda:0') f1_micro: 0.5850000000000001 f1_macro: 0.47701555105164845


  cpuset_checked))


Epoch 5 train loss: tensor(0.1189, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 5 valid loss: tensor(0.2190, device='cuda:0') f1_micro: 0.5891891891891892 f1_macro: 0.5109817837348082
Saving Best


  cpuset_checked))


Epoch 6 train loss: tensor(0.1026, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 6 valid loss: tensor(0.2361, device='cuda:0') f1_micro: 0.5794520547945207 f1_macro: 0.5031924285316908


  cpuset_checked))


Epoch 7 train loss: tensor(0.0869, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 7 valid loss: tensor(0.2645, device='cuda:0') f1_micro: 0.5840578532542456 f1_macro: 0.49927568530945504


  cpuset_checked))


Epoch 8 train loss: tensor(0.0773, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 8 valid loss: tensor(0.2849, device='cuda:0') f1_micro: 0.5823761076468658 f1_macro: 0.4955320323521612


  cpuset_checked))


Epoch 9 train loss: tensor(0.0650, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 9 valid loss: tensor(0.3035, device='cuda:0') f1_micro: 0.5778698715292168 f1_macro: 0.49670001043923867


  cpuset_checked))


Epoch 10 train loss: tensor(0.0593, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 10 valid loss: tensor(0.3209, device='cuda:0') f1_micro: 0.5781326380967899 f1_macro: 0.49945978218598575


  cpuset_checked))


Epoch 11 train loss: tensor(0.0529, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 11 valid loss: tensor(0.3375, device='cuda:0') f1_micro: 0.5663265306122449 f1_macro: 0.4804251731582679


  cpuset_checked))


Epoch 12 train loss: tensor(0.0465, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 12 valid loss: tensor(0.3632, device='cuda:0') f1_micro: 0.5706138879570614 f1_macro: 0.473292311306469


  cpuset_checked))


Epoch 13 train loss: tensor(0.0445, device='cuda:0', grad_fn=<DivBackward0>)
Epoch 13 valid loss: tensor(0.3644, device='cuda:0') f1_micro: 0.5742924528301887 f1_macro: 0.48699049182384774
Training Time: 294.93538069725037
Training Peak Mem: 2990.59765625
Training Params: 10311279


In [14]:
!pip install git+https://gchhablani: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@github.com/pliang279/multimodal_analysis.git

Collecting git+https://gchhablani:****@github.com/pliang279/multimodal_analysis.git
  Cloning https://gchhablani:****@github.com/pliang279/multimodal_analysis.git to /tmp/pip-req-build-86aoy31h
  Running command git clone -q 'https://gchhablani:****@github.com/pliang279/multimodal_analysis.git' /tmp/pip-req-build-86aoy31h


In [15]:
import torch, torchvision
import matplotlib.pyplot as plt
import json
import cv2
import torch.nn.functional as F
import numpy as np
from copy import deepcopy

In [16]:
from mma.analysis.metrics.emap import Emap

In [17]:
text_features = None
visual_features = None
labels = None

for batch in validdata:
    if text_features is None:
        text_features = batch[0].numpy()
        visual_features = batch[1].numpy()
        labels = batch[2].numpy()
    else:
        text_features = np.concatenate((text_features, batch[0].numpy()), axis=0)
        visual_features = np.concatenate((visual_features, batch[1].numpy()), axis=0)
        labels = np.concatenate((labels, batch[2].numpy()), axis=0)

  cpuset_checked))


In [61]:
dataset = {
    'textual_inputs': text_features,
    'visual_inputs': visual_features,
}

In [60]:
text_features.shape

(2608, 300)

In [50]:
model=torch.load("best_lf.pt").cuda()

In [51]:
import gc

In [56]:
def predictor_fn(visual_inputs, textual_inputs):
    model_out  = model([torch.from_numpy(textual_inputs).cuda(), torch.from_numpy(visual_inputs).cuda()])
    pred_vqa = F.softmax(model_out, dim = -1).detach().cpu().numpy()
    gc.collect()
    return pred_vqa

In [63]:
emap = Emap(predictor_fn, dataset)

In [64]:
emap_scores = emap.compute_emap_scores(512)
orig_scores = emap.compute_predictions('orig', 512)

In [65]:
from sklearn.metrics import f1_score

In [66]:
assert np.allclose(emap_scores, orig_scores, atol = 1e-6) is False # Check if the values are not equal

In [67]:
orig_pred_labels = (orig_scores>0.5).astype(np.int32)

In [69]:
f1_score(labels, orig_pred_labels, average='micro')

0.3925538103548575

In [70]:
emap_pred_labels = (emap_scores>0.5).astype(np.int32)

In [71]:
f1_score(labels, emap_pred_labels, average='micro')

0.34194491835242496