# Co-learning with Memory Fusion Network (MFN)

## Setup

In [31]:
import numpy as np
import pandas as pd
import os
import copy
from training_loops import train_ef_IM_bi, train_mfn
from tqdm import tqdm
import random
import h5py
import torch

In [25]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

## Data

In [26]:
def load_saved_data():
	h5f = h5py.File('../data/MOSI/X_train.h5','r')
	X_train = h5f['data'][:]
	h5f.close()
	h5f = h5py.File('../data/MOSI/y_train.h5','r')
	y_train = h5f['data'][:]
	h5f.close()
	h5f = h5py.File('../data/MOSI/X_valid.h5','r')
	X_valid = h5f['data'][:]
	h5f.close()
	h5f = h5py.File('../data/MOSI/y_valid.h5','r')
	y_valid = h5f['data'][:]
	h5f.close()
	h5f = h5py.File('../data/MOSI/X_test.h5','r')
	X_test = h5f['data'][:]
	h5f.close()
	h5f = h5py.File('../data/MOSI/y_test.h5','r')
	y_test = h5f['data'][:]
	h5f.close()
	return X_train, y_train, X_valid, y_valid, X_test, y_test

In [27]:
#Data is read in in as Data Size x Sequence Len x Num Features. Please Note Num Features is organized as 300 text, 20 visual, 5 audio TODO CONFIRM THIS I also believe the start of time sequence is padded with zeros
X_train, y_train, X_valid, y_valid, X_test, y_test = load_saved_data()

In [28]:
#We also need a version of the training data that only has the text modalitiy
X_train_text_modality, X_valid_text_modality, X_test_text_modality = copy.deepcopy(X_train), copy.deepcopy(X_valid), copy.deepcopy(X_test)
y_train_text_modality, y_valid_text_modality, y_test_text_modality = copy.deepcopy(y_train), copy.deepcopy(y_valid), copy.deepcopy(y_test)

#We are only taking the first 300 features (corresponding to text modality)
X_train_text_modality[:,:,300:] = 0.0
X_valid_text_modality[:,:,300:] = 0.0
X_test_text_modality[:,:,300:] = 0.0

## Hyperparameters

In [29]:
config = dict()
config["input_dims"] = [300,5,20]
hl = 128
ha = 32
hv = 32
config["h_dims"] = [hl,ha,hv]
config["memsize"] = 128
config["windowsize"] = 2
config["batchsize"] = 128
config["num_epochs"] = 50
config["lr"] = .01
config["momentum"] = .9
NN1Config = dict()
NN1Config["shapes"] = 128
NN1Config["drop"] = 0.0
NN2Config = dict()
NN2Config["shapes"] = 64
NN2Config["drop"] = .2
gamma1Config = dict()
gamma1Config["shapes"] = 256
gamma1Config["drop"] = 0.0
gamma2Config = dict()
gamma2Config["shapes"] = 64
gamma2Config["drop"] = .2
outConfig = dict()
outConfig["shapes"] =64
outConfig["drop"] = .5
configs = [config,NN1Config,NN2Config,gamma1Config,gamma2Config,outConfig]

## Training and Testing 

In [32]:
#Training Full Multimodal on different multimodal dropout settings
total_seeds = 5
for i in range(total_seeds):
    set_seed(i)
    modality_drop = [[0,0,0],[.15,.15,0],[.4,.4, 0],[.6,.6, 0], [.8,.8,0], [.9,.9,0], [.95,.95,0]]

    res = []
    for mod_drop_probs in tqdm(modality_drop):
        a_d = mod_drop_probs[0]
        v_d = mod_drop_probs[1]
        l_d = mod_drop_probs[2]

        results = train_mfn(X_train, y_train, X_valid_text_modality, y_valid, X_test_text_modality, 
                  y_test_text_modality, configs, a_d,v_d,l_d)

        res.append(results)
    
    #Training Unimodal Version 
    results = train_mfn(X_train_text_modality, y_train_text_modality,
          X_valid_text_modality, y_valid_text_modality, 
          X_test_text_modality, y_test_text_modality, 
          configs, 0, 0, 0)
    
    #setting N/A since it is unimodal 
    results[0] = 'N/A'
    results[1] = 'N/A'
    results[2] = 'N/A'
    res.append(results)
    
    #writing results
    cols = ['audio_dropout', 'language_dropout', 'video_dropout', 'acc', 'mae', 'f_score']
    result_df = pd.DataFrame(res, columns = cols)
    result_df.to_csv('../output/co_learning_MFN_MOSI/dropout_results_'+ str(i)+ '.csv', sep = '\t')

  0%|          | 0/7 [00:02<?, ?it/s]

0 1.3979162573814392 1.4017857313156128 saving model





FileNotFoundError: [Errno 2] No such file or directory: 'output/mfn_50494.pt'