## Generate pickle
---

Goal: package the extracted features into the pickle format that the model expects as input.

Format: (train_data, train_label, test_data, test_label, maxlen, train_length, test_length)

In [38]:
import numpy as np
import pandas as pd
import video_features_extraction as vf
import pickle

### Pre-processed values

In [157]:
# Load the reference text pickle; its tuple layout is the format this notebook reproduces.
# NOTE: unpickling can execute arbitrary code - only open pickle files you trust.
path = '../multimodal-sentiment-analysis/dataset/mosi/raw/text_2way.pickle'
with open(path, 'rb') as handle:
    # encoding='latin1' tells Python 3 how to decode 8-bit strings stored in the pickle
    # (presumably the file was written by Python 2 - confirm).
    (train_data, train_label, test_data, test_label, maxlen, train_length, test_length) = pickle.load(
        handle, encoding='latin1'
    )

In [158]:
# Shape of the reference training features unpacked from the pickle.
train_data.shape

(62, 63, 100)

In [45]:
# Shape of the reference training labels unpacked from the pickle.
train_label.shape

(62, 63)

In [108]:
# Number of entries in the reference test_length sequence.
len(test_length)

31

### Labels

In [19]:
# Read the per-segment sentiment annotations; column names are supplied explicitly.
label_columns = ['start', 'end', 'video_id', 'segment', 'score']
df = pd.read_csv('OpinionLevelSentiment.csv', skiprows=0, names=label_columns)
# Preview the first ten rows.
df.head(10)

Unnamed: 0,start,end,video_id,segment,score
0,27.879138,32.927664,_dI--eQ6qVU,1,2.4
1,45.54898,47.7839,_dI--eQ6qVU,2,3.0
2,47.7839,59.247846,_dI--eQ6qVU,3,-0.2
3,59.247846,68.277324,_dI--eQ6qVU,4,3.0
4,68.277324,71.20068,_dI--eQ6qVU,5,3.0
5,71.20068,72.986621,_dI--eQ6qVU,6,2.8
6,75.401134,77.546258,_dI--eQ6qVU,7,1.8
7,77.546258,81.30771,_dI--eQ6qVU,8,2.6
8,85.238776,87.154422,_dI--eQ6qVU,9,0.6
9,87.154422,91.813832,_dI--eQ6qVU,10,1.0


### Video Features

In [56]:
# Video ids assigned to the training fold (52 ids).
standard_train_fold=['2iD-tVS8NPw', '8d-gEyoeBzc', 'Qr1Ca94K55A', 'Ci-AH39fi3Y', '8qrpnFRGt2A', 'Bfr499ggo-0', 'QN9ZIUWUXsY', '9T9Hf74oK10', '7JsX8y1ysxY', '1iG0909rllw', 'Oz06ZWiO20M', 'BioHAh1qJAQ', '9c67fiY0wGQ', 'Iu2PFX3z_1s', 'Nzq88NnDkEk', 'Clx4VXItLTE', '9J25DZhivz8', 'Af8D0E4ZXaw', 'TvyZBvOMOTc', 'W8NXH0Djyww', '8OtFthrtaJM', '0h-zjBukYpk', 'Vj1wYRQjB-o', 'GWuJjcEuzt8', 'BI97DNYfe5I', 'PZ-lDQFboO8', '1DmNV9C1hbY', 'OQvJTdtJ2H4', 'I5y0__X72p0', '9qR7uwkblbs', 'G6GlGvlkxAQ', '6_0THN4chvY', 'Njd1F0vZSm4', 'BvYR0L6f2Ig', '03bSnISJMiM', 'Dg_0XKD0Mf4', '5W7Z1C_fDaE', 'VbQk4H8hgr0', 'G-xst2euQUc', 'MLal-t_vJPM', 'BXuRRbG0Ugk', 'LSi-o-IrDMs', 'Jkswaaud0hk', '2WGyTLYerpo', '6Egk_28TtTM', 'Sqr0AcuoNnk', 'POKffnXeBds', '73jzhE8R1TQ', 'OtBXNcAL_lE', 'HEsqda8_d0Q', 'VCslbP0mgZI', 'IumbAb8q2dM']

# Video ids assigned to the validation fold (10 ids).
standard_valid_fold=['WKA5OygbEKI', 'c5xsKMxpXnc', 'atnd_PF-Lbs', 'bvLlb-M3UXU', 'bOL9jKpeJRs', '_dI--eQ6qVU', 'ZAIRrfG22O0', 'X3j2zQgwYgE', 'aiEXnCPZubE', 'ZUXBRvtny7o']

# Video ids assigned to the test fold (31 ids).
standard_test_fold=['tmZoasNr4rU', 'zhpQhgha_KU', 'lXPQBPVc5Cw', 'iiK8YX8oH1E', 'tStelxIAHjw', 'nzpVDcQ0ywM', 'etzxEpPuc6I', 'cW1FSBF59ik', 'd6hH302o4v8', 'k5Y_838nuGo', 'pLTX3ipuDJI', 'jUzDDGyPkXU', 'f_pcplsH_V0', 'yvsjCA6Y5Fc', 'nbWiPyCm4g0', 'rnaNMUZpvvg', 'wMbj6ajWbic', 'cM3Yna7AavY', 'yDtzw_Y-7RU', 'vyB00TXsimI', 'dq3Nf_lMPnE', 'phBUpBr1hSo', 'd3_k5Xpfmik', 'v0zCBqDeKcE', 'tIrG4oNLFzE', 'fvVhgmXxadc', 'ob23OKe5a9Q', 'cXypl4FnoZo', 'vvZ4IcEtiZc', 'f9O3YtZ2VfI', 'c7UH_rxdZv4']

In [55]:
# Load the saved feature arrays for the train and test splits, in that order.
train, test = (
    np.load(features_file)
    for features_file in ('./output/mosi_train.npy', './output/mosi_test_2.npy')
)
# Display both shapes side by side.
(train.shape, test.shape)

((62, 63, 400), (31, 63, 400))

In [26]:
# Query the project helper for the segmented clips in the given directory.
# NOTE(review): judging by the names, this returns the video names and the largest
# utterance count, and '_' is presumably the separator between video id and segment
# index in the file names - confirm in video_features_extraction.
info_names, max_utterance = vf.get_video_info('../MOSI_Dataset/Test_Segmented/', '_')

In [100]:
# Number of annotation rows in `df` whose video_id equals `name`.
# NOTE(review): `name` is not assigned in any cell visible above this one, so this relies
# on leftover kernel state - assign the intended video id explicitly before running.
df[df.video_id == name].sort_values(by=['segment']).shape[0]

30

In [101]:
def get_labels_array(filenames):
    """Build the padded label matrix and utterance counts for a set of videos.

    Reads the notebook-level globals `info_names` (candidate video names; it
    also fixes the output row order), `df` (per-segment sentiment scores) and
    `train` (its second dimension is used as the padded row length).

    Parameters
    ----------
    filenames : container of str
        Video ids to keep. Names in `info_names` that are not in this
        container are skipped.

    Returns
    -------
    labels : numpy.ndarray
        2-D array with one row per kept video: the video's scores ordered by
        `segment` and padded with `vf.pad_array` to `train.shape[1]` entries.
        Has shape (0, train.shape[1]) when no video matches.
    utterance_length : list of int
        Number of annotated segments of each kept video, in row order.
    """
    padded_length = train.shape[1]
    rows = []
    utterance_length = []

    for name in info_names:
        if name not in filenames:
            continue
        df_segment = df[df.video_id == name].sort_values(by=['segment'])
        scores = df_segment['score'].values
        rows.append(vf.pad_array(scores, padded_length))
        utterance_length.append(df_segment.shape[0])

    # Stack once at the end: np.append copies the whole array on every call.
    # The empty (0, padded_length) seed keeps the result 2-D and float when nothing
    # matches, and still raises if a padded row has the wrong length.
    labels = np.vstack([np.empty((0, padded_length)), *rows])
    return labels, utterance_length

In [103]:
# Build labels from the folds that match the feature arrays: `train` holds 62 videos
# (presumably the 52 training ids plus the 10 validation ids) and `test` holds 31
# (the size of the test fold). Using the validation fold for the test labels would
# pair the test features with labels of different videos.
train_l = get_labels_array(standard_train_fold + standard_valid_fold)
test_l = get_labels_array(standard_test_fold)

In [105]:
# Utterance counts returned alongside the training label matrix.
train_l[1]

[14, 30]

### Generate pickle

Format: (train_data, train_label, test_data, test_label, maxlen, train_length, test_length)

In [110]:
# Unpack the (labels, utterance lengths) pairs, then assemble the tuple in the
# documented order; the padded length (maxlen) is the second dimension of `train`.
train_labels, train_lengths = train_l
test_labels, test_lengths = test_l
data = (
    train,
    train_labels,
    test,
    test_labels,
    train.shape[1],
    train_lengths,
    test_lengths,
)

In [113]:
# Write the assembled tuple to disk.
path = './'
name = 'pickle_test.pkl'
# The context manager closes the file even if pickle.dump raises.
with open(path + name, "wb") as pickle_out:
    pickle.dump(data, pickle_out)

In [152]:
# Load the reference video pickle to compare its layout with the generated one.
# Relative path (same dataset directory as the text pickle) instead of a user-specific
# absolute path, so the notebook runs on other machines.
# NOTE: this rebinds train_data, train_label, ... loaded earlier in the notebook.
# NOTE: unpickling can execute arbitrary code - only open pickle files you trust.
path = '../multimodal-sentiment-analysis/dataset/mosi/raw/video.pickle'
with open(path, 'rb') as handle:
    # encoding='latin1' tells Python 3 how to decode 8-bit strings stored in the pickle
    # (presumably the file was written by Python 2 - confirm).
    (train_data, train_label, test_data, test_label, maxlen, train_length, test_length) = pickle.load(
        handle, encoding='latin1'
    )

In [153]:
# Shape of the training features unpacked from the video pickle.
train_data.shape

(62, 63, 100)