# **Testing models and features**
Once the data is loaded from the text files and the CSV has been generated for each feature, the modeling part can be skipped.
P.S: CSV has to be created for all features, they are being used in the code.


## **Intialization**

In [6]:
import pandas as pd
import numpy as np

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from tensorflow.keras import layers
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor

from xgboost.sklearn import XGBRegressor
from sklearn.tree import DecisionTreeRegressor

#Shows file loading progress 
import tqdm
from tqdm import tqdm_notebook as tqdm
from pathlib import Path

#Install and import pip
!pip install pyprind
import pyprind



In [7]:
#Mount google drive to be user
from google.colab import drive
import os
drive.mount('/content/drive/')
#chage the code above if needed


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [8]:
def read_C3D(fname):
    """Scan vectors from file"""
    with open(fname) as f:
        for line in f:
            C3D =[float(item) for item in line.split()] #convert to float using the seperator
    return C3D

def read_HMP(fname):
    """Scan HMP(Histogram of Motion Patterns) features from file"""
    with open(fname) as f:
        for line in f:
            pairs=line.split()
            HMP_temp = { int(p.split(':')[0]) : float(p.split(':')[1]) for p in pairs}
    # there are 6075 bins, fill zeros
    HMP = np.zeros(6075)
    for idx in HMP_temp.keys():
        HMP[idx-1] = HMP_temp[idx]            
    return HMP

def read_ColorHistogram(fname):
    """Scan Color Histogram from file
    Input file contains RGB histogram,
    Return a matrix of (3,256)"""
    RGB_Hist = np.zeros((3,256))
    with open(fname) as f:
        i_l = 0 # line index
        for line in f:
            pairs = line.split()
            hist_dict = {int(p.split(':')[0]):float(p.split(':')[1]) for p in pairs}
            for idx in hist_dict.keys():
                RGB_Hist[i_l,idx] = hist_dict[idx]
            i_l += 1
    return RGB_Hist

In [9]:
def Get_score(Y_pred,Y_true):
    '''Calculate the Spearmann"s correlation coefficient'''
    Y_pred = np.squeeze(Y_pred)
    Y_true = np.squeeze(Y_true)
    if Y_pred.shape != Y_true.shape:
        print('Input shapes don\'t match!')
    else:
        if len(Y_pred.shape) == 1:
            Res = pd.DataFrame({'Y_true':Y_true,'Y_pred':Y_pred})
            score_mat = Res[['Y_true','Y_pred']].corr(method='spearman',min_periods=1)
            print('The Spearman\'s correlation coefficient is: %.3f' % score_mat.iloc[1][0])
        else:
            for ii in range(Y_pred.shape[1]):
                Get_score(Y_pred[:,ii],Y_true[:,ii])

Loading Target Variable

In [10]:
#Loading target variable
pathGT ='/content/drive/MyDrive/CA684_Assignment/Dev-set/Ground-truth'

gTruth = pd.read_csv(pathGT+'/ground-truth.csv')
gTruth = gTruth[["video", "short-term_memorability", "long-term_memorability"]]
gTruth["video"] = gTruth["video"].str.replace(".webm", "")
gTruth.head()

Unnamed: 0,video,short-term_memorability,long-term_memorability
0,video3,0.924,0.846
1,video4,0.923,0.667
2,video6,0.863,0.7
3,video8,0.922,0.818
4,video10,0.95,0.9


## **Loading C3D data and testing it with different models.**


In [None]:
#=====================================C3D==============================
#Loading C3D
C3D_path = '/content/drive/MyDrive/CA684_Assignment/Dev-set/C3D/'

C3D_list = []

pbar = pyprind.ProgBar(len(os.listdir(C3D_path)), title='Importing C3D files')

for file in os.listdir(C3D_path):
    path = os.path.join(C3D_path, file)
    arrayFile = read_C3D(path)
    file= file.replace(".txt","")
    C3D_list.append([file] + arrayFile)
    pbar.update()

C3D_cols = ["C3D_"+str(i) for i in range(1, 102)]
C3D_cols = ["video"] + C3D_cols
C3D = pd.DataFrame(C3D_list, columns = C3D_cols)
del C3D_list
del C3D_cols
C3D.head()

Importing C3D files
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:10


Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,C3D_62,C3D_63,C3D_64,C3D_65,C3D_66,C3D_67,C3D_68,C3D_69,C3D_70,C3D_71,C3D_72,C3D_73,C3D_74,C3D_75,C3D_76,C3D_77,C3D_78,C3D_79,C3D_80,C3D_81,C3D_82,C3D_83,C3D_84,C3D_85,C3D_86,C3D_87,C3D_88,C3D_89,C3D_90,C3D_91,C3D_92,C3D_93,C3D_94,C3D_95,C3D_96,C3D_97,C3D_98,C3D_99,C3D_100,C3D_101
0,video6632,0.010858,0.010386,0.0,0.0,0.0,0.0,2.7e-07,0.0,1e-08,3.4e-07,8e-08,1e-08,4e-06,0.000105,0.0,4e-08,1e-08,0.0,0.0,0.00013318,0.0,0.0,0.0,0.0,7.6e-07,0.0,9.8e-07,0.0,0.0,0.0,0.0,0.0,6e-08,2.1e-05,5e-08,0.0,1e-08,3e-08,2e-08,...,0.001623,0.970125,1.6e-05,0.001298,3.2e-05,1e-06,0.0,0.0,1e-08,2.5e-07,0.0,0.0,5e-08,0.0,1e-08,1e-08,4.2e-07,0.0,3e-08,2e-08,0.0,0.0,0.0,6e-08,0.0,0.0,9e-08,0.0,0.0,1.1e-07,1.4e-07,0.0,0.0,1.7e-07,0.0,0.0,1e-08,1.3e-06,2.6e-06,8e-08
1,video6634,0.0002,6.5e-05,0.993807,2e-07,4.7e-07,7.3e-05,3.7e-06,0.000337,6.71e-06,2.29e-06,6.38e-06,7.34e-06,1.9e-05,7e-06,3.24e-06,3.81e-06,1.411e-05,1e-06,3e-06,2.3e-07,1.9e-07,0.002938,5.2e-07,9.2e-07,1.136e-05,1.1e-05,0.00033104,9.2e-07,2.2e-05,8e-08,2.8e-05,1.3e-05,5.778e-05,3.7e-05,1.683e-05,6.86e-06,3.99e-06,8.03e-06,1.45e-06,...,4.5e-05,2.7e-05,1.3e-05,7.7e-05,5e-05,0.000138,1.2e-05,7e-06,4.1e-07,5.44e-06,3.4e-07,7e-06,8.399e-05,4e-06,3.97e-06,5.9e-07,0.00012853,8.8e-07,2.228e-05,1.105e-05,3e-06,1.5e-05,1.4e-05,4.09e-06,3e-06,5e-06,4.92e-06,5e-06,6.4e-07,2.372e-05,6.61e-06,1e-05,2e-06,5.8e-06,1e-06,1.49e-06,1.17e-05,1.5e-07,8.3e-07,0.000106
2,video6633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999985,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.12e-06
3,video6645,0.005959,0.004765,0.003757,0.00057871,7.344e-05,0.000129,0.00046653,0.010266,0.00030281,0.00021965,0.00025218,0.00296373,3.2e-05,0.000592,3.26e-05,0.00146643,0.00024307,0.000181,0.004019,0.00239096,0.00012069,0.008488,0.00052162,0.00041646,0.0164342,0.049686,0.00147055,5.432e-05,0.001055,0.00544277,0.000173,0.024688,0.00096407,0.00088,0.01140529,0.00452216,0.00085945,0.0006573,0.00069424,...,0.021621,0.000639,0.014109,0.000472,0.002461,0.004046,0.000905,0.000997,0.00251466,0.00158744,0.00112503,0.001008,0.02133877,0.000647,0.00137301,9.568e-05,0.05635083,1.628e-05,0.00537991,0.00602002,0.046499,0.023942,0.020658,0.00334952,0.002637,9.4e-05,0.01170705,0.000229,0.00978921,0.00276439,0.00805235,0.003992,0.004137,0.0367039,0.000767,0.00036921,0.0125198,8.422e-05,0.01159825,0.00115504
4,video6643,0.005782,0.000306,0.004011,1.007e-05,1.034e-05,2e-06,3.16e-06,3e-06,1.984e-05,5.75e-06,6.642e-05,6.69e-06,0.000301,0.004799,2.8e-07,1.669e-05,2.67e-06,1e-06,1.1e-05,6.526e-05,3.1e-07,3.6e-05,1.4e-06,6.26e-06,0.01750103,1.9e-05,0.01190515,4.45e-06,5e-06,2.387e-05,7e-06,3.5e-05,3.63e-06,0.66742,0.00034824,1.9e-07,1.121e-05,6.83e-06,0.00018376,...,0.068799,0.005818,0.000535,0.001711,0.112263,0.000408,3.5e-05,0.000267,5.395e-05,3.899e-05,3.619e-05,0.000321,0.00045509,2e-06,6.2e-06,7.51e-06,0.00797192,7e-08,1.873e-05,0.00015111,1.1e-05,0.000398,1e-06,4.09e-06,4e-06,5e-06,2.08e-06,1e-06,0.00016518,2.738e-05,2.106e-05,2e-06,4e-06,0.00681835,5e-06,5e-08,2.088e-05,0.00127175,0.00048622,1.965e-05


Creating a CSV file for C3D data frame. This CSV file will be used later. You can directly skip to the part where colour histogram data is being loaded **after running the code below**.

In [None]:
C3D.to_csv("/content/drive/MyDrive/C3D.csv")

In [None]:
#Merging with ground truth
C3D = pd.merge(C3D, gTruth, on = ["video",'video'])
C3D.head(2)

In [None]:
# Splitting it into train and test
C3D_train, C3D_test = train_test_split(C3D, test_size = 0.2)

In [None]:
C3D_train_x, C3D_train_y_short, C3D_train_y_long, C3D_test_x, C3D_test_y_short, C3D_test_y_long = C3D_train.drop(["short-term_memorability", "long-term_memorability"], axis = 1), C3D_train["short-term_memorability"], C3D_train["long-term_memorability"], C3D_test.drop(["short-term_memorability", "long-term_memorability"], axis = 1), C3D_test["short-term_memorability"], C3D_test["long-term_memorability"]

In [None]:
C3D_train_x, C3D_test_x = C3D_train_x.drop(["video"], axis = 1), C3D_test_x.drop(["video"], axis = 1)

C3D model testing (can be skipped)

In [None]:
cols = C3D_train_x.columns
sc = StandardScaler()
sc.fit(C3D_train_x)
C3D_train_x = pd.DataFrame(sc.transform(C3D_train_x), columns = cols)
C3D_test_x = pd.DataFrame(sc.transform(C3D_test_x), columns = cols)

In [None]:
#Short term mem with SVR()
svr_c3d = SVR()
svr_c3d.fit(C3D_train_x, C3D_train_y_short)

svr_c3d_pred_short = svr_c3d.predict(C3D_test_x)
Get_score(svr_c3d_pred_short, C3D_test_y_short)

The Spearman's correlation coefficient is: 0.262


In [None]:
#Long term mem with SVR()
svr_c3d = SVR()
svr_c3d.fit(C3D_train_x, C3D_train_y_long)

svr_c3d_pred_long = svr_c3d.predict(C3D_test_x)
Get_score(svr_c3d_pred_long, C3D_test_y_long)

The Spearman's correlation coefficient is: 0.086


In [None]:
#Short term for XGBoost
xgb_c3d = XGBRegressor(objective = "reg:squarederror")
xgb_c3d.fit(C3D_train_x, C3D_train_y_short)

xgb_c3d_pred_short = xgb_c3d.predict(C3D_test_x)
Get_score(xgb_c3d_pred_short, C3D_test_y_short)

The Spearman's correlation coefficient is: 0.274


In [None]:
#Long term for XGBoost
xgb_c3d = XGBRegressor(objective = "reg:squarederror")
xgb_c3d.fit(C3D_train_x, C3D_train_y_long)

xgb_c3d_pred_long = xgb_c3d.predict(C3D_test_x)
Get_score(xgb_c3d_pred_long, C3D_test_y_long)

The Spearman's correlation coefficient is: 0.136


In [None]:
#Short term for KNN
knn_c3d = KNeighborsRegressor()
knn_c3d.fit(C3D_train_x, C3D_train_y_short)

knn_c3d_pred_short = knn_c3d.predict(C3D_test_x)
Get_score(knn_c3d_pred_short, C3D_test_y_short)

The Spearman's correlation coefficient is: 0.177


In [None]:
#Long term for KNN
knn_c3d = KNeighborsRegressor()
knn_c3d.fit(C3D_train_x, C3D_train_y_long)

knn_c3d_pred_long = knn_c3d.predict(C3D_test_x)
Get_score(knn_c3d_pred_long, C3D_test_y_long)

The Spearman's correlation coefficient is: 0.073


In [None]:
#Short term Random Forest
rf_c3d = RandomForestRegressor()
rf_c3d.fit(C3D_train_x, C3D_train_y_short)

rf_c3d_pred_short = rf_c3d.predict(C3D_test_x)
Get_score(rf_c3d_pred_short, C3D_test_y_short)

The Spearman's correlation coefficient is: 0.304


In [None]:
#Long term Random Forest
rf_c3d = RandomForestRegressor()
rf_c3d.fit(C3D_train_x, C3D_train_y_long)

rf_c3d_pred_long = rf_c3d.predict(C3D_test_x)
Get_score(rf_c3d_pred_long, C3D_test_y_long)

The Spearman's correlation coefficient is: 0.134


In [None]:
#Short term ExtraTrees
et_c3d = ExtraTreesRegressor()
et_c3d.fit(C3D_train_x, C3D_train_y_short)

et_c3d_pred_short = et_c3d.predict(C3D_test_x)
Get_score(et_c3d_pred_short, C3D_test_y_short)

The Spearman's correlation coefficient is: 0.274


In [None]:
#Long term ExtraTrees
et_c3d = ExtraTreesRegressor()
et_c3d.fit(C3D_train_x, C3D_train_y_long)

et_c3d_pred_long = et_c3d.predict(C3D_test_x)
Get_score(et_c3d_pred_long, C3D_test_y_long)

The Spearman's correlation coefficient is: 0.115


In [None]:
#Short term AdaBoost
adb_c3d = AdaBoostRegressor()
adb_c3d.fit(C3D_train_x, C3D_train_y_short)

adb_c3d_pred_short = adb_c3d.predict(C3D_test_x)
Get_score(adb_c3d_pred_short, C3D_test_y_short)

The Spearman's correlation coefficient is: 0.252


In [None]:
#Long term AdaBoost
adb_c3d = AdaBoostRegressor()
adb_c3d.fit(C3D_train_x, C3D_train_y_long)

adb_c3d_pred_long = adb_c3d.predict(C3D_test_x)
Get_score(adb_c3d_pred_long, C3D_test_y_long)

The Spearman's correlation coefficient is: 0.106


In [None]:
#Short term Gradient Boost
gbr_c3d = GradientBoostingRegressor()
gbr_c3d.fit(C3D_train_x, C3D_train_y_short)

gbr_c3d_pred_short = gbr_c3d.predict(C3D_test_x)
Get_score(gbr_c3d_pred_short, C3D_test_y_short)

The Spearman's correlation coefficient is: 0.274


In [None]:
#Long term Gradient Boost
gbr_c3d = GradientBoostingRegressor()
gbr_c3d.fit(C3D_train_x, C3D_train_y_long)

gbr_c3d_pred_long = gbr_c3d.predict(C3D_test_x)
Get_score(gbr_c3d_pred_long, C3D_test_y_long)

The Spearman's correlation coefficient is: 0.137


## **Loading colour histogram data and testing it with different models.**

In [None]:
#================================COLOUR HISTOGRAM=============================
CH_path = '/content/drive/MyDrive/CA684_Assignment/Dev-set/ColorHistogram/'

CH_list = []

pbar = pyprind.ProgBar(len(os.listdir(CH_path)), title='Importing ColorHistogram files')

for file in os.listdir(CH_path):
    path = os.path.join(CH_path, file)
    arrayFile = read_ColorHistogram(path)
    arrayFile = list(arrayFile[0]) + list(arrayFile[1]) + list(arrayFile[2])
    file= file.replace(".txt","")
    CH_list.append([file] + arrayFile)
    pbar.update()

CH_cols = ["CH_R_"+str(i) for i in range(0, 256)] + ["CH_G_"+str(i) for i in range(0, 256)] + ["CH_B_"+str(i) for i in range(0, 256)]
CH_cols = ["video"] + CH_cols
CH = pd.DataFrame(CH_list, columns = CH_cols)
del CH_list
del CH_cols

Importing ColorHistogram files
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 01:57:59


In [None]:
CH.head()

Unnamed: 0,video,CH_R_0,CH_R_1,CH_R_2,CH_R_3,CH_R_4,CH_R_5,CH_R_6,CH_R_7,CH_R_8,CH_R_9,CH_R_10,CH_R_11,CH_R_12,CH_R_13,CH_R_14,CH_R_15,CH_R_16,CH_R_17,CH_R_18,CH_R_19,CH_R_20,CH_R_21,CH_R_22,CH_R_23,CH_R_24,CH_R_25,CH_R_26,CH_R_27,CH_R_28,CH_R_29,CH_R_30,CH_R_31,CH_R_32,CH_R_33,CH_R_34,CH_R_35,CH_R_36,CH_R_37,CH_R_38,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video7365-56,21713.0,3059.0,3169.0,3088.0,3156.0,3287.0,3411.0,3508.0,3725.0,3761.0,3879.0,4184.0,4347.0,4776.0,4893.0,5452.0,5934.0,6391.0,7001.0,7094.0,8119.0,8523.0,9025.0,9385.0,10135.0,10513.0,11072.0,11354.0,11989.0,12068.0,12828.0,12209.0,12996.0,12179.0,12962.0,12388.0,12976.0,12881.0,13598.0,...,9792.0,11763.0,13870.0,16721.0,19241.0,20207.0,18109.0,13610.0,11246.0,8668.0,6698.0,5636.0,4512.0,3818.0,3429.0,3028.0,2747.0,2243.0,2165.0,1847.0,1626.0,1400.0,1391.0,1140.0,941.0,881.0,783.0,726.0,606.0,580.0,460.0,358.0,341.0,295.0,240.0,249.0,161.0,142.0,120.0,507.0
1,video7370-56,803.0,301.0,552.0,597.0,1001.0,1078.0,1548.0,1630.0,2278.0,2262.0,3295.0,3242.0,4241.0,4475.0,4338.0,5218.0,5166.0,5662.0,5537.0,6164.0,5786.0,5026.0,5350.0,4428.0,4363.0,3700.0,4009.0,4305.0,3595.0,10475.0,20380.0,62714.0,148230.0,176974.0,233718.0,95399.0,221205.0,123922.0,133073.0,...,52.0,56.0,46.0,47.0,47.0,43.0,58.0,62.0,46.0,46.0,39.0,48.0,37.0,53.0,43.0,56.0,36.0,50.0,52.0,47.0,51.0,54.0,59.0,54.0,41.0,68.0,49.0,71.0,60.0,78.0,91.0,75.0,93.0,89.0,104.0,125.0,315.0,235.0,72.0,230.0
2,video737-56,13964.0,3047.0,3423.0,3847.0,4281.0,4468.0,5067.0,5413.0,5928.0,5994.0,6507.0,6744.0,7207.0,7236.0,7751.0,7645.0,8070.0,8046.0,8860.0,8481.0,9412.0,8876.0,9875.0,9639.0,10086.0,9826.0,10049.0,10325.0,10323.0,10651.0,10851.0,10595.0,10983.0,10914.0,11096.0,10576.0,10891.0,10512.0,10823.0,...,5712.0,7085.0,5566.0,4606.0,4616.0,4350.0,3699.0,3315.0,3052.0,3257.0,2842.0,2739.0,3163.0,2762.0,3292.0,3134.0,3316.0,2762.0,2955.0,2927.0,2524.0,2356.0,2010.0,1741.0,1339.0,1196.0,1116.0,910.0,844.0,621.0,634.0,408.0,409.0,446.0,300.0,308.0,232.0,228.0,187.0,655.0
3,video7370-0,94.0,59.0,71.0,96.0,212.0,230.0,438.0,422.0,766.0,796.0,1256.0,1555.0,2213.0,2520.0,2784.0,3605.0,3664.0,3746.0,3925.0,3878.0,4326.0,3640.0,4141.0,3894.0,4164.0,3659.0,4775.0,4809.0,4650.0,6875.0,10452.0,25779.0,63222.0,54686.0,103439.0,77511.0,142976.0,109866.0,196920.0,...,159.0,174.0,160.0,150.0,155.0,158.0,165.0,137.0,140.0,143.0,138.0,140.0,159.0,138.0,167.0,145.0,130.0,162.0,142.0,135.0,169.0,136.0,152.0,138.0,151.0,178.0,172.0,169.0,285.0,261.0,281.0,304.0,417.0,479.0,420.0,1101.0,507.0,1698.0,422.0,15063.0
4,video7367-112,705.0,179.0,303.0,347.0,659.0,926.0,1994.0,1909.0,4553.0,5304.0,6813.0,9009.0,9973.0,11593.0,7953.0,12804.0,11786.0,15281.0,14185.0,13404.0,14749.0,9323.0,14656.0,11200.0,11913.0,10522.0,9923.0,10168.0,7611.0,10057.0,8069.0,9442.0,8439.0,9148.0,9904.0,7381.0,9368.0,8640.0,9037.0,...,2713.0,2577.0,2046.0,1691.0,2523.0,2910.0,2115.0,1520.0,1773.0,1411.0,1338.0,1637.0,1779.0,1777.0,1573.0,1860.0,1502.0,1508.0,1976.0,1957.0,1792.0,1399.0,1350.0,1168.0,1281.0,1242.0,1287.0,880.0,820.0,752.0,453.0,308.0,186.0,139.0,97.0,100.0,106.0,92.0,83.0,200.0


Creating a CSV file for Colour Histogram data frame. This CSV file will be used later. You can directly skip to the part where HMP data is being loaded **after running the code below**.

In [None]:
CH.to_csv("/content/drive/MyDrive/colourHistogram.csv")

In [None]:
k = CH["video"].str.split("-", expand = True)
CH["video"], CH["split"] = k[0], k[1]
CH_a = CH[CH["split"] == "0" ].copy()
CH_b = CH[CH["split"] == "56" ].copy()
CH_c = CH[CH["split"] == "112" ].copy()

CH_a.drop(["split"], axis = 1, inplace = True)
CH_b.drop(["split"], axis = 1, inplace = True)
CH_c.drop(["split"], axis = 1, inplace = True)

CH_new = pd.merge(CH_a, CH_b, on = ["video", "video"])
CH_new = pd.merge(CH_new, CH_c, on = ["video", "video"])

del CH_a
del CH_b
del CH_c
del CH

In [None]:
CH_new.head()

Unnamed: 0,video,CH_R_0_x,CH_R_1_x,CH_R_2_x,CH_R_3_x,CH_R_4_x,CH_R_5_x,CH_R_6_x,CH_R_7_x,CH_R_8_x,CH_R_9_x,CH_R_10_x,CH_R_11_x,CH_R_12_x,CH_R_13_x,CH_R_14_x,CH_R_15_x,CH_R_16_x,CH_R_17_x,CH_R_18_x,CH_R_19_x,CH_R_20_x,CH_R_21_x,CH_R_22_x,CH_R_23_x,CH_R_24_x,CH_R_25_x,CH_R_26_x,CH_R_27_x,CH_R_28_x,CH_R_29_x,CH_R_30_x,CH_R_31_x,CH_R_32_x,CH_R_33_x,CH_R_34_x,CH_R_35_x,CH_R_36_x,CH_R_37_x,CH_R_38_x,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video7370,94.0,59.0,71.0,96.0,212.0,230.0,438.0,422.0,766.0,796.0,1256.0,1555.0,2213.0,2520.0,2784.0,3605.0,3664.0,3746.0,3925.0,3878.0,4326.0,3640.0,4141.0,3894.0,4164.0,3659.0,4775.0,4809.0,4650.0,6875.0,10452.0,25779.0,63222.0,54686.0,103439.0,77511.0,142976.0,109866.0,196920.0,...,16.0,12.0,17.0,12.0,9.0,3.0,17.0,11.0,14.0,18.0,12.0,13.0,11.0,18.0,25.0,19.0,24.0,18.0,17.0,16.0,20.0,30.0,15.0,2.0,0.0,3.0,3.0,2.0,1.0,5.0,4.0,6.0,4.0,2.0,4.0,1.0,4.0,7.0,3.0,26.0
1,video7367,717.0,182.0,305.0,446.0,784.0,1107.0,2079.0,2072.0,4083.0,5349.0,6871.0,8291.0,8685.0,9823.0,6367.0,11485.0,12691.0,13899.0,13987.0,12573.0,13352.0,8642.0,13460.0,11907.0,12011.0,10837.0,9803.0,10017.0,7281.0,9816.0,8920.0,10015.0,8629.0,9570.0,9358.0,6762.0,9500.0,9194.0,8340.0,...,2713.0,2577.0,2046.0,1691.0,2523.0,2910.0,2115.0,1520.0,1773.0,1411.0,1338.0,1637.0,1779.0,1777.0,1573.0,1860.0,1502.0,1508.0,1976.0,1957.0,1792.0,1399.0,1350.0,1168.0,1281.0,1242.0,1287.0,880.0,820.0,752.0,453.0,308.0,186.0,139.0,97.0,100.0,106.0,92.0,83.0,200.0
2,video737,17266.0,4212.0,4638.0,4663.0,5116.0,5401.0,5809.0,6021.0,6516.0,6602.0,6847.0,7125.0,7466.0,7511.0,7929.0,8121.0,8531.0,8470.0,9093.0,8744.0,9250.0,9220.0,9809.0,9490.0,9976.0,10066.0,10386.0,10391.0,10239.0,10324.0,10409.0,10289.0,10202.0,10204.0,10495.0,10084.0,10408.0,9933.0,10124.0,...,5470.0,6388.0,5295.0,4617.0,4739.0,3786.0,3467.0,3152.0,3017.0,3074.0,2501.0,2619.0,3077.0,2750.0,3205.0,2783.0,2585.0,1733.0,1868.0,1599.0,1329.0,1395.0,1172.0,995.0,933.0,998.0,774.0,601.0,705.0,472.0,530.0,345.0,346.0,298.0,220.0,240.0,165.0,189.0,101.0,436.0
3,video7372,35998.0,16962.0,11110.0,23543.0,12793.0,14267.0,8738.0,3737.0,6146.0,2831.0,6129.0,4411.0,7592.0,4850.0,2504.0,5283.0,6041.0,8394.0,5860.0,6919.0,8274.0,6484.0,7278.0,14287.0,14911.0,1964.0,19876.0,28576.0,28272.0,28104.0,20063.0,22057.0,1925.0,23540.0,17285.0,34654.0,35122.0,22332.0,21624.0,...,169.0,225.0,160.0,147.0,241.0,168.0,192.0,166.0,177.0,142.0,105.0,154.0,117.0,133.0,109.0,142.0,104.0,100.0,117.0,132.0,150.0,113.0,123.0,89.0,102.0,106.0,118.0,84.0,81.0,76.0,87.0,72.0,99.0,140.0,162.0,321.0,302.0,808.0,487.0,2653.0
4,video7376,34.0,21.0,33.0,40.0,118.0,199.0,258.0,598.0,543.0,731.0,561.0,692.0,556.0,620.0,703.0,670.0,895.0,1440.0,2123.0,2143.0,2888.0,3024.0,5788.0,5646.0,6297.0,5067.0,6195.0,7599.0,7739.0,7428.0,7690.0,7469.0,7753.0,9890.0,9826.0,10131.0,8969.0,10361.0,9353.0,...,9445.0,11948.0,10816.0,11807.0,8095.0,10510.0,10612.0,13586.0,14285.0,10792.0,7632.0,6720.0,9489.0,10169.0,12251.0,10802.0,7248.0,5148.0,3064.0,5452.0,4283.0,7152.0,5347.0,5256.0,4880.0,2259.0,6011.0,10054.0,15960.0,5142.0,5869.0,1960.0,4839.0,9876.0,9623.0,14917.0,6472.0,8551.0,361.0,235536.0


In [None]:
CH_new = pd.merge(CH_new, gTruth, on = ["video", "video"])
CH_new.head(2)

Unnamed: 0,video,CH_R_0_x,CH_R_1_x,CH_R_2_x,CH_R_3_x,CH_R_4_x,CH_R_5_x,CH_R_6_x,CH_R_7_x,CH_R_8_x,CH_R_9_x,CH_R_10_x,CH_R_11_x,CH_R_12_x,CH_R_13_x,CH_R_14_x,CH_R_15_x,CH_R_16_x,CH_R_17_x,CH_R_18_x,CH_R_19_x,CH_R_20_x,CH_R_21_x,CH_R_22_x,CH_R_23_x,CH_R_24_x,CH_R_25_x,CH_R_26_x,CH_R_27_x,CH_R_28_x,CH_R_29_x,CH_R_30_x,CH_R_31_x,CH_R_32_x,CH_R_33_x,CH_R_34_x,CH_R_35_x,CH_R_36_x,CH_R_37_x,CH_R_38_x,...,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255,short-term_memorability,long-term_memorability
0,video7370,94.0,59.0,71.0,96.0,212.0,230.0,438.0,422.0,766.0,796.0,1256.0,1555.0,2213.0,2520.0,2784.0,3605.0,3664.0,3746.0,3925.0,3878.0,4326.0,3640.0,4141.0,3894.0,4164.0,3659.0,4775.0,4809.0,4650.0,6875.0,10452.0,25779.0,63222.0,54686.0,103439.0,77511.0,142976.0,109866.0,196920.0,...,17.0,12.0,9.0,3.0,17.0,11.0,14.0,18.0,12.0,13.0,11.0,18.0,25.0,19.0,24.0,18.0,17.0,16.0,20.0,30.0,15.0,2.0,0.0,3.0,3.0,2.0,1.0,5.0,4.0,6.0,4.0,2.0,4.0,1.0,4.0,7.0,3.0,26.0,0.951,0.938
1,video7367,717.0,182.0,305.0,446.0,784.0,1107.0,2079.0,2072.0,4083.0,5349.0,6871.0,8291.0,8685.0,9823.0,6367.0,11485.0,12691.0,13899.0,13987.0,12573.0,13352.0,8642.0,13460.0,11907.0,12011.0,10837.0,9803.0,10017.0,7281.0,9816.0,8920.0,10015.0,8629.0,9570.0,9358.0,6762.0,9500.0,9194.0,8340.0,...,2046.0,1691.0,2523.0,2910.0,2115.0,1520.0,1773.0,1411.0,1338.0,1637.0,1779.0,1777.0,1573.0,1860.0,1502.0,1508.0,1976.0,1957.0,1792.0,1399.0,1350.0,1168.0,1281.0,1242.0,1287.0,880.0,820.0,752.0,453.0,308.0,186.0,139.0,97.0,100.0,106.0,92.0,83.0,200.0,0.858,1.0


In [None]:
CH_train, CH_test = train_test_split(CH_new, test_size = 0.2)

In [None]:
CH_train_x, CH_train_y_short, CH_train_y_long, CH_test_x, CH_test_y_short, CH_test_y_long = CH_train.drop(["short-term_memorability", "long-term_memorability"], axis = 1), CH_train["short-term_memorability"], CH_train["long-term_memorability"], CH_test.drop(["short-term_memorability", "long-term_memorability"], axis = 1), CH_test["short-term_memorability"], CH_test["long-term_memorability"]

In [None]:
CH_train_x, CH_test_x = CH_train_x.drop(["video"], axis = 1), CH_test_x.drop(["video"], axis = 1)

Testing models for CH (this can be skipped)

In [None]:
cols = CH_train_x.columns
sc = StandardScaler()
sc.fit(CH_train_x)
CH_train_x = pd.DataFrame(sc.transform(CH_train_x), columns = cols)
CH_test_x = pd.DataFrame(sc.transform(CH_test_x), columns = cols)

In [None]:
#Short term SVR()
svr_ch = SVR()
svr_ch.fit(CH_train_x, CH_train_y_short)

svr_ch_pred_short = svr_ch.predict(CH_test_x)
Get_score(svr_ch_pred_short, CH_test_y_short)

The Spearman's correlation coefficient is: 0.177


In [None]:
#Long term SVR()
svr_ch = SVR()
svr_ch.fit(CH_train_x, CH_train_y_long)

svr_ch_pred_long = svr_ch.predict(CH_test_x)
Get_score(svr_ch_pred_long, CH_test_y_long)

The Spearman's correlation coefficient is: 0.092


In [None]:
#Short term XGBoost
xgb_ch = XGBRegressor(objective = "reg:squarederror")
xgb_ch.fit(CH_train_x, CH_train_y_short)

xgb_ch_pred_short = xgb_ch.predict(CH_test_x)
Get_score(xgb_ch_pred_short, CH_test_y_short)

The Spearman's correlation coefficient is: 0.242


In [None]:
#Long term XGBoost
xgb_ch = XGBRegressor(objective = "reg:squarederror")
xgb_ch.fit(CH_train_x, CH_train_y_long)

xgb_ch_pred_long = xgb_ch.predict(CH_test_x)
Get_score(xgb_ch_pred_long, CH_test_y_long)

The Spearman's correlation coefficient is: 0.100


In [None]:
#Short term KNN
knn_ch = KNeighborsRegressor()
knn_ch.fit(CH_train_x, CH_train_y_short)

knn_ch_pred_short = knn_ch.predict(CH_test_x)
Get_score(knn_ch_pred_short, CH_test_y_short)

The Spearman's correlation coefficient is: 0.148


In [None]:
#Long term KNN
knn_ch = KNeighborsRegressor()
knn_ch.fit(CH_train_x, CH_train_y_long)

knn_ch_pred_long = knn_ch.predict(CH_test_x)
Get_score(knn_ch_pred_long, CH_test_y_long)

The Spearman's correlation coefficient is: 0.093


In [None]:
#Short term Random Forest
rf_ch = RandomForestRegressor()
rf_ch.fit(CH_train_x, CH_train_y_short)

rf_ch_pred_short = rf_ch.predict(CH_test_x)
Get_score(rf_ch_pred_short, CH_test_y_short)

The Spearman's correlation coefficient is: 0.254


In [None]:
#Long term Random Forest
rf_ch = RandomForestRegressor()
rf_ch.fit(CH_train_x, CH_train_y_long)

rf_ch_pred_long = rf_ch.predict(CH_test_x)
Get_score(rf_ch_pred_long, CH_test_y_long)

The Spearman's correlation coefficient is: 0.106


In [None]:
#Short term extra trees
et_ch = ExtraTreesRegressor()
et_ch.fit(CH_train_x, CH_train_y_short)

et_ch_pred_short = et_ch.predict(CH_test_x)
Get_score(et_ch_pred_short, CH_test_y_short)

The Spearman's correlation coefficient is: 0.257


In [None]:
#Long term extra trees
et_ch = ExtraTreesRegressor()
et_ch.fit(CH_train_x, CH_train_y_long)

et_ch_pred_long = et_ch.predict(CH_test_x)
Get_score(et_ch_pred_long, CH_test_y_long)

The Spearman's correlation coefficient is: 0.062


In [None]:
#short term Ada Boost
adb_ch = AdaBoostRegressor()
adb_ch.fit(CH_train_x, CH_train_y_short)

adb_ch_pred_short = adb_ch.predict(CH_test_x)
Get_score(adb_ch_pred_short, CH_test_y_short)

The Spearman's correlation coefficient is: 0.154


In [None]:
#Long term Ada Boost
adb_ch = AdaBoostRegressor()
adb_ch.fit(CH_train_x, CH_train_y_long)

adb_ch_pred_long = adb_ch.predict(CH_test_x)
Get_score(adb_ch_pred_long, CH_test_y_long)

The Spearman's correlation coefficient is: 0.042


In [None]:
#Short term Gradient boost
gbr_ch = GradientBoostingRegressor()
gbr_ch.fit(CH_train_x, CH_train_y_short)

gbr_ch_pred_short = gbr_ch.predict(CH_test_x)
Get_score(gbr_ch_pred_short, CH_test_y_short)

The Spearman's correlation coefficient is: 0.232


In [None]:
#Long term Gradient boost
gbr_ch = GradientBoostingRegressor()
gbr_ch.fit(CH_train_x, CH_train_y_long)

gbr_ch_pred_long = gbr_ch.predict(CH_test_x)
Get_score(gbr_ch_pred_long, CH_test_y_long)

The Spearman's correlation coefficient is: 0.067


## **Loading HMP data and testing models**

In [23]:
#===================================HMP========================================
HMP_path = '/content/drive/MyDrive/CA684_Assignment/Dev-set/HMP'
HMP_list = []

pbar = pyprind.ProgBar(len(os.listdir(HMP_path)), title='Importing HMP files')

for file in os.listdir(HMP_path):
    path = os.path.join(HMP_path, file)
    HMP_return = list(read_HMP(path))
    file= file.replace(".txt","")
    HMP_list.append([file] + HMP_return)
    pbar.update()

HMP_cols = ["HMP_"+str(i) for i in range(1, 6076)]
HMP_cols = ["video"] + HMP_cols
HMP = pd.DataFrame(HMP_list, columns = HMP_cols)
del HMP_list
del HMP_cols
HMP.head()

Importing HMP files
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:42:24


Unnamed: 0,video,HMP_1,HMP_2,HMP_3,HMP_4,HMP_5,HMP_6,HMP_7,HMP_8,HMP_9,HMP_10,HMP_11,HMP_12,HMP_13,HMP_14,HMP_15,HMP_16,HMP_17,HMP_18,HMP_19,HMP_20,HMP_21,HMP_22,HMP_23,HMP_24,HMP_25,HMP_26,HMP_27,HMP_28,HMP_29,HMP_30,HMP_31,HMP_32,HMP_33,HMP_34,HMP_35,HMP_36,HMP_37,HMP_38,HMP_39,...,HMP_6036,HMP_6037,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075
0,video6634,0.012099,0.001466,1.7e-05,0.0,0.000683,7e-06,0.0,0.0,7e-06,0.0,0.0,0.0,0.0,0.00017,2e-06,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000236,0.000239,0.000324,0.000151,0.000106,5e-05,1.4e-05,0.000156,0.000123,3.1e-05,0.000163,0.000177,9.7e-05,4.7e-05,2.1e-05,2e-06,1.9e-05,2.1e-05,0.0,2e-06,9e-06,2e-06,5.9e-05,2.4e-05,7e-06,5.7e-05,5.2e-05,1.2e-05,2.6e-05,7e-06,1.4e-05,5.7e-05,2.8e-05,7e-06,5.9e-05,9.7e-05,1.2e-05,0.000135,0.000111,7.6e-05
1,video6633,0.07817,0.013597,0.000122,0.0,0.00559,7.4e-05,0.0,0.0,2.6e-05,0.0,0.0,0.0,0.0,0.001137,1.7e-05,0.0,0.0,1.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.8e-05,6.9e-05,9.1e-05,2.6e-05,6.1e-05,2.2e-05,4e-06,9.1e-05,4.8e-05,1.3e-05,4.3e-05,9.5e-05,3e-05,4.3e-05,1.3e-05,0.0,3.9e-05,2.6e-05,0.0,9e-06,1.3e-05,0.0,6.9e-05,2.6e-05,4e-06,0.000143,0.000139,9e-06,1.7e-05,4.3e-05,2.2e-05,4.3e-05,2.6e-05,4e-06,7.8e-05,7.4e-05,2.2e-05,5.2e-05,8.2e-05,6.1e-05
2,video6639,0.112506,0.014053,0.000263,0.0,0.043205,0.00056,0.0,5e-06,0.000364,0.0,0.0,0.0,0.0,0.002472,0.000206,0.0,2e-06,6.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000184,0.00031,0.000629,0.000263,0.000145,6.1e-05,2e-06,9.8e-05,0.000248,7e-06,0.000125,0.000354,7.6e-05,0.000135,4.9e-05,0.0,7.4e-05,0.000103,0.0,1.2e-05,3.9e-05,2e-06,6.6e-05,1.2e-05,2e-06,9.8e-05,0.000221,1.2e-05,2.9e-05,7.6e-05,2.2e-05,0.000261,6.9e-05,0.0,0.000157,0.000558,1.7e-05,0.000285,0.000504,0.000179
3,video6635,0.066072,0.012682,0.000283,0.0,0.006425,0.000375,0.0,2e-06,0.000174,0.0,0.0,0.0,0.0,0.002373,0.000152,0.0,2e-06,0.000102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000492,0.000278,0.000744,0.000484,0.000155,7.7e-05,0.0,0.000161,0.000563,8.4e-05,0.000132,0.000375,0.000358,0.000227,7.5e-05,2e-06,7.7e-05,0.000135,7e-06,9e-06,1.1e-05,7e-06,0.00019,4.9e-05,4e-06,0.000305,0.001002,9.9e-05,3.8e-05,0.000106,7.5e-05,0.000495,0.00021,2e-06,0.000342,0.002027,0.000124,0.000307,0.001239,0.001047
4,video6637,0.03036,0.005834,0.000169,0.0,0.006578,0.000542,0.0,1.6e-05,0.000373,0.0,0.0,0.0,0.0,0.001041,0.000113,0.0,7e-06,0.000102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000334,0.000696,0.001019,0.000197,0.000239,0.000111,1.1e-05,0.000298,0.000389,5.6e-05,0.000291,0.000395,9e-05,0.000115,5.2e-05,2e-06,5.4e-05,7e-05,7e-06,1.4e-05,4.1e-05,1.1e-05,0.000124,4.5e-05,5e-06,0.000176,0.000181,2.9e-05,5.2e-05,9e-05,2.7e-05,0.000199,6.1e-05,2e-06,0.000228,0.000253,2.3e-05,0.000359,0.000375,8.8e-05


Creating a CSV file for Colour Histogram data frame. This CSV file will be used later. You can directly skip to the part where features are being merged **after running the code below.**

In [24]:
HMP.to_csv("/content/drive/MyDrive/HMP.csv")

In [None]:
HMP = HMP.merge(gTruth, on=["video"], how="inner")
HMP.head()

Unnamed: 0,video,HMP_1,HMP_2,HMP_3,HMP_4,HMP_5,HMP_6,HMP_7,HMP_8,HMP_9,HMP_10,HMP_11,HMP_12,HMP_13,HMP_14,HMP_15,HMP_16,HMP_17,HMP_18,HMP_19,HMP_20,HMP_21,HMP_22,HMP_23,HMP_24,HMP_25,HMP_26,HMP_27,HMP_28,HMP_29,HMP_30,HMP_31,HMP_32,HMP_33,HMP_34,HMP_35,HMP_36,HMP_37,HMP_38,HMP_39,...,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075,short-term_memorability,long-term_memorability
0,video6634,0.012099,0.001466,1.7e-05,0.0,0.000683,7e-06,0.0,0.0,7e-06,0.0,0.0,0.0,0.0,0.00017,2e-06,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000324,0.000151,0.000106,5e-05,1.4e-05,0.000156,0.000123,3.1e-05,0.000163,0.000177,9.7e-05,4.7e-05,2.1e-05,2e-06,1.9e-05,2.1e-05,0.0,2e-06,9e-06,2e-06,5.9e-05,2.4e-05,7e-06,5.7e-05,5.2e-05,1.2e-05,2.6e-05,7e-06,1.4e-05,5.7e-05,2.8e-05,7e-06,5.9e-05,9.7e-05,1.2e-05,0.000135,0.000111,7.6e-05,0.743,0.727
1,video6633,0.07817,0.013597,0.000122,0.0,0.00559,7.4e-05,0.0,0.0,2.6e-05,0.0,0.0,0.0,0.0,0.001137,1.7e-05,0.0,0.0,1.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.1e-05,2.6e-05,6.1e-05,2.2e-05,4e-06,9.1e-05,4.8e-05,1.3e-05,4.3e-05,9.5e-05,3e-05,4.3e-05,1.3e-05,0.0,3.9e-05,2.6e-05,0.0,9e-06,1.3e-05,0.0,6.9e-05,2.6e-05,4e-06,0.000143,0.000139,9e-06,1.7e-05,4.3e-05,2.2e-05,4.3e-05,2.6e-05,4e-06,7.8e-05,7.4e-05,2.2e-05,5.2e-05,8.2e-05,6.1e-05,0.804,0.727
2,video6639,0.112506,0.014053,0.000263,0.0,0.043205,0.00056,0.0,5e-06,0.000364,0.0,0.0,0.0,0.0,0.002472,0.000206,0.0,2e-06,6.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000629,0.000263,0.000145,6.1e-05,2e-06,9.8e-05,0.000248,7e-06,0.000125,0.000354,7.6e-05,0.000135,4.9e-05,0.0,7.4e-05,0.000103,0.0,1.2e-05,3.9e-05,2e-06,6.6e-05,1.2e-05,2e-06,9.8e-05,0.000221,1.2e-05,2.9e-05,7.6e-05,2.2e-05,0.000261,6.9e-05,0.0,0.000157,0.000558,1.7e-05,0.000285,0.000504,0.000179,0.645,0.8
3,video6635,0.066072,0.012682,0.000283,0.0,0.006425,0.000375,0.0,2e-06,0.000174,0.0,0.0,0.0,0.0,0.002373,0.000152,0.0,2e-06,0.000102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000744,0.000484,0.000155,7.7e-05,0.0,0.000161,0.000563,8.4e-05,0.000132,0.000375,0.000358,0.000227,7.5e-05,2e-06,7.7e-05,0.000135,7e-06,9e-06,1.1e-05,7e-06,0.00019,4.9e-05,4e-06,0.000305,0.001002,9.9e-05,3.8e-05,0.000106,7.5e-05,0.000495,0.00021,2e-06,0.000342,0.002027,0.000124,0.000307,0.001239,0.001047,0.806,0.923
4,video6637,0.03036,0.005834,0.000169,0.0,0.006578,0.000542,0.0,1.6e-05,0.000373,0.0,0.0,0.0,0.0,0.001041,0.000113,0.0,7e-06,0.000102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.001019,0.000197,0.000239,0.000111,1.1e-05,0.000298,0.000389,5.6e-05,0.000291,0.000395,9e-05,0.000115,5.2e-05,2e-06,5.4e-05,7e-05,7e-06,1.4e-05,4.1e-05,1.1e-05,0.000124,4.5e-05,5e-06,0.000176,0.000181,2.9e-05,5.2e-05,9e-05,2.7e-05,0.000199,6.1e-05,2e-06,0.000228,0.000253,2.3e-05,0.000359,0.000375,8.8e-05,0.865,0.786


In [None]:
HMP_train, HMP_test = train_test_split(HMP, test_size = 0.2)

In [None]:
HMP_train_x, HMP_train_y_short, HMP_train_y_long, HMP_test_x, HMP_test_y_short, HMP_test_y_long = HMP_train.drop(["short-term_memorability", "long-term_memorability"], axis = 1), HMP_train["short-term_memorability"], HMP_train["long-term_memorability"], HMP_test.drop(["short-term_memorability", "long-term_memorability"], axis = 1), HMP_test["short-term_memorability"], HMP_test["long-term_memorability"]

In [None]:
HMP_train_x, HMP_test_x = HMP_train_x.drop(["video"], axis = 1), HMP_test_x.drop(["video"], axis = 1)

Testing different models for HMP (can be skipped)

In [None]:
cols = HMP_train_x.columns
sc = StandardScaler()
sc.fit(HMP_train_x)
HMP_train_x = pd.DataFrame(sc.transform(HMP_train_x), columns = cols)
HMP_test_x = pd.DataFrame(sc.transform(HMP_test_x), columns = cols)

In [None]:
#Short term SVR
svrHMP = SVR()
svrHMP.fit(HMP_train_x, HMP_train_y_short)

svrHMPShort = svrHMP.predict(HMP_test_x)
Get_score(svrHMPShort, HMP_test_y_short)

The Spearman's correlation coefficient is: 0.246


In [None]:
#Long term SVR
svrHMP = SVR()
svrHMP.fit(HMP_train_x, HMP_train_y_long)

svrHMPLong = svrHMP.predict(HMP_test_x)
Get_score(svrHMPLong, HMP_test_y_long) 

The Spearman's correlation coefficient is: 0.097


In [None]:
#Short term XGB
xgbHMP = XGBRegressor(objective = "reg:squarederror")
xgbHMP.fit(HMP_train_x, HMP_train_y_short)

xgbHMPShort = xgbHMP.predict(HMP_test_x)
Get_score(xgbHMPShort, HMP_test_y_short)

The Spearman's correlation coefficient is: 0.261


In [None]:
#Long term XGB
xgbHMP = XGBRegressor(objective = "reg:squarederror")
xgbHMP.fit(HMP_train_x, HMP_train_y_long)

xgbHMPTerm = xgbHMP.predict(HMP_test_x)
Get_score(xgbHMPTerm, HMP_test_y_long)

The Spearman's correlation coefficient is: 0.113


In [None]:
#Short term KNN
knnHMP = KNeighborsRegressor()
knnHMP.fit(HMP_train_x, HMP_train_y_short)

knnHMPShort = knnHMP.predict(HMP_test_x)
Get_score(knnHMPShort, HMP_test_y_short)

The Spearman's correlation coefficient is: 0.188


In [None]:
#Long term KNN
knnHMP = KNeighborsRegressor()
knnHMP.fit(HMP_train_x, HMP_train_y_long)

knnHMPLong = knnHMP.predict(HMP_test_x)
Get_score(knnHMPLong, HMP_test_y_long)

The Spearman's correlation coefficient is: 0.090


In [None]:
#Short term Random forest
randomForestHMP = RandomForestRegressor()
randomForestHMP.fit(HMP_train_x, HMP_train_y_short)

rfHMPShort = randomForestHMP.predict(HMP_test_x)
Get_score(rfHMPShort, HMP_test_y_short)

The Spearman's correlation coefficient is: 0.280


In [None]:
#Long term Random forest
randomForestHMP = RandomForestRegressor()
randomForestHMP.fit(HMP_train_x, HMP_train_y_long)

rfHMPLong = randomForestHMP.predict(HMP_test_x)
Get_score(rfHMPLong, HMP_test_y_long)

The Spearman's correlation coefficient is: 0.138


In [None]:
#Short term Extra trees
etHMP = ExtraTreesRegressor()
etHMP.fit(HMP_train_x, HMP_train_y_short)

etHMPShort = etHMP.predict(HMP_test_x)
Get_score(etHMPShort, HMP_test_y_short)

The Spearman's correlation coefficient is: 0.263


In [None]:
#Long term Extra trees
etHMP = ExtraTreesRegressor()
etHMP.fit(HMP_train_x, HMP_train_y_long)

etHMPLong = etHMP.predict(HMP_test_x)
Get_score(etHMPLong, HMP_test_y_long)

The Spearman's correlation coefficient is: 0.110


In [None]:
#Short term Ada Boost
adaBoostHMP = AdaBoostRegressor()
adaBoostHMP.fit(HMP_train_x, HMP_train_y_short)

adaBoostHMPShort = adaBoostHMP.predict(HMP_test_x)
Get_score(adaBoostHMPShort, HMP_test_y_short)

The Spearman's correlation coefficient is: 0.194


In [None]:
#Long term Ada Boost
adaBoostHMP = AdaBoostRegressor()
adaBoostHMP.fit(HMP_train_x, HMP_train_y_long)

adaBoostHMPLong = adaBoostHMP.predict(HMP_test_x)
Get_score(adaBoostHMPLong, HMP_test_y_long)

The Spearman's correlation coefficient is: 0.041


In [None]:
#Short term Gradient boost
gBoostHMP = GradientBoostingRegressor()
gBoostHMP.fit(HMP_train_x, HMP_train_y_short)

gBoostHMPShort = gBoostHMP.predict(HMP_test_x)
Get_score(gBoostHMPShort, HMP_test_y_short)

The Spearman's correlation coefficient is: 0.250


In [None]:
#Long term gradient boost
gBoostHMP = GradientBoostingRegressor()
gBoostHMP.fit(HMP_train_x, HMP_train_y_long)

gBoostHMPLong = gBoostHMP.predict(HMP_test_x)
Get_score(gBoostHMPLong, HMP_test_y_long)

The Spearman's correlation coefficient is: 0.118


## **Merging 3 features and testing models**

If the code has run till here atleast once, i.e. the CSV files have been created, there are chances that it might run into memory issues. Restart the session and start from this point after running the initialization section **(assuming the google drive didn't change)**.

Loading values from CSVs saved above

In [36]:
#=================================Merging features and predicting===================================

In [None]:
#=====================================C3D==============================
#Loading C3D
C3D = pd.read_csv("/content/drive/MyDrive/C3D.csv", index_col=0) #the index column gets added while saving the file, this a workaround for that
C3D.head(2)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,C3D_62,C3D_63,C3D_64,C3D_65,C3D_66,C3D_67,C3D_68,C3D_69,C3D_70,C3D_71,C3D_72,C3D_73,C3D_74,C3D_75,C3D_76,C3D_77,C3D_78,C3D_79,C3D_80,C3D_81,C3D_82,C3D_83,C3D_84,C3D_85,C3D_86,C3D_87,C3D_88,C3D_89,C3D_90,C3D_91,C3D_92,C3D_93,C3D_94,C3D_95,C3D_96,C3D_97,C3D_98,C3D_99,C3D_100,C3D_101
0,video6632,0.010858,0.010386,0.0,0.0,0.0,0.0,2.7e-07,0.0,1e-08,3.4e-07,8e-08,1e-08,4e-06,0.000105,0.0,4e-08,1e-08,0.0,0.0,0.00013318,0.0,0.0,0.0,0.0,7.6e-07,0.0,9.8e-07,0.0,0.0,0.0,0.0,0.0,6e-08,2.1e-05,5e-08,0.0,1e-08,3e-08,2e-08,...,0.001623,0.970125,1.6e-05,0.001298,3.2e-05,1e-06,0.0,0.0,1e-08,2.5e-07,0.0,0.0,5e-08,0.0,1e-08,1e-08,4.2e-07,0.0,3e-08,2e-08,0.0,0.0,0.0,6e-08,0.0,0.0,9e-08,0.0,0.0,1.1e-07,1.4e-07,0.0,0.0,1.7e-07,0.0,0.0,1e-08,1.3e-06,2.6e-06,8e-08
1,video6634,0.0002,6.5e-05,0.993807,2e-07,4.7e-07,7.3e-05,3.7e-06,0.000337,6.71e-06,2.29e-06,6.38e-06,7.34e-06,1.9e-05,7e-06,3e-06,3.81e-06,1.411e-05,1e-06,3e-06,2.3e-07,1.9e-07,0.002938,5.2e-07,9.2e-07,1.136e-05,1.1e-05,0.00033104,9.2e-07,2.2e-05,8e-08,2.8e-05,1.3e-05,5.778e-05,3.7e-05,1.683e-05,7e-06,3.99e-06,8.03e-06,1.45e-06,...,4.5e-05,2.7e-05,1.3e-05,7.7e-05,5e-05,0.000138,1.2e-05,7e-06,4.1e-07,5.44e-06,3.4e-07,7e-06,8.399e-05,4e-06,3.97e-06,5.9e-07,0.00012853,8.8e-07,2.228e-05,1.105e-05,3e-06,1.5e-05,1.4e-05,4.09e-06,3e-06,5e-06,4.92e-06,5e-06,6.4e-07,2.372e-05,6.61e-06,1e-05,2e-06,5.8e-06,1e-06,1e-06,1.17e-05,1.5e-07,8.3e-07,0.000106


In [None]:
#================================COLOUR HISTOGRAM=============================
CH = pd.read_csv("/content/drive/MyDrive/colourHistogram.csv", index_col=0) #the index column gets added while saving the file, this a workaround for that
CH.head(2)

Unnamed: 0,video,CH_R_0,CH_R_1,CH_R_2,CH_R_3,CH_R_4,CH_R_5,CH_R_6,CH_R_7,CH_R_8,CH_R_9,CH_R_10,CH_R_11,CH_R_12,CH_R_13,CH_R_14,CH_R_15,CH_R_16,CH_R_17,CH_R_18,CH_R_19,CH_R_20,CH_R_21,CH_R_22,CH_R_23,CH_R_24,CH_R_25,CH_R_26,CH_R_27,CH_R_28,CH_R_29,CH_R_30,CH_R_31,CH_R_32,CH_R_33,CH_R_34,CH_R_35,CH_R_36,CH_R_37,CH_R_38,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video7365-56,21713.0,3059.0,3169.0,3088.0,3156.0,3287.0,3411.0,3508.0,3725.0,3761.0,3879.0,4184.0,4347.0,4776.0,4893.0,5452.0,5934.0,6391.0,7001.0,7094.0,8119.0,8523.0,9025.0,9385.0,10135.0,10513.0,11072.0,11354.0,11989.0,12068.0,12828.0,12209.0,12996.0,12179.0,12962.0,12388.0,12976.0,12881.0,13598.0,...,9792.0,11763.0,13870.0,16721.0,19241.0,20207.0,18109.0,13610.0,11246.0,8668.0,6698.0,5636.0,4512.0,3818.0,3429.0,3028.0,2747.0,2243.0,2165.0,1847.0,1626.0,1400.0,1391.0,1140.0,941.0,881.0,783.0,726.0,606.0,580.0,460.0,358.0,341.0,295.0,240.0,249.0,161.0,142.0,120.0,507.0
1,video7370-56,803.0,301.0,552.0,597.0,1001.0,1078.0,1548.0,1630.0,2278.0,2262.0,3295.0,3242.0,4241.0,4475.0,4338.0,5218.0,5166.0,5662.0,5537.0,6164.0,5786.0,5026.0,5350.0,4428.0,4363.0,3700.0,4009.0,4305.0,3595.0,10475.0,20380.0,62714.0,148230.0,176974.0,233718.0,95399.0,221205.0,123922.0,133073.0,...,52.0,56.0,46.0,47.0,47.0,43.0,58.0,62.0,46.0,46.0,39.0,48.0,37.0,53.0,43.0,56.0,36.0,50.0,52.0,47.0,51.0,54.0,59.0,54.0,41.0,68.0,49.0,71.0,60.0,78.0,91.0,75.0,93.0,89.0,104.0,125.0,315.0,235.0,72.0,230.0


In [None]:
k = CH["video"].str.split("-", expand = True)
CH["video"], CH["split"] = k[0], k[1]
CH_a = CH[CH["split"] == "0" ].copy()
CH_b = CH[CH["split"] == "56" ].copy()
CH_c = CH[CH["split"] == "112" ].copy()

CH_a.drop(["split"], axis = 1, inplace = True)
CH_b.drop(["split"], axis = 1, inplace = True)
CH_c.drop(["split"], axis = 1, inplace = True)

CH_new = pd.merge(CH_a, CH_b, on = ["video", "video"])
CH_new = pd.merge(CH_new, CH_c, on = ["video", "video"])

del CH_a
del CH_b
del CH_c
del CH

In [None]:
CH_new.head(2)

Unnamed: 0,video,CH_R_0_x,CH_R_1_x,CH_R_2_x,CH_R_3_x,CH_R_4_x,CH_R_5_x,CH_R_6_x,CH_R_7_x,CH_R_8_x,CH_R_9_x,CH_R_10_x,CH_R_11_x,CH_R_12_x,CH_R_13_x,CH_R_14_x,CH_R_15_x,CH_R_16_x,CH_R_17_x,CH_R_18_x,CH_R_19_x,CH_R_20_x,CH_R_21_x,CH_R_22_x,CH_R_23_x,CH_R_24_x,CH_R_25_x,CH_R_26_x,CH_R_27_x,CH_R_28_x,CH_R_29_x,CH_R_30_x,CH_R_31_x,CH_R_32_x,CH_R_33_x,CH_R_34_x,CH_R_35_x,CH_R_36_x,CH_R_37_x,CH_R_38_x,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video7370,94.0,59.0,71.0,96.0,212.0,230.0,438.0,422.0,766.0,796.0,1256.0,1555.0,2213.0,2520.0,2784.0,3605.0,3664.0,3746.0,3925.0,3878.0,4326.0,3640.0,4141.0,3894.0,4164.0,3659.0,4775.0,4809.0,4650.0,6875.0,10452.0,25779.0,63222.0,54686.0,103439.0,77511.0,142976.0,109866.0,196920.0,...,16.0,12.0,17.0,12.0,9.0,3.0,17.0,11.0,14.0,18.0,12.0,13.0,11.0,18.0,25.0,19.0,24.0,18.0,17.0,16.0,20.0,30.0,15.0,2.0,0.0,3.0,3.0,2.0,1.0,5.0,4.0,6.0,4.0,2.0,4.0,1.0,4.0,7.0,3.0,26.0
1,video7367,717.0,182.0,305.0,446.0,784.0,1107.0,2079.0,2072.0,4083.0,5349.0,6871.0,8291.0,8685.0,9823.0,6367.0,11485.0,12691.0,13899.0,13987.0,12573.0,13352.0,8642.0,13460.0,11907.0,12011.0,10837.0,9803.0,10017.0,7281.0,9816.0,8920.0,10015.0,8629.0,9570.0,9358.0,6762.0,9500.0,9194.0,8340.0,...,2713.0,2577.0,2046.0,1691.0,2523.0,2910.0,2115.0,1520.0,1773.0,1411.0,1338.0,1637.0,1779.0,1777.0,1573.0,1860.0,1502.0,1508.0,1976.0,1957.0,1792.0,1399.0,1350.0,1168.0,1281.0,1242.0,1287.0,880.0,820.0,752.0,453.0,308.0,186.0,139.0,97.0,100.0,106.0,92.0,83.0,200.0


In [None]:
#===================================HMP========================================
HMP = pd.read_csv("/content/drive/MyDrive/HMP.csv", index_col=0) #the index column gets added while saving the file, this a workaround for that
HMP.head(2)

Unnamed: 0,video,HMP_1,HMP_2,HMP_3,HMP_4,HMP_5,HMP_6,HMP_7,HMP_8,HMP_9,HMP_10,HMP_11,HMP_12,HMP_13,HMP_14,HMP_15,HMP_16,HMP_17,HMP_18,HMP_19,HMP_20,HMP_21,HMP_22,HMP_23,HMP_24,HMP_25,HMP_26,HMP_27,HMP_28,HMP_29,HMP_30,HMP_31,HMP_32,HMP_33,HMP_34,HMP_35,HMP_36,HMP_37,HMP_38,HMP_39,...,HMP_6036,HMP_6037,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075
0,video6634,0.012099,0.001466,1.7e-05,0.0,0.000683,7e-06,0.0,0.0,7e-06,0.0,0.0,0.0,0.0,0.00017,2e-06,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000236,0.000239,0.000324,0.000151,0.000106,5e-05,1.4e-05,0.000156,0.000123,3.1e-05,0.000163,0.000177,9.7e-05,4.7e-05,2.1e-05,2e-06,1.9e-05,2.1e-05,0.0,2e-06,9e-06,2e-06,5.9e-05,2.4e-05,7e-06,5.7e-05,5.2e-05,1.2e-05,2.6e-05,7e-06,1.4e-05,5.7e-05,2.8e-05,7e-06,5.9e-05,9.7e-05,1.2e-05,0.000135,0.000111,7.6e-05
1,video6633,0.07817,0.013597,0.000122,0.0,0.00559,7.4e-05,0.0,0.0,2.6e-05,0.0,0.0,0.0,0.0,0.001137,1.7e-05,0.0,0.0,1.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.8e-05,6.9e-05,9.1e-05,2.6e-05,6.1e-05,2.2e-05,4e-06,9.1e-05,4.8e-05,1.3e-05,4.3e-05,9.5e-05,3e-05,4.3e-05,1.3e-05,0.0,3.9e-05,2.6e-05,0.0,9e-06,1.3e-05,0.0,6.9e-05,2.6e-05,4e-06,0.000143,0.000139,9e-06,1.7e-05,4.3e-05,2.2e-05,4.3e-05,2.6e-05,4e-06,7.8e-05,7.4e-05,2.2e-05,5.2e-05,8.2e-05,6.1e-05


Merging the 3 features

In [None]:
#Merging C3D and CH
dfMerge = C3D.merge(CH_new, on=["video"], how="inner")
dfMerge.columns
dfMerge.head(3)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video6632,0.010858,0.010386,0.0,0.0,0.0,0.0,2.7e-07,0.0,1e-08,3.4e-07,8e-08,1e-08,4e-06,0.000105,0.0,4e-08,1e-08,0.0,0.0,0.00013318,0.0,0.0,0.0,0.0,7.6e-07,0.0,9.8e-07,0.0,0.0,0.0,0.0,0.0,6e-08,2.1e-05,5e-08,0.0,1e-08,3e-08,2e-08,...,1518.0,1632.0,2118.0,2313.0,1497.0,1244.0,976.0,920.0,1015.0,994.0,917.0,989.0,1028.0,853.0,817.0,753.0,840.0,754.0,704.0,710.0,598.0,709.0,596.0,701.0,605.0,649.0,741.0,570.0,756.0,593.0,652.0,414.0,398.0,426.0,212.0,193.0,142.0,124.0,125.0,179.0
1,video6634,0.0002,6.5e-05,0.993807,2e-07,4.7e-07,7.3e-05,3.7e-06,0.000337,6.71e-06,2.29e-06,6.38e-06,7.34e-06,1.9e-05,7e-06,3e-06,3.81e-06,1.411e-05,1e-06,3e-06,2.3e-07,1.9e-07,0.002938,5.2e-07,9.2e-07,1.136e-05,1.1e-05,0.00033104,9.2e-07,2.2e-05,8e-08,2.8e-05,1.3e-05,5.778e-05,3.7e-05,1.683e-05,7e-06,3.99e-06,8.03e-06,1.45e-06,...,1801.0,3058.0,2115.0,2743.0,3811.0,3733.0,2720.0,2406.0,2802.0,1807.0,2393.0,4360.0,4375.0,3569.0,3080.0,3590.0,1640.0,3185.0,4538.0,4417.0,4003.0,3156.0,3478.0,2527.0,2722.0,4916.0,4451.0,5572.0,4557.0,5925.0,6668.0,5087.0,12707.0,14920.0,16319.0,20552.0,44012.0,37042.0,12507.0,102085.0
2,video6633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#Merging (C3D + CH) with HMP
dfMerge = dfMerge.merge(HMP, on=["video"], how="inner")
dfMerge.columns
dfMerge.head(3)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,HMP_6036,HMP_6037,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075
0,video6632,0.010858,0.010386,0.0,0.0,0.0,0.0,2.7e-07,0.0,1e-08,3.4e-07,8e-08,1e-08,4e-06,0.000105,0.0,4e-08,1e-08,0.0,0.0,0.00013318,0.0,0.0,0.0,0.0,7.6e-07,0.0,9.8e-07,0.0,0.0,0.0,0.0,0.0,6e-08,2.1e-05,5e-08,0.0,1e-08,3e-08,2e-08,...,9.9e-05,0.000135,0.000274,5.8e-05,6.3e-05,4.7e-05,0.0,7.4e-05,0.000146,2e-05,7.2e-05,0.000205,5.2e-05,4.9e-05,2e-05,0.0,1.8e-05,3.6e-05,2e-06,2e-06,4e-06,0.0,2.5e-05,1.6e-05,0.0,5.2e-05,9.2e-05,9e-06,4e-06,1.3e-05,7e-06,4.9e-05,2.2e-05,0.0,4.9e-05,0.000164,9e-06,9.7e-05,0.000169,0.00011
1,video6634,0.0002,6.5e-05,0.993807,2e-07,4.7e-07,7.3e-05,3.7e-06,0.000337,6.71e-06,2.29e-06,6.38e-06,7.34e-06,1.9e-05,7e-06,3e-06,3.81e-06,1.411e-05,1e-06,3e-06,2.3e-07,1.9e-07,0.002938,5.2e-07,9.2e-07,1.136e-05,1.1e-05,0.00033104,9.2e-07,2.2e-05,8e-08,2.8e-05,1.3e-05,5.778e-05,3.7e-05,1.683e-05,7e-06,3.99e-06,8.03e-06,1.45e-06,...,0.000236,0.000239,0.000324,0.000151,0.000106,5e-05,1.4e-05,0.000156,0.000123,3.1e-05,0.000163,0.000177,9.7e-05,4.7e-05,2.1e-05,2e-06,1.9e-05,2.1e-05,0.0,2e-06,9e-06,2e-06,5.9e-05,2.4e-05,7e-06,5.7e-05,5.2e-05,1.2e-05,2.6e-05,7e-06,1.4e-05,5.7e-05,2.8e-05,7e-06,5.9e-05,9.7e-05,1.2e-05,0.000135,0.000111,7.6e-05
2,video6633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.8e-05,6.9e-05,9.1e-05,2.6e-05,6.1e-05,2.2e-05,4e-06,9.1e-05,4.8e-05,1.3e-05,4.3e-05,9.5e-05,3e-05,4.3e-05,1.3e-05,0.0,3.9e-05,2.6e-05,0.0,9e-06,1.3e-05,0.0,6.9e-05,2.6e-05,4e-06,0.000143,0.000139,9e-06,1.7e-05,4.3e-05,2.2e-05,4.3e-05,2.6e-05,4e-06,7.8e-05,7.4e-05,2.2e-05,5.2e-05,8.2e-05,6.1e-05


In [None]:
dfMerge = dfMerge.merge(gTruth, on=["video"], how="inner")
dfMerge.head(2)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075,short-term_memorability,long-term_memorability
0,video6632,0.010858,0.010386,0.0,0.0,0.0,0.0,2.7e-07,0.0,1e-08,3.4e-07,8e-08,1e-08,4e-06,0.000105,0.0,4e-08,1e-08,0.0,0.0,0.00013318,0.0,0.0,0.0,0.0,7.6e-07,0.0,9.8e-07,0.0,0.0,0.0,0.0,0.0,6e-08,2.1e-05,5e-08,0.0,1e-08,3e-08,2e-08,...,0.000274,5.8e-05,6.3e-05,4.7e-05,0.0,7.4e-05,0.000146,2e-05,7.2e-05,0.000205,5.2e-05,4.9e-05,2e-05,0.0,1.8e-05,3.6e-05,2e-06,2e-06,4e-06,0.0,2.5e-05,1.6e-05,0.0,5.2e-05,9.2e-05,9e-06,4e-06,1.3e-05,7e-06,4.9e-05,2.2e-05,0.0,4.9e-05,0.000164,9e-06,9.7e-05,0.000169,0.00011,0.884,0.9
1,video6634,0.0002,6.5e-05,0.993807,2e-07,4.7e-07,7.3e-05,3.7e-06,0.000337,6.71e-06,2.29e-06,6.38e-06,7.34e-06,1.9e-05,7e-06,3e-06,3.81e-06,1.411e-05,1e-06,3e-06,2.3e-07,1.9e-07,0.002938,5.2e-07,9.2e-07,1.136e-05,1.1e-05,0.00033104,9.2e-07,2.2e-05,8e-08,2.8e-05,1.3e-05,5.778e-05,3.7e-05,1.683e-05,7e-06,3.99e-06,8.03e-06,1.45e-06,...,0.000324,0.000151,0.000106,5e-05,1.4e-05,0.000156,0.000123,3.1e-05,0.000163,0.000177,9.7e-05,4.7e-05,2.1e-05,2e-06,1.9e-05,2.1e-05,0.0,2e-06,9e-06,2e-06,5.9e-05,2.4e-05,7e-06,5.7e-05,5.2e-05,1.2e-05,2.6e-05,7e-06,1.4e-05,5.7e-05,2.8e-05,7e-06,5.9e-05,9.7e-05,1.2e-05,0.000135,0.000111,7.6e-05,0.743,0.727


Train and test data set splitting

In [None]:
df_train, df_test = train_test_split(dfMerge, test_size = 0.2)

In [None]:
df_train_x, df_train_y_short, df_train_y_long, df_test_x, df_test_y_short, df_test_y_long = df_train.drop(["short-term_memorability", "long-term_memorability"], axis = 1), df_train["short-term_memorability"], df_train["long-term_memorability"], df_test.drop(["short-term_memorability", "long-term_memorability"], axis = 1), df_test["short-term_memorability"], df_test["long-term_memorability"]

In [None]:
df_train_x, df_test_x = df_train_x.drop(["video"], axis = 1), df_test_x.drop(["video"], axis = 1)

In [None]:
cols = df_train_x.columns
sc = StandardScaler()
sc.fit(df_train_x)
df_train_x = pd.DataFrame(sc.transform(df_train_x), columns = cols)
df_test_x = pd.DataFrame(sc.transform(df_test_x), columns = cols)

In [None]:
#Short term SVR()
svrMod = SVR()
svrMod.fit(df_train_x, df_train_y_short)

svrShort = svrMod.predict(df_test_x)
Get_score(svrShort, df_test_y_short)

The Spearman's correlation coefficient is: 0.311


In [None]:
#Long term SVR()
svrMod = SVR()
svrMod.fit(df_train_x, df_train_y_long)

svrLong = svrMod.predict(df_test_x)
Get_score(svrLong, df_test_y_long)

The Spearman's correlation coefficient is: 0.146


In [None]:
#Short term XGB
xgbMod = XGBRegressor(objective = "reg:squarederror")
xgbMod.fit(df_train_x, df_train_y_short)

xgbShort = xgbMod.predict(df_test_x)
Get_score(xgbShort, df_test_y_short)

The Spearman's correlation coefficient is: 0.349


In [None]:
#Long term XBG
xgbMod = XGBRegressor(objective = "reg:squarederror")
xgbMod.fit(df_train_x, df_train_y_long)

xgbLong = xgbMod.predict(df_test_x)
Get_score(xgbLong, df_test_y_long)

The Spearman's correlation coefficient is: 0.143


In [None]:
#Short term Extra trees
etMod = ExtraTreesRegressor()
etMod.fit(df_train_x, df_train_y_short)

etShort = etMod.predict(df_test_x)
Get_score(etShort, df_test_y_short)

The Spearman's correlation coefficient is: 0.372


In [None]:
#Long term Extra trees
etMod = ExtraTreesRegressor()
etMod.fit(df_train_x, df_train_y_long)

etLong = etMod.predict(df_test_x)
Get_score(etLong, df_test_y_long)

The Spearman's correlation coefficient is: 0.148


In [None]:
#Short term Gradient boost
gBMod = GradientBoostingRegressor()
gBMod.fit(df_train_x, df_train_y_short)

gBShort = gBMod.predict(df_test_x)
Get_score(gBShort, df_test_y_short)

The Spearman's correlation coefficient is: 0.348


In [None]:
#Long term gradient boost
gBMod = GradientBoostingRegressor()
gBMod.fit(df_train_x, df_train_y_long)

gBLong = gBMod.predict(df_test_x)
Get_score(gBLong, df_test_y_long)

The Spearman's correlation coefficient is: 0.150


In [None]:
#Short term Random forest
rFMod = RandomForestRegressor()
rFMod.fit(df_train_x, df_train_y_short)

rFShort = rFMod.predict(df_test_x)
Get_score(rFShort, df_test_y_short)

The Spearman's correlation coefficient is: 0.372


In [None]:
#Long term Random forest
rFMod = RandomForestRegressor()
rFMod.fit(df_train_x, df_train_y_long)

rFLong = rFMod.predict(df_test_x)
Get_score(rFLong, df_test_y_long)

The Spearman's correlation coefficient is: 0.180


# **Loading test data and prediticting memorability**

## **Importing and merging test data for 3 features**

Importing data

In [25]:
#=====================================C3D==============================
#Loading C3D
C3D_path = '/content/drive/MyDrive/CA684_Assignment/Test-set/C3D_test/'

C3D_list = []

pbar = pyprind.ProgBar(len(os.listdir(C3D_path)), title='Importing C3D files')

for file in os.listdir(C3D_path):
    path = os.path.join(C3D_path, file)
    arrayFile = read_C3D(path)
    file= file.replace(".txt","")
    C3D_list.append([file] + arrayFile)
    pbar.update()

C3D_cols = ["C3D_"+str(i) for i in range(1, 102)]
C3D_cols = ["video"] + C3D_cols
C3D = pd.DataFrame(C3D_list, columns = C3D_cols)
del C3D_list
del C3D_cols
C3D.head()

Importing C3D files
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:03


Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,C3D_62,C3D_63,C3D_64,C3D_65,C3D_66,C3D_67,C3D_68,C3D_69,C3D_70,C3D_71,C3D_72,C3D_73,C3D_74,C3D_75,C3D_76,C3D_77,C3D_78,C3D_79,C3D_80,C3D_81,C3D_82,C3D_83,C3D_84,C3D_85,C3D_86,C3D_87,C3D_88,C3D_89,C3D_90,C3D_91,C3D_92,C3D_93,C3D_94,C3D_95,C3D_96,C3D_97,C3D_98,C3D_99,C3D_100,C3D_101
0,video8768,0.003526,0.001376,0.046187,8e-06,4.1e-05,0.000295,0.000343,0.000406,7e-05,1.5e-05,0.00035398,0.000599,0.000349,0.004517,1.6e-05,5.5e-05,5.7e-05,1.536e-05,1.4e-05,0.000145,4.29e-06,0.153209,6.9e-07,2.9e-05,0.00232581,0.000133,0.002068,1.2e-05,2.6e-05,0.000155,9e-06,0.000481,0.000143,0.00035948,0.00100858,0.000114,4.4e-05,1.411e-05,0.000449,...,0.02175051,0.00024876,0.000621,0.00064,0.012609,0.000678,0.000394,0.003184,1.6e-05,0.000394,1.934e-05,0.000813,0.002481,4e-06,1.9e-05,4.92e-06,0.001956,3e-06,2.6e-05,6.5e-05,0.000133,0.000653,5.7e-05,9.7e-05,0.001134,0.00024,3.6e-05,2.2e-05,5.7e-05,0.000495,0.00114253,0.00032108,0.00016738,0.00581133,0.000252,4.4e-07,0.000568,5.1e-05,6.7e-05,0.682813
1,video8759,0.007772,0.002302,0.003671,4.8e-05,1.7e-05,1.8e-05,0.000262,0.000129,7.9e-05,2.7e-05,0.00011747,0.000437,0.003945,2.8e-05,9e-06,0.00084337,0.000109,1.924e-05,0.000801,1.3e-05,3.24e-06,0.001053,0.00102358,5.4e-05,0.01441502,2e-05,3e-05,8.7e-05,0.000483,0.00121,8.1e-05,0.000505,0.001729,0.00017307,0.00014976,0.000126,0.000186,0.00016908,0.000468,...,0.00273463,0.00165587,0.003851,0.000111,0.001212,0.000126,1.2e-05,0.000106,0.000187,0.000111,0.00037977,3.9e-05,7.7e-05,3.6e-05,0.000649,6.266e-05,0.000469,2.4e-05,0.001465,0.234685,0.000473,0.000303,0.000665,0.000614,6.7e-05,4e-06,0.001427,2e-06,0.000192,0.00164,0.0010991,0.0002225,1.11e-05,0.00086934,6.5e-05,6.15e-06,0.000271,0.017517,0.666933,0.002858
2,video8762,0.001573,0.001398,0.071723,2.5e-05,1.9e-05,0.000827,0.006838,0.000136,2.9e-05,0.000239,0.00056579,0.000672,0.000265,0.000454,1.3e-05,0.0003156,0.000511,8.04e-06,1e-05,4e-06,1.034e-05,0.001801,1.66e-06,4.7e-05,0.00046028,3.1e-05,0.000425,0.000133,0.001446,0.000248,0.000171,5.5e-05,0.006299,0.00026435,5.318e-05,3e-06,8.4e-05,0.00037179,0.000109,...,0.00456947,0.00397909,0.003753,0.002594,0.003642,0.030717,0.003481,0.000595,1.1e-05,0.000506,0.0001954,5.3e-05,0.000353,9e-06,0.000182,9.014e-05,0.000233,0.0019,0.000345,0.000182,0.000258,0.001014,5.4e-05,6.9e-05,0.000193,0.00141,0.000869,1.4e-05,8.6e-05,0.001698,4.608e-05,0.00146747,0.00012797,0.00010193,7e-06,3.577e-05,0.001544,3.4e-05,0.000837,0.804485
3,video8765,0.000358,0.003973,0.008803,0.007741,0.000403,0.000194,0.009095,0.017529,0.000285,2.6e-05,0.00032793,0.013337,7e-06,0.000169,0.000122,0.00068516,0.000816,3.79e-06,0.001249,0.000297,0.00173054,0.017458,0.00045758,0.011841,0.06208798,0.000916,3.3e-05,0.001933,0.00135,0.00305,0.000202,0.138158,0.002006,2.664e-05,0.01112671,0.028939,0.007646,0.01535432,0.006985,...,0.00025708,0.00088913,0.001144,0.003669,0.006591,0.000239,0.004588,0.003227,0.000484,0.000935,0.06927425,0.000498,0.0166,0.000226,0.086639,0.00010256,0.001129,2.6e-05,0.004527,0.003472,0.045509,0.00668,0.001158,0.001092,0.019399,0.00162,0.004252,0.003154,0.000559,0.014796,0.01822863,0.05824094,0.00266222,0.02824549,0.000521,5.476e-05,0.002431,0.000706,0.045552,0.001591
4,video8758,1e-06,1.2e-05,2.5e-05,1.2e-05,5.8e-05,0.000572,2e-06,2e-06,3e-05,3e-06,1.4e-07,0.001003,0.000391,2e-06,4e-06,7.8e-07,8e-06,8.7e-07,0.000134,1e-06,9.8e-07,0.069743,3.31e-06,0.000528,2.8e-07,4.5e-05,1.3e-05,7e-06,2e-06,2e-06,0.03141,0.000437,3e-06,1.3e-07,2e-08,4.5e-05,0.000117,4.9e-07,1e-06,...,4.9e-07,6.5e-07,5e-06,3.6e-05,2.2e-05,2e-06,5.1e-05,2e-06,1.9e-05,3e-06,4e-07,0.000324,0.869472,0.00012,0.005333,2e-08,1e-06,2e-06,1e-06,1.7e-05,2.4e-05,0.002599,1e-05,0.010963,1.8e-05,1.1e-05,1.3e-05,3e-06,2e-06,3e-06,4.6e-07,1.9e-07,2.3e-07,4.5e-07,2e-06,0.00039646,2e-05,1e-06,2.6e-05,0.000154


In [12]:
C3D.to_csv("/content/drive/MyDrive/C3D_test.csv")

In [26]:
#================================COLOUR HISTOGRAM=============================
CH_path = '/content/drive/MyDrive/CA684_Assignment/Test-set/ColorHistogram_test/'

CH_list = []

pbar = pyprind.ProgBar(len(os.listdir(CH_path)), title='Importing ColorHistogram files')

for file in os.listdir(CH_path):
    path = os.path.join(CH_path, file)
    arrayFile = read_ColorHistogram(path)
    arrayFile = list(arrayFile[0]) + list(arrayFile[1]) + list(arrayFile[2])
    file= file.replace(".txt","")
    CH_list.append([file] + arrayFile)
    pbar.update()

CH_cols = ["CH_R_"+str(i) for i in range(0, 256)] + ["CH_G_"+str(i) for i in range(0, 256)] + ["CH_B_"+str(i) for i in range(0, 256)]
CH_cols = ["video"] + CH_cols
CH = pd.DataFrame(CH_list, columns = CH_cols)
del CH_list
del CH_cols

Importing ColorHistogram files
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:16


In [15]:
CH.to_csv("/content/drive/MyDrive/colourHistogram_test.csv")

In [27]:
k = CH["video"].str.split("-", expand = True)
CH["video"], CH["split"] = k[0], k[1]
CH_a = CH[CH["split"] == "0" ].copy()
CH_b = CH[CH["split"] == "56" ].copy()
CH_c = CH[CH["split"] == "112" ].copy()

CH_a.drop(["split"], axis = 1, inplace = True)
CH_b.drop(["split"], axis = 1, inplace = True)
CH_c.drop(["split"], axis = 1, inplace = True)

CH_new = pd.merge(CH_a, CH_b, on = ["video", "video"])
CH_new = pd.merge(CH_new, CH_c, on = ["video", "video"])

del CH_a
del CH_b
del CH_c
del CH
CH_new.head(2)

Unnamed: 0,video,CH_R_0_x,CH_R_1_x,CH_R_2_x,CH_R_3_x,CH_R_4_x,CH_R_5_x,CH_R_6_x,CH_R_7_x,CH_R_8_x,CH_R_9_x,CH_R_10_x,CH_R_11_x,CH_R_12_x,CH_R_13_x,CH_R_14_x,CH_R_15_x,CH_R_16_x,CH_R_17_x,CH_R_18_x,CH_R_19_x,CH_R_20_x,CH_R_21_x,CH_R_22_x,CH_R_23_x,CH_R_24_x,CH_R_25_x,CH_R_26_x,CH_R_27_x,CH_R_28_x,CH_R_29_x,CH_R_30_x,CH_R_31_x,CH_R_32_x,CH_R_33_x,CH_R_34_x,CH_R_35_x,CH_R_36_x,CH_R_37_x,CH_R_38_x,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video9588,9048.0,14717.0,50970.0,96062.0,74515.0,95839.0,84684.0,56331.0,61446.0,48306.0,44556.0,39257.0,36397.0,38882.0,25511.0,32746.0,19863.0,35267.0,21932.0,25941.0,22828.0,13562.0,16046.0,10866.0,14264.0,11217.0,11939.0,12656.0,9375.0,12017.0,11338.0,13816.0,13546.0,14627.0,15711.0,12683.0,18227.0,15561.0,15567.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,video9591,129.0,37.0,48.0,79.0,251.0,121.0,477.0,393.0,549.0,566.0,483.0,805.0,586.0,1285.0,1317.0,960.0,963.0,865.0,1612.0,1351.0,1275.0,1350.0,1478.0,1648.0,1841.0,1874.0,1897.0,1989.0,2119.0,1854.0,2135.0,2040.0,2190.0,2058.0,2413.0,2645.0,2903.0,2881.0,3176.0,...,24234.0,21691.0,19194.0,25049.0,21325.0,24313.0,22739.0,24920.0,19246.0,20735.0,24482.0,19985.0,22183.0,20636.0,22367.0,18198.0,19920.0,21484.0,17827.0,20953.0,17775.0,18981.0,15188.0,15057.0,17455.0,16316.0,18073.0,15504.0,15779.0,12098.0,13112.0,14704.0,13629.0,17280.0,18267.0,22202.0,22453.0,29858.0,29952.0,52024.0


In [28]:
#===================================HMP========================================
HMP_path = '/content/drive/MyDrive/CA684_Assignment/Test-set/HMP_test/'
HMP_list = []

pbar = pyprind.ProgBar(len(os.listdir(HMP_path)), title='Importing HMP files')

for file in os.listdir(HMP_path):
    path = os.path.join(HMP_path, file)
    HMP_return = list(read_HMP(path))
    file= file.replace(".txt","")
    HMP_list.append([file] + HMP_return)
    pbar.update()

HMP_cols = ["HMP_"+str(i) for i in range(1, 6076)]
HMP_cols = ["video"] + HMP_cols
HMP = pd.DataFrame(HMP_list, columns = HMP_cols)
del HMP_list
del HMP_cols
HMP.head()

Importing HMP files
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:13


Unnamed: 0,video,HMP_1,HMP_2,HMP_3,HMP_4,HMP_5,HMP_6,HMP_7,HMP_8,HMP_9,HMP_10,HMP_11,HMP_12,HMP_13,HMP_14,HMP_15,HMP_16,HMP_17,HMP_18,HMP_19,HMP_20,HMP_21,HMP_22,HMP_23,HMP_24,HMP_25,HMP_26,HMP_27,HMP_28,HMP_29,HMP_30,HMP_31,HMP_32,HMP_33,HMP_34,HMP_35,HMP_36,HMP_37,HMP_38,HMP_39,...,HMP_6036,HMP_6037,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075
0,video8763,0.00206,0.000583,1.1e-05,0.0,0.000395,3.1e-05,0.0,0.0,7e-06,0.0,0.0,0.0,0.0,3.5e-05,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000173,0.000901,0.001298,8.5e-05,0.00037,0.000221,7e-06,0.000421,0.000616,2.6e-05,0.000526,0.000793,3.3e-05,0.000346,0.000145,0.0,0.000129,0.000186,1.1e-05,2.2e-05,3.5e-05,0.0,0.000173,8.8e-05,4e-06,0.00027,0.000368,1.5e-05,8.3e-05,7.2e-05,4e-06,0.000366,0.000169,2e-06,0.000388,0.00059,3.3e-05,0.000655,0.000671,5.5e-05
1,video8758,0.015623,0.005571,0.000267,0.0,0.003743,0.000415,0.0,2e-06,8.2e-05,0.0,0.0,0.0,0.0,0.000805,0.000124,0.0,2e-06,2.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000337,0.000845,0.000984,0.00026,0.000468,0.000141,7e-06,0.000247,0.000498,5.5e-05,0.000457,0.000457,0.000115,0.000382,9.7e-05,2e-06,9.9e-05,0.000152,1.5e-05,2.6e-05,3.7e-05,7e-06,0.000205,6e-05,4e-06,0.000265,0.000406,3.3e-05,6.8e-05,6.2e-05,4.9e-05,0.000944,0.000185,7e-06,0.000439,0.000955,8.4e-05,0.000851,0.000752,0.000273
2,video8768,0.00533,0.001166,2e-06,0.0,0.000927,1.1e-05,0.0,0.0,4e-06,0.0,0.0,0.0,0.0,7.4e-05,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000328,0.000293,0.000617,0.000185,0.00017,0.000101,7e-06,0.000246,0.000393,3.4e-05,0.000288,0.000384,0.00015,8.7e-05,2.9e-05,2e-06,2.5e-05,3.4e-05,2e-06,2e-06,1.3e-05,7e-06,9.6e-05,5.8e-05,0.0,0.000139,0.00013,1.8e-05,1.6e-05,4e-05,1.8e-05,9.2e-05,2.5e-05,1.6e-05,0.000107,0.000199,9e-06,0.000152,0.000188,9.2e-05
3,video8764,0.040748,0.016237,0.000303,0.0,0.007668,0.000118,0.0,0.0,3.3e-05,0.0,0.0,0.0,0.0,0.002083,0.000107,0.0,0.0,1.3e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.1e-05,0.000383,0.001531,4e-06,5.3e-05,0.000316,0.0,0.000114,0.00063,0.0,0.000118,0.001318,2e-06,4.7e-05,0.000151,0.0,5.6e-05,0.00018,0.0,2.7e-05,0.0001,0.0,1.6e-05,2e-05,0.0,8.2e-05,0.000238,0.0,1.8e-05,0.0001,2e-06,5.8e-05,0.000154,0.0,0.000125,0.00053,0.0,7.3e-05,0.000572,2e-06
4,video8760,0.056045,0.012873,0.000278,0.0,0.006645,0.000157,0.0,0.0,6.9e-05,0.0,0.0,0.0,0.0,0.001593,0.000112,0.0,0.0,1.6e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00015,0.000152,0.000217,0.000114,2.7e-05,1.3e-05,2e-06,8.3e-05,9.6e-05,1.1e-05,5.6e-05,0.00011,4.5e-05,6.3e-05,1.6e-05,0.0,1.8e-05,1.8e-05,4e-06,4e-06,2e-06,0.0,2e-05,9e-06,0.0,3.6e-05,0.000103,9e-06,2e-06,1.8e-05,2.5e-05,7.4e-05,2.7e-05,4e-06,3.6e-05,0.000305,1.1e-05,8.7e-05,0.000217,0.000345


In [21]:
HMP.to_csv("/content/drive/MyDrive/HMP_test.csv")

Merging features

In [31]:
#Merging C3D and CH
dfTest = C3D.merge(CH_new, on=["video"], how="inner")
dfTest.columns
dfTest.head(3)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video8768,0.003526,0.001376,0.046187,8e-06,4.1e-05,0.000295,0.000343,0.000406,7e-05,1.5e-05,0.000354,0.000599,0.000349,0.004517,1.6e-05,5.5e-05,5.7e-05,1.5e-05,1.4e-05,0.000145,4e-06,0.153209,6.9e-07,2.9e-05,0.002326,0.000133,0.002068,1.2e-05,2.6e-05,0.000155,9e-06,0.000481,0.000143,0.000359,0.001009,0.000114,4.4e-05,1.4e-05,0.000449,...,1252.0,1776.0,1087.0,1101.0,1022.0,800.0,800.0,437.0,540.0,408.0,453.0,463.0,376.0,346.0,235.0,320.0,205.0,216.0,239.0,181.0,172.0,138.0,104.0,96.0,94.0,81.0,90.0,87.0,52.0,51.0,63.0,56.0,53.0,58.0,49.0,55.0,53.0,42.0,33.0,231.0
1,video8759,0.007772,0.002302,0.003671,4.8e-05,1.7e-05,1.8e-05,0.000262,0.000129,7.9e-05,2.7e-05,0.000117,0.000437,0.003945,2.8e-05,9e-06,0.000843,0.000109,1.9e-05,0.000801,1.3e-05,3e-06,0.001053,0.00102358,5.4e-05,0.014415,2e-05,3e-05,8.7e-05,0.000483,0.00121,8.1e-05,0.000505,0.001729,0.000173,0.00015,0.000126,0.000186,0.000169,0.000468,...,724.0,1647.0,1338.0,2147.0,2140.0,2234.0,4089.0,945.0,2803.0,375.0,612.0,102.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,video8762,0.001573,0.001398,0.071723,2.5e-05,1.9e-05,0.000827,0.006838,0.000136,2.9e-05,0.000239,0.000566,0.000672,0.000265,0.000454,1.3e-05,0.000316,0.000511,8e-06,1e-05,4e-06,1e-05,0.001801,1.66e-06,4.7e-05,0.00046,3.1e-05,0.000425,0.000133,0.001446,0.000248,0.000171,5.5e-05,0.006299,0.000264,5.3e-05,3e-06,8.4e-05,0.000372,0.000109,...,28.0,44.0,34.0,28.0,31.0,23.0,33.0,24.0,15.0,12.0,16.0,22.0,18.0,7.0,9.0,16.0,6.0,10.0,10.0,12.0,7.0,9.0,15.0,9.0,6.0,6.0,4.0,5.0,3.0,6.0,4.0,3.0,2.0,0.0,5.0,0.0,1.0,1.0,1.0,0.0


In [32]:
#Merging (C3D + CH) with HMP
dfTest = dfTest.merge(HMP, on=["video"], how="inner")
dfTest.columns
dfTest.head(3)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,HMP_6036,HMP_6037,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075
0,video8768,0.003526,0.001376,0.046187,8e-06,4.1e-05,0.000295,0.000343,0.000406,7e-05,1.5e-05,0.000354,0.000599,0.000349,0.004517,1.6e-05,5.5e-05,5.7e-05,1.5e-05,1.4e-05,0.000145,4e-06,0.153209,6.9e-07,2.9e-05,0.002326,0.000133,0.002068,1.2e-05,2.6e-05,0.000155,9e-06,0.000481,0.000143,0.000359,0.001009,0.000114,4.4e-05,1.4e-05,0.000449,...,0.000328,0.000293,0.000617,0.000185,0.00017,0.000101,7e-06,0.000246,0.000393,3.4e-05,0.000288,0.000384,0.00015,8.7e-05,2.9e-05,2e-06,2.5e-05,3.4e-05,2e-06,2e-06,1.3e-05,7e-06,9.6e-05,5.8e-05,0.0,0.000139,0.00013,1.8e-05,1.6e-05,4e-05,1.8e-05,9.2e-05,2.5e-05,1.6e-05,0.000107,0.000199,9e-06,0.000152,0.000188,9.2e-05
1,video8759,0.007772,0.002302,0.003671,4.8e-05,1.7e-05,1.8e-05,0.000262,0.000129,7.9e-05,2.7e-05,0.000117,0.000437,0.003945,2.8e-05,9e-06,0.000843,0.000109,1.9e-05,0.000801,1.3e-05,3e-06,0.001053,0.00102358,5.4e-05,0.014415,2e-05,3e-05,8.7e-05,0.000483,0.00121,8.1e-05,0.000505,0.001729,0.000173,0.00015,0.000126,0.000186,0.000169,0.000468,...,0.000369,6.4e-05,0.000148,0.000275,6.8e-05,9e-06,2e-06,3.9e-05,9.3e-05,2.3e-05,4.8e-05,0.000107,0.000168,0.000218,2.3e-05,0.0,1.8e-05,3e-05,0.0,0.0,0.0,0.0,4.3e-05,1.6e-05,0.0,0.000134,0.000505,5.5e-05,1.4e-05,4.6e-05,3.6e-05,0.00025,6.1e-05,2e-06,9.6e-05,0.000667,0.0001,0.000248,0.000831,0.001741
2,video8762,0.001573,0.001398,0.071723,2.5e-05,1.9e-05,0.000827,0.006838,0.000136,2.9e-05,0.000239,0.000566,0.000672,0.000265,0.000454,1.3e-05,0.000316,0.000511,8e-06,1e-05,4e-06,1e-05,0.001801,1.66e-06,4.7e-05,0.00046,3.1e-05,0.000425,0.000133,0.001446,0.000248,0.000171,5.5e-05,0.006299,0.000264,5.3e-05,3e-06,8.4e-05,0.000372,0.000109,...,5.1e-05,0.000134,0.0003,4.3e-05,4.3e-05,2.9e-05,4e-06,8.7e-05,0.00013,1.1e-05,9.4e-05,0.00013,7e-06,3.3e-05,4e-05,0.0,1.8e-05,1.1e-05,7e-06,4e-06,0.0,0.0,4e-05,2.9e-05,0.0,6.1e-05,7.6e-05,4e-06,7e-06,2.2e-05,1.1e-05,2.9e-05,2.5e-05,4e-06,6.1e-05,8.7e-05,1.8e-05,9.8e-05,0.000105,1.8e-05


## Reading and merging features for train data set

In [37]:
#=================================Merging features and predicting===================================

In [39]:
#=====================================C3D==============================
#Loading C3D
C3DTrain = pd.read_csv("/content/drive/MyDrive/C3D.csv", index_col=0) #the index column gets added while saving the file, this a workaround for that
C3DTrain.head(2)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,C3D_62,C3D_63,C3D_64,C3D_65,C3D_66,C3D_67,C3D_68,C3D_69,C3D_70,C3D_71,C3D_72,C3D_73,C3D_74,C3D_75,C3D_76,C3D_77,C3D_78,C3D_79,C3D_80,C3D_81,C3D_82,C3D_83,C3D_84,C3D_85,C3D_86,C3D_87,C3D_88,C3D_89,C3D_90,C3D_91,C3D_92,C3D_93,C3D_94,C3D_95,C3D_96,C3D_97,C3D_98,C3D_99,C3D_100,C3D_101
0,video6632,0.010858,0.010386,0.0,0.0,0.0,0.0,2.7e-07,0.0,1e-08,3.4e-07,8e-08,1e-08,4e-06,0.000105,0.0,4e-08,1e-08,0.0,0.0,0.00013318,0.0,0.0,0.0,0.0,7.6e-07,0.0,9.8e-07,0.0,0.0,0.0,0.0,0.0,6e-08,2.1e-05,5e-08,0.0,1e-08,3e-08,2e-08,...,0.001623,0.970125,1.6e-05,0.001298,3.2e-05,1e-06,0.0,0.0,1e-08,2.5e-07,0.0,0.0,5e-08,0.0,1e-08,1e-08,4.2e-07,0.0,3e-08,2e-08,0.0,0.0,0.0,6e-08,0.0,0.0,9e-08,0.0,0.0,1.1e-07,1.4e-07,0.0,0.0,1.7e-07,0.0,0.0,1e-08,1.3e-06,2.6e-06,8e-08
1,video6634,0.0002,6.5e-05,0.993807,2e-07,4.7e-07,7.3e-05,3.7e-06,0.000337,6.71e-06,2.29e-06,6.38e-06,7.34e-06,1.9e-05,7e-06,3e-06,3.81e-06,1.411e-05,1e-06,3e-06,2.3e-07,1.9e-07,0.002938,5.2e-07,9.2e-07,1.136e-05,1.1e-05,0.00033104,9.2e-07,2.2e-05,8e-08,2.8e-05,1.3e-05,5.778e-05,3.7e-05,1.683e-05,7e-06,3.99e-06,8.03e-06,1.45e-06,...,4.5e-05,2.7e-05,1.3e-05,7.7e-05,5e-05,0.000138,1.2e-05,7e-06,4.1e-07,5.44e-06,3.4e-07,7e-06,8.399e-05,4e-06,3.97e-06,5.9e-07,0.00012853,8.8e-07,2.228e-05,1.105e-05,3e-06,1.5e-05,1.4e-05,4.09e-06,3e-06,5e-06,4.92e-06,5e-06,6.4e-07,2.372e-05,6.61e-06,1e-05,2e-06,5.8e-06,1e-06,1e-06,1.17e-05,1.5e-07,8.3e-07,0.000106


In [40]:
#================================COLOUR HISTOGRAM=============================
CH = pd.read_csv("/content/drive/MyDrive/colourHistogram.csv", index_col=0) #the index column gets added while saving the file, this a workaround for that
CH.head(2)

Unnamed: 0,video,CH_R_0,CH_R_1,CH_R_2,CH_R_3,CH_R_4,CH_R_5,CH_R_6,CH_R_7,CH_R_8,CH_R_9,CH_R_10,CH_R_11,CH_R_12,CH_R_13,CH_R_14,CH_R_15,CH_R_16,CH_R_17,CH_R_18,CH_R_19,CH_R_20,CH_R_21,CH_R_22,CH_R_23,CH_R_24,CH_R_25,CH_R_26,CH_R_27,CH_R_28,CH_R_29,CH_R_30,CH_R_31,CH_R_32,CH_R_33,CH_R_34,CH_R_35,CH_R_36,CH_R_37,CH_R_38,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video7365-56,21713.0,3059.0,3169.0,3088.0,3156.0,3287.0,3411.0,3508.0,3725.0,3761.0,3879.0,4184.0,4347.0,4776.0,4893.0,5452.0,5934.0,6391.0,7001.0,7094.0,8119.0,8523.0,9025.0,9385.0,10135.0,10513.0,11072.0,11354.0,11989.0,12068.0,12828.0,12209.0,12996.0,12179.0,12962.0,12388.0,12976.0,12881.0,13598.0,...,9792.0,11763.0,13870.0,16721.0,19241.0,20207.0,18109.0,13610.0,11246.0,8668.0,6698.0,5636.0,4512.0,3818.0,3429.0,3028.0,2747.0,2243.0,2165.0,1847.0,1626.0,1400.0,1391.0,1140.0,941.0,881.0,783.0,726.0,606.0,580.0,460.0,358.0,341.0,295.0,240.0,249.0,161.0,142.0,120.0,507.0
1,video7370-56,803.0,301.0,552.0,597.0,1001.0,1078.0,1548.0,1630.0,2278.0,2262.0,3295.0,3242.0,4241.0,4475.0,4338.0,5218.0,5166.0,5662.0,5537.0,6164.0,5786.0,5026.0,5350.0,4428.0,4363.0,3700.0,4009.0,4305.0,3595.0,10475.0,20380.0,62714.0,148230.0,176974.0,233718.0,95399.0,221205.0,123922.0,133073.0,...,52.0,56.0,46.0,47.0,47.0,43.0,58.0,62.0,46.0,46.0,39.0,48.0,37.0,53.0,43.0,56.0,36.0,50.0,52.0,47.0,51.0,54.0,59.0,54.0,41.0,68.0,49.0,71.0,60.0,78.0,91.0,75.0,93.0,89.0,104.0,125.0,315.0,235.0,72.0,230.0


In [41]:
k = CH["video"].str.split("-", expand = True)
CH["video"], CH["split"] = k[0], k[1]
CH_a = CH[CH["split"] == "0" ].copy()
CH_b = CH[CH["split"] == "56" ].copy()
CH_c = CH[CH["split"] == "112" ].copy()

CH_a.drop(["split"], axis = 1, inplace = True)
CH_b.drop(["split"], axis = 1, inplace = True)
CH_c.drop(["split"], axis = 1, inplace = True)

CH_new = pd.merge(CH_a, CH_b, on = ["video", "video"])
CH_new = pd.merge(CH_new, CH_c, on = ["video", "video"])

del CH_a
del CH_b
del CH_c
del CH

In [42]:
CH_new.head(2)

Unnamed: 0,video,CH_R_0_x,CH_R_1_x,CH_R_2_x,CH_R_3_x,CH_R_4_x,CH_R_5_x,CH_R_6_x,CH_R_7_x,CH_R_8_x,CH_R_9_x,CH_R_10_x,CH_R_11_x,CH_R_12_x,CH_R_13_x,CH_R_14_x,CH_R_15_x,CH_R_16_x,CH_R_17_x,CH_R_18_x,CH_R_19_x,CH_R_20_x,CH_R_21_x,CH_R_22_x,CH_R_23_x,CH_R_24_x,CH_R_25_x,CH_R_26_x,CH_R_27_x,CH_R_28_x,CH_R_29_x,CH_R_30_x,CH_R_31_x,CH_R_32_x,CH_R_33_x,CH_R_34_x,CH_R_35_x,CH_R_36_x,CH_R_37_x,CH_R_38_x,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video7370,94.0,59.0,71.0,96.0,212.0,230.0,438.0,422.0,766.0,796.0,1256.0,1555.0,2213.0,2520.0,2784.0,3605.0,3664.0,3746.0,3925.0,3878.0,4326.0,3640.0,4141.0,3894.0,4164.0,3659.0,4775.0,4809.0,4650.0,6875.0,10452.0,25779.0,63222.0,54686.0,103439.0,77511.0,142976.0,109866.0,196920.0,...,16.0,12.0,17.0,12.0,9.0,3.0,17.0,11.0,14.0,18.0,12.0,13.0,11.0,18.0,25.0,19.0,24.0,18.0,17.0,16.0,20.0,30.0,15.0,2.0,0.0,3.0,3.0,2.0,1.0,5.0,4.0,6.0,4.0,2.0,4.0,1.0,4.0,7.0,3.0,26.0
1,video7367,717.0,182.0,305.0,446.0,784.0,1107.0,2079.0,2072.0,4083.0,5349.0,6871.0,8291.0,8685.0,9823.0,6367.0,11485.0,12691.0,13899.0,13987.0,12573.0,13352.0,8642.0,13460.0,11907.0,12011.0,10837.0,9803.0,10017.0,7281.0,9816.0,8920.0,10015.0,8629.0,9570.0,9358.0,6762.0,9500.0,9194.0,8340.0,...,2713.0,2577.0,2046.0,1691.0,2523.0,2910.0,2115.0,1520.0,1773.0,1411.0,1338.0,1637.0,1779.0,1777.0,1573.0,1860.0,1502.0,1508.0,1976.0,1957.0,1792.0,1399.0,1350.0,1168.0,1281.0,1242.0,1287.0,880.0,820.0,752.0,453.0,308.0,186.0,139.0,97.0,100.0,106.0,92.0,83.0,200.0


In [43]:
#===================================HMP========================================
HMPTrain = pd.read_csv("/content/drive/MyDrive/HMP.csv", index_col=0) #the index column gets added while saving the file, this a workaround for that
HMPTrain.head(2)

Unnamed: 0,video,HMP_1,HMP_2,HMP_3,HMP_4,HMP_5,HMP_6,HMP_7,HMP_8,HMP_9,HMP_10,HMP_11,HMP_12,HMP_13,HMP_14,HMP_15,HMP_16,HMP_17,HMP_18,HMP_19,HMP_20,HMP_21,HMP_22,HMP_23,HMP_24,HMP_25,HMP_26,HMP_27,HMP_28,HMP_29,HMP_30,HMP_31,HMP_32,HMP_33,HMP_34,HMP_35,HMP_36,HMP_37,HMP_38,HMP_39,...,HMP_6036,HMP_6037,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075
0,video6634,0.012099,0.001466,1.7e-05,0.0,0.000683,7e-06,0.0,0.0,7e-06,0.0,0.0,0.0,0.0,0.00017,2e-06,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000236,0.000239,0.000324,0.000151,0.000106,5e-05,1.4e-05,0.000156,0.000123,3.1e-05,0.000163,0.000177,9.7e-05,4.7e-05,2.1e-05,2e-06,1.9e-05,2.1e-05,0.0,2e-06,9e-06,2e-06,5.9e-05,2.4e-05,7e-06,5.7e-05,5.2e-05,1.2e-05,2.6e-05,7e-06,1.4e-05,5.7e-05,2.8e-05,7e-06,5.9e-05,9.7e-05,1.2e-05,0.000135,0.000111,7.6e-05
1,video6633,0.07817,0.013597,0.000122,0.0,0.00559,7.4e-05,0.0,0.0,2.6e-05,0.0,0.0,0.0,0.0,0.001137,1.7e-05,0.0,0.0,1.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.8e-05,6.9e-05,9.1e-05,2.6e-05,6.1e-05,2.2e-05,4e-06,9.1e-05,4.8e-05,1.3e-05,4.3e-05,9.5e-05,3e-05,4.3e-05,1.3e-05,0.0,3.9e-05,2.6e-05,0.0,9e-06,1.3e-05,0.0,6.9e-05,2.6e-05,4e-06,0.000143,0.000139,9e-06,1.7e-05,4.3e-05,2.2e-05,4.3e-05,2.6e-05,4e-06,7.8e-05,7.4e-05,2.2e-05,5.2e-05,8.2e-05,6.1e-05


Merging the 3 features for training

In [44]:
#Merging C3D and CH
dfTrain = C3DTrain.merge(CH_new, on=["video"], how="inner")
dfTrain.columns
dfTrain.head(2)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,CH_B_216,CH_B_217,CH_B_218,CH_B_219,CH_B_220,CH_B_221,CH_B_222,CH_B_223,CH_B_224,CH_B_225,CH_B_226,CH_B_227,CH_B_228,CH_B_229,CH_B_230,CH_B_231,CH_B_232,CH_B_233,CH_B_234,CH_B_235,CH_B_236,CH_B_237,CH_B_238,CH_B_239,CH_B_240,CH_B_241,CH_B_242,CH_B_243,CH_B_244,CH_B_245,CH_B_246,CH_B_247,CH_B_248,CH_B_249,CH_B_250,CH_B_251,CH_B_252,CH_B_253,CH_B_254,CH_B_255
0,video6632,0.010858,0.010386,0.0,0.0,0.0,0.0,2.7e-07,0.0,1e-08,3.4e-07,8e-08,1e-08,4e-06,0.000105,0.0,4e-08,1e-08,0.0,0.0,0.00013318,0.0,0.0,0.0,0.0,7.6e-07,0.0,9.8e-07,0.0,0.0,0.0,0.0,0.0,6e-08,2.1e-05,5e-08,0.0,1e-08,3e-08,2e-08,...,1518.0,1632.0,2118.0,2313.0,1497.0,1244.0,976.0,920.0,1015.0,994.0,917.0,989.0,1028.0,853.0,817.0,753.0,840.0,754.0,704.0,710.0,598.0,709.0,596.0,701.0,605.0,649.0,741.0,570.0,756.0,593.0,652.0,414.0,398.0,426.0,212.0,193.0,142.0,124.0,125.0,179.0
1,video6634,0.0002,6.5e-05,0.993807,2e-07,4.7e-07,7.3e-05,3.7e-06,0.000337,6.71e-06,2.29e-06,6.38e-06,7.34e-06,1.9e-05,7e-06,3e-06,3.81e-06,1.411e-05,1e-06,3e-06,2.3e-07,1.9e-07,0.002938,5.2e-07,9.2e-07,1.136e-05,1.1e-05,0.00033104,9.2e-07,2.2e-05,8e-08,2.8e-05,1.3e-05,5.778e-05,3.7e-05,1.683e-05,7e-06,3.99e-06,8.03e-06,1.45e-06,...,1801.0,3058.0,2115.0,2743.0,3811.0,3733.0,2720.0,2406.0,2802.0,1807.0,2393.0,4360.0,4375.0,3569.0,3080.0,3590.0,1640.0,3185.0,4538.0,4417.0,4003.0,3156.0,3478.0,2527.0,2722.0,4916.0,4451.0,5572.0,4557.0,5925.0,6668.0,5087.0,12707.0,14920.0,16319.0,20552.0,44012.0,37042.0,12507.0,102085.0


In [45]:
#Merging (C3D + CH) with HMP
dfTrain = dfTrain.merge(HMPTrain, on=["video"], how="inner")
dfTrain.columns
dfTrain.head(2)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,HMP_6036,HMP_6037,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075
0,video6632,0.010858,0.010386,0.0,0.0,0.0,0.0,2.7e-07,0.0,1e-08,3.4e-07,8e-08,1e-08,4e-06,0.000105,0.0,4e-08,1e-08,0.0,0.0,0.00013318,0.0,0.0,0.0,0.0,7.6e-07,0.0,9.8e-07,0.0,0.0,0.0,0.0,0.0,6e-08,2.1e-05,5e-08,0.0,1e-08,3e-08,2e-08,...,9.9e-05,0.000135,0.000274,5.8e-05,6.3e-05,4.7e-05,0.0,7.4e-05,0.000146,2e-05,7.2e-05,0.000205,5.2e-05,4.9e-05,2e-05,0.0,1.8e-05,3.6e-05,2e-06,2e-06,4e-06,0.0,2.5e-05,1.6e-05,0.0,5.2e-05,9.2e-05,9e-06,4e-06,1.3e-05,7e-06,4.9e-05,2.2e-05,0.0,4.9e-05,0.000164,9e-06,9.7e-05,0.000169,0.00011
1,video6634,0.0002,6.5e-05,0.993807,2e-07,4.7e-07,7.3e-05,3.7e-06,0.000337,6.71e-06,2.29e-06,6.38e-06,7.34e-06,1.9e-05,7e-06,3e-06,3.81e-06,1.411e-05,1e-06,3e-06,2.3e-07,1.9e-07,0.002938,5.2e-07,9.2e-07,1.136e-05,1.1e-05,0.00033104,9.2e-07,2.2e-05,8e-08,2.8e-05,1.3e-05,5.778e-05,3.7e-05,1.683e-05,7e-06,3.99e-06,8.03e-06,1.45e-06,...,0.000236,0.000239,0.000324,0.000151,0.000106,5e-05,1.4e-05,0.000156,0.000123,3.1e-05,0.000163,0.000177,9.7e-05,4.7e-05,2.1e-05,2e-06,1.9e-05,2.1e-05,0.0,2e-06,9e-06,2e-06,5.9e-05,2.4e-05,7e-06,5.7e-05,5.2e-05,1.2e-05,2.6e-05,7e-06,1.4e-05,5.7e-05,2.8e-05,7e-06,5.9e-05,9.7e-05,1.2e-05,0.000135,0.000111,7.6e-05


In [46]:
dfTrain = dfTrain.merge(gTruth, on=["video"], how="inner")
dfTrain.head(2)

Unnamed: 0,video,C3D_1,C3D_2,C3D_3,C3D_4,C3D_5,C3D_6,C3D_7,C3D_8,C3D_9,C3D_10,C3D_11,C3D_12,C3D_13,C3D_14,C3D_15,C3D_16,C3D_17,C3D_18,C3D_19,C3D_20,C3D_21,C3D_22,C3D_23,C3D_24,C3D_25,C3D_26,C3D_27,C3D_28,C3D_29,C3D_30,C3D_31,C3D_32,C3D_33,C3D_34,C3D_35,C3D_36,C3D_37,C3D_38,C3D_39,...,HMP_6038,HMP_6039,HMP_6040,HMP_6041,HMP_6042,HMP_6043,HMP_6044,HMP_6045,HMP_6046,HMP_6047,HMP_6048,HMP_6049,HMP_6050,HMP_6051,HMP_6052,HMP_6053,HMP_6054,HMP_6055,HMP_6056,HMP_6057,HMP_6058,HMP_6059,HMP_6060,HMP_6061,HMP_6062,HMP_6063,HMP_6064,HMP_6065,HMP_6066,HMP_6067,HMP_6068,HMP_6069,HMP_6070,HMP_6071,HMP_6072,HMP_6073,HMP_6074,HMP_6075,short-term_memorability,long-term_memorability
0,video6632,0.010858,0.010386,0.0,0.0,0.0,0.0,2.7e-07,0.0,1e-08,3.4e-07,8e-08,1e-08,4e-06,0.000105,0.0,4e-08,1e-08,0.0,0.0,0.00013318,0.0,0.0,0.0,0.0,7.6e-07,0.0,9.8e-07,0.0,0.0,0.0,0.0,0.0,6e-08,2.1e-05,5e-08,0.0,1e-08,3e-08,2e-08,...,0.000274,5.8e-05,6.3e-05,4.7e-05,0.0,7.4e-05,0.000146,2e-05,7.2e-05,0.000205,5.2e-05,4.9e-05,2e-05,0.0,1.8e-05,3.6e-05,2e-06,2e-06,4e-06,0.0,2.5e-05,1.6e-05,0.0,5.2e-05,9.2e-05,9e-06,4e-06,1.3e-05,7e-06,4.9e-05,2.2e-05,0.0,4.9e-05,0.000164,9e-06,9.7e-05,0.000169,0.00011,0.884,0.9
1,video6634,0.0002,6.5e-05,0.993807,2e-07,4.7e-07,7.3e-05,3.7e-06,0.000337,6.71e-06,2.29e-06,6.38e-06,7.34e-06,1.9e-05,7e-06,3e-06,3.81e-06,1.411e-05,1e-06,3e-06,2.3e-07,1.9e-07,0.002938,5.2e-07,9.2e-07,1.136e-05,1.1e-05,0.00033104,9.2e-07,2.2e-05,8e-08,2.8e-05,1.3e-05,5.778e-05,3.7e-05,1.683e-05,7e-06,3.99e-06,8.03e-06,1.45e-06,...,0.000324,0.000151,0.000106,5e-05,1.4e-05,0.000156,0.000123,3.1e-05,0.000163,0.000177,9.7e-05,4.7e-05,2.1e-05,2e-06,1.9e-05,2.1e-05,0.0,2e-06,9e-06,2e-06,5.9e-05,2.4e-05,7e-06,5.7e-05,5.2e-05,1.2e-05,2.6e-05,7e-06,1.4e-05,5.7e-05,2.8e-05,7e-06,5.9e-05,9.7e-05,1.2e-05,0.000135,0.000111,7.6e-05,0.743,0.727


## **Creating training data and running models**

In [77]:
dfTrain_x, dfTrain_y_short, dfTrain_y_long = dfTrain.drop(["short-term_memorability", "long-term_memorability"], axis = 1), dfTrain["short-term_memorability"], dfTrain["long-term_memorability"]

In [78]:
dfTrain_x, dfTest_x, cols = dfTrain_x.drop(["video"], axis = 1), dfTest.drop(["video"], axis = 1), dfTest["video"].str.replace("video", "")

In [56]:
#Short term Extra trees
etModShort = ExtraTreesRegressor()
etModShort.fit(dfTrain_x, dfTrain_y_short)

etShort = etModShort.predict(dfTest_x)

In [64]:
#Long term Extra trees
etModLong = ExtraTreesRegressor()
etModLong.fit(dfTrain_x, dfTrain_y_long)

etLong = etModLong.predict(dfTest_x)

## **Create the final output (CSV) file.**

In [86]:
etShort, etLong = pd.DataFrame(etShort, columns=["short-term_memorability"]), pd.DataFrame(etLong, columns=["long-term_memorability"])

In [89]:
finalPred = pd.concat([cols, etShort, etLong], axis = 1)
finalPred.head(3)

Unnamed: 0,video,short-term_memorability,long-term_memorability
0,8768,0.84212,0.75259
1,8759,0.86862,0.77206
2,8762,0.86326,0.74866


In [109]:
gTruthTest = pd.read_csv("/content/drive/MyDrive/CA684_Assignment/Test-set/Ground-truth_test/ground_truth_template.csv")
gTruthTest.head(2)

Unnamed: 0,video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
0,7494,,33,,12
1,7495,,34,,10


In [110]:
gTruthTest = gTruthTest.drop(["short-term_memorability", "long-term_memorability"], axis=1)

In [98]:
gTruthTest.head(2)

Unnamed: 0,video,nb_short-term_annotations,nb_long-term_annotations
0,7494,33,12
1,7495,34,10


In [111]:
finalPred["video"]=finalPred["video"].astype(int)
gTruthTest = pd.merge(gTruthTest, finalPred, on = ["video", "video"])
gTruthTest.columns = ["video", "nb_short-term_annotations", "nb_long-term_annotations", "short-term_memorability", "long-term_memorability"]
gTruthTest = gTruthTest[["video",  "short-term_memorability", "nb_short-term_annotations", "long-term_memorability", "nb_long-term_annotations"]]
gTruthTest.head(2)

In [115]:
gTruthTest.to_csv("/content/drive/MyDrive/Rohit_Nair_20210378_predictions.csv", index = False)