#**Video Memoribility using HMP features**

*This notebook is finding short-term and long-term memoribility uding the HMP features.*

In [1]:
#Mounting my google drive with Google Colab

from google.colab import drive
import os
drive.mount('/content/drive/',force_remount=True)
os.chdir('/content/drive/My Drive')

Mounted at /content/drive/


In [2]:
#Importing all libraries needed for this project

import pandas as pd
import glob

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from tensorflow.keras import layers

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

##**Calculating the Spearman Correlation coefficients**

In [3]:
def Get_score(Y_pred,Y_true):
    '''Calculate the Spearmann"s correlation coefficient'''
    Y_pred = np.squeeze(Y_pred)
    Y_true = np.squeeze(Y_true)
    if Y_pred.shape != Y_true.shape:
        print('Input shapes don\'t match!')
    else:
        if len(Y_pred.shape) == 1:
            Res = pd.DataFrame({'Y_true':Y_true,'Y_pred':Y_pred})
            score_mat = Res[['Y_true','Y_pred']].corr(method='spearman',min_periods=1)
            print('The Spearman\'s correlation coefficient is: %.3f' % score_mat.iloc[1][0])
        else:
            for ii in range(Y_pred.shape[1]):
                Get_score(Y_pred[:,ii],Y_true[:,ii])

##**Working with HMP feature Videos**

*1. Check the length of the HMP features.*

*2. Loading the HMP features and storing them in a dataframe.*

*3. Export the dataframe for future use.*

In [4]:
#Storing all the HMP dev videos in a variable HMP_path

HMP_path = glob.glob('./HMP/*.txt')
len(HMP_path)

6000

In [6]:
#Loading the HMP feature

def read_HMP(fname):
    """Scan HMP(Histogram of Motion Patterns) features from file"""
    vn = []
    df = pd.DataFrame();
    with open(fname) as f:
        for line in f:
            pairs=line.split()
            vn.append(pairs[0])
            HMP_temp = { int(p.split(':')[0]) : float(p.split(':')[1]) for p in pairs}
            df['video']=vn
    # there are 6075 bins, fill zeros
   
    HMP = np.zeros(6075)
    for idx in HMP_temp.keys():
        HMP[idx-1] = HMP_temp[idx]            
    return HMP


#Extracting the videos into a dataframe

HMP_extract=pd.DataFrame()
for root, dirs, files in os.walk("/content/drive/My Drive/HMP"):
  pass
lst=[]
for i in range(0,8000):
  value='video'+str(i)+'.txt'
  if value in files:
    txt=read_HMP("/content/drive/My Drive/HMP/"+value)
    HMP_extract=HMP_extract.append([txt],ignore_index=True)
    lst.append('video'+str(i))
HMP_extract['Video']=lst

In [7]:
#Showing the HMP_extract 
HMP_extract.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,Video
0,0.125563,0.024036,0.000314,0.0,0.015864,0.000358,0.0,0.0,8.6e-05,0.0,0.0,0.0,0.0,0.002795,5.4e-05,0.0,0.0,3.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000173,0.000459,0.0,0.000148,0.000104,0.0,0.000121,0.000551,0.0,0.000114,0.000884,2e-06,0.000116,7.7e-05,2e-06,2.7e-05,0.000136,0.0,0.0,2e-06,0.0,9.1e-05,3.5e-05,0.0,0.000163,0.000467,2e-06,1e-05,1.7e-05,0.0,0.000393,0.000279,0.0,0.000289,0.001926,0.0,8.6e-05,0.00058,0.0,video3
1,0.007526,0.001421,6.8e-05,0.0,0.001184,0.000143,0.0,0.0,7.9e-05,0.0,0.0,0.0,0.0,0.000246,2.4e-05,0.0,0.0,4.2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000308,0.001054,0.000751,0.000176,6.2e-05,0.0,0.000123,0.000398,8.6e-05,0.000246,0.000433,0.000446,0.000143,5.3e-05,0.0,5.3e-05,9.9e-05,9e-06,4e-06,3.3e-05,4e-06,5.1e-05,3.5e-05,0.0,6.2e-05,0.000358,3.5e-05,2.4e-05,8.3e-05,5.3e-05,0.000244,6.6e-05,0.0,8.1e-05,0.000617,9.4e-05,0.00022,0.000762,0.001224,video4
2,0.109584,0.018978,0.000289,0.0,0.008774,0.000208,0.0,2e-06,8.8e-05,0.0,0.0,0.0,0.0,0.002046,6.1e-05,0.0,0.0,3.8e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.7e-05,0.000158,7.3e-05,2.1e-05,9e-06,2e-06,1.9e-05,9.5e-05,2.1e-05,1.9e-05,9e-05,7.3e-05,5e-05,2.4e-05,0.0,1.2e-05,2.1e-05,0.0,0.0,2e-06,0.0,1.7e-05,7e-06,2e-06,6.6e-05,0.000203,2.6e-05,2e-06,4e-05,7e-06,5.4e-05,4.5e-05,0.0,2.8e-05,0.000291,3.3e-05,5.2e-05,0.000258,0.000215,video6
3,0.120431,0.013561,0.000277,0.0,0.018974,0.000913,0.0,2.4e-05,0.000713,0.0,0.0,0.0,0.0,0.002496,0.000149,0.0,1.1e-05,0.000157,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000434,0.000543,0.000412,0.000412,4.5e-05,3e-06,0.000144,0.000282,3.7e-05,0.000197,0.000218,0.000157,0.000237,2.1e-05,0.0,4e-05,5.6e-05,8e-06,5e-06,1.3e-05,1.9e-05,0.000168,1.3e-05,0.0,0.000133,0.000202,2.9e-05,2.9e-05,3.5e-05,5.9e-05,0.00111,7.5e-05,8e-06,0.000333,0.000793,0.000101,0.000588,0.000503,0.000452,video8
4,0.005026,0.001356,5.5e-05,0.0,0.000665,2.9e-05,0.0,0.0,2.4e-05,0.0,0.0,0.0,0.0,0.000147,2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000996,0.001604,0.000103,0.000768,0.000215,9e-06,0.000415,0.000926,2e-05,0.000538,0.001178,5e-05,0.000518,0.000169,7e-06,0.000134,0.000169,7e-06,2.6e-05,4.6e-05,7e-06,0.000373,8.8e-05,0.0,0.000338,0.000441,2.9e-05,7e-05,0.000149,9e-06,0.000882,0.0002,9e-06,0.000559,0.001097,1.8e-05,0.000632,0.001128,6.4e-05,video10


In [8]:
# Exporting the dataframe into a csv

HMP_extract.to_csv('HMP_features_train.csv')

###**Importing the Ground Truth for Test Videos**

*6000 videos with the actual ground-truth values of short-term memoribility and long-term memoribility* 

In [14]:
#Reading the csv

ground_truth = pd.read_csv('./ground-truth.csv')
ground_truth.head(5)

Unnamed: 0,video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
0,video3.webm,0.924,34,0.846,13
1,video4.webm,0.923,33,0.667,12
2,video6.webm,0.863,33,0.7,10
3,video8.webm,0.922,33,0.818,11
4,video10.webm,0.95,34,0.9,10


In [16]:
#Knowing the datatypes of ground truth values

ground_truth.dtypes

video                         object
short-term_memorability      float64
nb_short-term_annotations      int64
long-term_memorability       float64
nb_long-term_annotations       int64
dtype: object

In [17]:
#Removing .webm from the Video column

GT = []
for i in range(len(ground_truth['video'])):
  video = ground_truth['video'][i]
  new = video.replace(".webm","")
  GT.append(new)  

In [18]:
#Replacing GT names in Video column

ground_truth['video'] = GT
ground_truth.head(5)

Unnamed: 0,video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
0,video3,0.924,34,0.846,13
1,video4,0.923,33,0.667,12
2,video6,0.863,33,0.7,10
3,video8,0.922,33,0.818,11
4,video10,0.95,34,0.9,10


In [19]:
#Changing video to Video

ground_truth = ground_truth.rename({'video': 'Video'}, axis='columns')
ground_truth.head(5)

Unnamed: 0,Video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
0,video3,0.924,34,0.846,13
1,video4,0.923,33,0.667,12
2,video6,0.863,33,0.7,10
3,video8,0.922,33,0.818,11
4,video10,0.95,34,0.9,10


# **Combining the Two**

*Merging the ground truth values and the extracted HMP feature values* 

In [20]:
#Merging the two files - ground truth, hmp feature videos

final = pd.merge(HMP_extract, ground_truth, on="Video")
final.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,Video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
0,0.125563,0.024036,0.000314,0.0,0.015864,0.000358,0.0,0.0,8.6e-05,0.0,0.0,0.0,0.0,0.002795,5.4e-05,0.0,0.0,3.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000104,0.0,0.000121,0.000551,0.0,0.000114,0.000884,2e-06,0.000116,7.7e-05,2e-06,2.7e-05,0.000136,0.0,0.0,2e-06,0.0,9.1e-05,3.5e-05,0.0,0.000163,0.000467,2e-06,1e-05,1.7e-05,0.0,0.000393,0.000279,0.0,0.000289,0.001926,0.0,8.6e-05,0.00058,0.0,video3,0.924,34,0.846,13
1,0.007526,0.001421,6.8e-05,0.0,0.001184,0.000143,0.0,0.0,7.9e-05,0.0,0.0,0.0,0.0,0.000246,2.4e-05,0.0,0.0,4.2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.2e-05,0.0,0.000123,0.000398,8.6e-05,0.000246,0.000433,0.000446,0.000143,5.3e-05,0.0,5.3e-05,9.9e-05,9e-06,4e-06,3.3e-05,4e-06,5.1e-05,3.5e-05,0.0,6.2e-05,0.000358,3.5e-05,2.4e-05,8.3e-05,5.3e-05,0.000244,6.6e-05,0.0,8.1e-05,0.000617,9.4e-05,0.00022,0.000762,0.001224,video4,0.923,33,0.667,12
2,0.109584,0.018978,0.000289,0.0,0.008774,0.000208,0.0,2e-06,8.8e-05,0.0,0.0,0.0,0.0,0.002046,6.1e-05,0.0,0.0,3.8e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9e-06,2e-06,1.9e-05,9.5e-05,2.1e-05,1.9e-05,9e-05,7.3e-05,5e-05,2.4e-05,0.0,1.2e-05,2.1e-05,0.0,0.0,2e-06,0.0,1.7e-05,7e-06,2e-06,6.6e-05,0.000203,2.6e-05,2e-06,4e-05,7e-06,5.4e-05,4.5e-05,0.0,2.8e-05,0.000291,3.3e-05,5.2e-05,0.000258,0.000215,video6,0.863,33,0.7,10
3,0.120431,0.013561,0.000277,0.0,0.018974,0.000913,0.0,2.4e-05,0.000713,0.0,0.0,0.0,0.0,0.002496,0.000149,0.0,1.1e-05,0.000157,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.5e-05,3e-06,0.000144,0.000282,3.7e-05,0.000197,0.000218,0.000157,0.000237,2.1e-05,0.0,4e-05,5.6e-05,8e-06,5e-06,1.3e-05,1.9e-05,0.000168,1.3e-05,0.0,0.000133,0.000202,2.9e-05,2.9e-05,3.5e-05,5.9e-05,0.00111,7.5e-05,8e-06,0.000333,0.000793,0.000101,0.000588,0.000503,0.000452,video8,0.922,33,0.818,11
4,0.005026,0.001356,5.5e-05,0.0,0.000665,2.9e-05,0.0,0.0,2.4e-05,0.0,0.0,0.0,0.0,0.000147,2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000215,9e-06,0.000415,0.000926,2e-05,0.000538,0.001178,5e-05,0.000518,0.000169,7e-06,0.000134,0.000169,7e-06,2.6e-05,4.6e-05,7e-06,0.000373,8.8e-05,0.0,0.000338,0.000441,2.9e-05,7e-05,0.000149,9e-06,0.000882,0.0002,9e-06,0.000559,0.001097,1.8e-05,0.000632,0.001128,6.4e-05,video10,0.95,34,0.9,10


In [21]:
#Dropping off unnecessary folders
final = final.drop(['nb_short-term_annotations','nb_long-term_annotations'],axis='columns')

In [22]:
final.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,Video,short-term_memorability,long-term_memorability
0,0.125563,0.024036,0.000314,0.0,0.015864,0.000358,0.0,0.0,8.6e-05,0.0,0.0,0.0,0.0,0.002795,5.4e-05,0.0,0.0,3.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000148,0.000104,0.0,0.000121,0.000551,0.0,0.000114,0.000884,2e-06,0.000116,7.7e-05,2e-06,2.7e-05,0.000136,0.0,0.0,2e-06,0.0,9.1e-05,3.5e-05,0.0,0.000163,0.000467,2e-06,1e-05,1.7e-05,0.0,0.000393,0.000279,0.0,0.000289,0.001926,0.0,8.6e-05,0.00058,0.0,video3,0.924,0.846
1,0.007526,0.001421,6.8e-05,0.0,0.001184,0.000143,0.0,0.0,7.9e-05,0.0,0.0,0.0,0.0,0.000246,2.4e-05,0.0,0.0,4.2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000751,0.000176,6.2e-05,0.0,0.000123,0.000398,8.6e-05,0.000246,0.000433,0.000446,0.000143,5.3e-05,0.0,5.3e-05,9.9e-05,9e-06,4e-06,3.3e-05,4e-06,5.1e-05,3.5e-05,0.0,6.2e-05,0.000358,3.5e-05,2.4e-05,8.3e-05,5.3e-05,0.000244,6.6e-05,0.0,8.1e-05,0.000617,9.4e-05,0.00022,0.000762,0.001224,video4,0.923,0.667
2,0.109584,0.018978,0.000289,0.0,0.008774,0.000208,0.0,2e-06,8.8e-05,0.0,0.0,0.0,0.0,0.002046,6.1e-05,0.0,0.0,3.8e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.3e-05,2.1e-05,9e-06,2e-06,1.9e-05,9.5e-05,2.1e-05,1.9e-05,9e-05,7.3e-05,5e-05,2.4e-05,0.0,1.2e-05,2.1e-05,0.0,0.0,2e-06,0.0,1.7e-05,7e-06,2e-06,6.6e-05,0.000203,2.6e-05,2e-06,4e-05,7e-06,5.4e-05,4.5e-05,0.0,2.8e-05,0.000291,3.3e-05,5.2e-05,0.000258,0.000215,video6,0.863,0.7
3,0.120431,0.013561,0.000277,0.0,0.018974,0.000913,0.0,2.4e-05,0.000713,0.0,0.0,0.0,0.0,0.002496,0.000149,0.0,1.1e-05,0.000157,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000412,0.000412,4.5e-05,3e-06,0.000144,0.000282,3.7e-05,0.000197,0.000218,0.000157,0.000237,2.1e-05,0.0,4e-05,5.6e-05,8e-06,5e-06,1.3e-05,1.9e-05,0.000168,1.3e-05,0.0,0.000133,0.000202,2.9e-05,2.9e-05,3.5e-05,5.9e-05,0.00111,7.5e-05,8e-06,0.000333,0.000793,0.000101,0.000588,0.000503,0.000452,video8,0.922,0.818
4,0.005026,0.001356,5.5e-05,0.0,0.000665,2.9e-05,0.0,0.0,2.4e-05,0.0,0.0,0.0,0.0,0.000147,2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000103,0.000768,0.000215,9e-06,0.000415,0.000926,2e-05,0.000538,0.001178,5e-05,0.000518,0.000169,7e-06,0.000134,0.000169,7e-06,2.6e-05,4.6e-05,7e-06,0.000373,8.8e-05,0.0,0.000338,0.000441,2.9e-05,7e-05,0.000149,9e-06,0.000882,0.0002,9e-06,0.000559,0.001097,1.8e-05,0.000632,0.001128,6.4e-05,video10,0.95,0.9


#**Splitting the data for traning and test**

*The X,Y are for training the entire model to get test values for short_term and long_term*

*Then the splited ones are used for getting the spearmen's coefficient*


In [23]:
#Applying train_test_split

X = final.drop(['Video','short-term_memorability','long-term_memorability'],axis=1)
Y = final[['short-term_memorability','long-term_memorability']].values # targets

X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2, random_state=42) 

*Checking if their shapes are as required*


In [27]:
print('X',X.shape)
print('Y',Y.shape)

print('X_train', X_train.shape)
print('X_test', X_test.shape)
print('Y_train', Y_train.shape)
print('Y_test', Y_test.shape) 

X (6000, 6075)
Y (6000, 2)
X_train (4800, 6075)
X_test (1200, 6075)
Y_train (4800, 2)
Y_test (1200, 2)


##**Extracting the test videos** 

In [28]:
#Storing the path of the videos in a variable and checking its length

HMP_test_path = glob.glob('./HMP_test/HMP_test/*.txt')
len(HMP_test_path)

2000

In [10]:
#Loading the HMP feature for the test videos

def read_HMP(fname):
    """Scan HMP(Histogram of Motion Patterns) features from file"""
    vn = []
    df = pd.DataFrame();
    with open(fname) as f:
        for line in f:
            pairs=line.split()
            vn.append(pairs[0])
            HMP_temp = { int(p.split(':')[0]) : float(p.split(':')[1]) for p in pairs}
            df['video']=vn
    # there are 6075 bins, fill zeros
   
    HMP = np.zeros(6075)
    for idx in HMP_temp.keys():
        HMP[idx-1] = HMP_temp[idx]            
    return HMP

#Storing them in a dataframe

HMP_test=pd.DataFrame()
for root, dirs, files in os.walk("/content/drive/My Drive/HMP_test/HMP_test"):
  pass
lst=[]
for i in range(0,10009):
  value='video'+str(i)+'.txt'
  if value in files:
    txt=read_HMP("/content/drive/My Drive/HMP_test/HMP_test/"+value)
    HMP_test=HMP_test.append([txt],ignore_index=True)
    lst.append('video'+str(i))
HMP_test['Video']=lst

In [11]:
#Displaying the test videos

HMP_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,Video
0,0.022039,0.005264,9.8e-05,0.0,0.003689,0.000129,0.0,0.0,4.5e-05,0.0,0.0,0.0,0.0,0.000907,3.6e-05,0.0,0.0,1.1e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000299,0.000916,0.000118,0.00012,0.000109,0.0,0.000218,0.000419,2.2e-05,0.000156,0.000468,9.1e-05,9.6e-05,5.3e-05,0.0,5.1e-05,7.4e-05,2e-06,7e-06,2.5e-05,0.0,0.000136,8.2e-05,4e-06,0.000189,0.000352,3.3e-05,3.6e-05,9.6e-05,2.5e-05,0.000114,5.6e-05,2e-06,0.000136,0.000388,3.8e-05,0.000165,0.000488,0.000187,video7494
1,0.004694,0.001934,4e-05,0.0,0.001226,5.8e-05,0.0,0.0,2.5e-05,0.0,0.0,0.0,0.0,0.000246,2.7e-05,0.0,0.0,4e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000226,0.000386,3.6e-05,0.000107,7.8e-05,2e-06,0.000134,0.000185,7e-06,0.000118,0.000192,2.9e-05,6.5e-05,1.3e-05,0.0,2.2e-05,4.2e-05,0.0,1.1e-05,1.1e-05,2e-06,4e-05,2.2e-05,0.0,5.6e-05,9.8e-05,7e-06,9e-06,2.7e-05,0.0,5.6e-05,2.7e-05,2e-06,8.7e-05,0.000121,4e-06,0.000109,0.000136,2.9e-05,video7495
2,0.02653,0.006208,5.8e-05,0.0,0.00228,3.1e-05,0.0,0.0,9e-06,0.0,0.0,0.0,0.0,0.000421,2e-05,0.0,0.0,2e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000175,0.00045,0.000141,5.4e-05,3.1e-05,4e-06,9.2e-05,0.000134,2.9e-05,0.000128,0.000217,6.7e-05,4.7e-05,2.5e-05,2e-06,2.7e-05,4e-05,2e-06,1.8e-05,1.8e-05,4e-06,3.1e-05,2.9e-05,4e-06,4.5e-05,9e-05,2e-06,4e-06,2.7e-05,1.1e-05,3.4e-05,1.1e-05,2e-06,6.3e-05,8.1e-05,4e-06,9.2e-05,0.000143,5.1e-05,video7496
3,0.045109,0.008317,0.000151,0.0,0.005598,0.000128,0.0,8e-06,5e-05,0.0,0.0,0.0,0.0,0.000741,5.3e-05,0.0,0.0,2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000181,0.000266,0.000219,0.000103,4.5e-05,0.0,6.8e-05,0.000181,3.3e-05,0.000141,0.000249,0.000128,0.000163,4e-05,0.0,3.5e-05,5.3e-05,3e-06,5e-06,1.5e-05,0.0,5.3e-05,1.8e-05,3e-06,0.000158,0.000337,1.8e-05,2.3e-05,2.3e-05,3.5e-05,0.000256,8.3e-05,3e-06,0.000116,0.000467,2e-05,0.000279,0.00047,0.000219,video7497
4,0.034691,0.008619,0.000412,0.0,0.028355,0.000538,0.0,0.0,0.000325,0.0,0.0,0.0,0.0,0.002234,0.000159,0.0,0.0,4.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000215,0.000215,8.2e-05,7.7e-05,2.9e-05,0.0,5.8e-05,0.000131,1.1e-05,9.7e-05,0.00012,4.9e-05,0.0001,2e-05,0.0,4e-05,3.5e-05,2e-06,2e-06,2e-06,0.0,3.8e-05,1.3e-05,0.0,9.3e-05,0.000206,2e-05,1.3e-05,2.4e-05,9e-06,0.000148,3.8e-05,2e-06,0.000117,0.000206,2.2e-05,0.000228,0.000334,8.6e-05,video7498


In [12]:
#Exporting the HMP test videos to a csv

HMP_test.to_csv("HMP_features_test.csv")

In [29]:
# Storing these in a variable to use for testing

X_test_data = HMP_test.drop('Video',axis=1)

##**Random Forest Regressor**

*1. To get the spearman's correlation coefficient*



In [30]:
#Fitting the Random Forest Regressor to the dev set

rfRegressor1 = RandomForestRegressor(n_estimators=10)
rfRegressor1.fit(X_train,Y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=10, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [33]:
#Then predict with the remaining videos

pred_rf_train = rfRegressor1.predict(X_test)

In [34]:
#Display the predicted values 

pred_rf_train

array([[0.8863, 0.8083],
       [0.8888, 0.7285],
       [0.8436, 0.835 ],
       ...,
       [0.849 , 0.7698],
       [0.8952, 0.7529],
       [0.896 , 0.7908]])

In [35]:
#Get the spearman's score for the random forest model 

Rf_scores = Get_score(pred_rf_train, Y_test)

The Spearman's correlation coefficient is: 0.215
The Spearman's correlation coefficient is: 0.028


*2. To get predicted values for the test_videos*

In [36]:
#Fitting the Random Forest Regressor to the dev set

rfRegressor2 = RandomForestRegressor(n_estimators=10)
rfRegressor2.fit(X,Y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=10, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [37]:
#Then predict with the remaining videos with test data videos

pred_rf_test = rfRegressor2.predict(X_test_data)

In [38]:
#Display the predicted values 

pred_rf_test

array([[0.8405, 0.7496],
       [0.8893, 0.794 ],
       [0.8998, 0.7723],
       ...,
       [0.8972, 0.7913],
       [0.864 , 0.8072],
       [0.8918, 0.8384]])

In [41]:
#Store the predictions in a dataframe

results_rf = pd.DataFrame(pred_rf_test,columns = ['short_term_memorobility_rf','long_term_memorobility_rf'])
results_rf.head()

Unnamed: 0,short_term_memorobility_rf,long_term_memorobility_rf
0,0.8405,0.7496
1,0.8893,0.794
2,0.8998,0.7723
3,0.8931,0.7479
4,0.7879,0.6947


In [42]:
#Export the dataframe to store them

results_rf.to_csv("HMP_RandomForest.csv")

##**Decision Tree Regressor**

*1. To get the spearman's correlation coefficient*

In [43]:
##Fitting the Decision Tree Regressor to the dev set

dtRegressor1 = DecisionTreeRegressor()
dtRegressor1.fit(X_train, Y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')

In [45]:
#Then predict with the remaining videos

pred_dt_train = dtRegressor1.predict(X_test)
pred_dt_train

array([[0.856, 0.636],
       [0.81 , 0.917],
       [0.952, 0.923],
       ...,
       [0.857, 0.6  ],
       [0.982, 0.833],
       [0.893, 0.6  ]])

In [46]:
#Get the spearman's score for the decision tree model 

Dt_scores = Get_score(pred_dt_train, Y_test)

The Spearman's correlation coefficient is: 0.058
The Spearman's correlation coefficient is: 0.003


*2. To get predicted values for the test_videos*

In [47]:
#Fitting the Decision Tree Regressor to the dev set

dtRegressor2 = DecisionTreeRegressor()
dtRegressor2.fit(X, Y)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')

In [48]:
#Then predict with the test videos

pred_dt_test = dtRegressor2.predict(X_test_data)
pred_dt_test

array([[0.923, 0.818],
       [0.898, 0.778],
       [0.984, 0.9  ],
       ...,
       [0.868, 0.7  ],
       [0.713, 0.6  ],
       [0.896, 0.833]])

In [49]:
#Store the predicted values in a dataframe and export them as a csv

results_dt = pd.DataFrame(pred_dt_test,columns = ['short_term_memorobility_dt','long_term_memorobility_dt'])
results_dt.to_csv("HMP_DecisionTree.csv")

# **K Nearest Neighbors Regressor**

*1. To get the spearman's correlation coefficient*

In [50]:
#Fitting the K Nearest Neighbors Regressor to the dev set

knnRegressor1 = KNeighborsRegressor(n_neighbors = 77)
knnRegressor1.fit(X_test, Y_test)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
                    metric_params=None, n_jobs=None, n_neighbors=77, p=2,
                    weights='uniform')

In [51]:
#Then predict with the remaining videos

pred_knn_train = knnRegressor1.predict(X_test)
pred_knn_train

array([[0.88145455, 0.79919481],
       [0.85951948, 0.74683117],
       [0.88135065, 0.80758442],
       ...,
       [0.88671429, 0.76875325],
       [0.86661039, 0.7437013 ],
       [0.86532468, 0.77263636]])

In [52]:
#Get the spearman's score for the k nearest neighbors model 

Knn_scores = Get_score(pred_knn_train, Y_test)

The Spearman's correlation coefficient is: 0.287
The Spearman's correlation coefficient is: 0.155


*2. To get predicted values for the test_videos*

In [53]:
#Fitting the K Nearest Neighbors Regressor to the dev set

knnRegressor2 = KNeighborsRegressor(n_neighbors = 77)
knnRegressor2.fit(X, Y)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
                    metric_params=None, n_jobs=None, n_neighbors=77, p=2,
                    weights='uniform')

In [54]:
#Then predict with the test videos

pred_knn_test = knnRegressor2.predict(X_test_data)
pred_knn_test

array([[0.85042857, 0.76193506],
       [0.84903896, 0.77232468],
       [0.88896104, 0.8064026 ],
       ...,
       [0.88406494, 0.82076623],
       [0.8642987 , 0.76151948],
       [0.83315584, 0.7518961 ]])

In [55]:
#Store the predicted values in a dataframe and export them as a csv

results_knn = pd.DataFrame(pred_knn_test,columns = ['short_term_memorobility_knn','long_term_memorobility_knn'])
results_knn.to_csv("HMP_KNearestNeighbor.csv")

##**AdaBoost Regressor**

*1. To get the spearman's correlation coefficient*

In [56]:
#Seperate the short-term and long-term values for the model

Y_short = final[['short-term_memorability']].values
Y_long = final[['long-term_memorability']].values

In [57]:
#Spliting the videos based on short-term and long-term

short_X_train,short_X_test,short_Y_train,short_Y_test = train_test_split(X,Y_short,test_size=0.2,random_state=40)
long_X_train,long_X_test,long_Y_train,long_Y_test = train_test_split(X,Y_long,test_size=0.2,random_state=40)

In [58]:
#Fit the adaboost model with both short-term and long-term 

abRegressor1 = AdaBoostRegressor()
abRegressor1.fit(short_X_train, short_Y_train)

abRegressor1 = AdaBoostRegressor()
abRegressor1.fit(long_X_train, long_Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
                  n_estimators=50, random_state=None)

In [61]:
#Predicting the values

Y_pred_short = abRegressor1.predict(short_X_test)
print(Y_pred_short)

Y_pred_long = abRegressor1.predict(long_X_test)
print(Y_pred_long)

[0.73168296 0.75162395 0.75657499 ... 0.73108277 0.70481319 0.75162395]
[0.73168296 0.75162395 0.75657499 ... 0.73108277 0.70481319 0.75162395]


In [62]:
#Get the spearman's score for the AdaBoost model 

Ab_short_scores = Get_score(Y_pred_short, short_Y_test)
Ab_short_scores = Get_score(Y_pred_long, long_Y_test)

The Spearman's correlation coefficient is: 0.163
The Spearman's correlation coefficient is: 0.071


*2. To get predicted values for the test_videos*

In [63]:
#Fit the adaboost model with both short-term and long-term

abRegressor2 = AdaBoostRegressor()
abRegressor2.fit(X, Y_short)

abRegressor2 = AdaBoostRegressor()
abRegressor2.fit(X, Y_long)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
                  n_estimators=50, random_state=None)

In [65]:
#Predicting the values with test videos

pred_short_test = abRegressor2.predict(X_test_data)
print(pred_short_test)

pred_long_test = abRegressor2.predict(X_test_data)
print(pred_long_test)

[0.72304491 0.7390104  0.75314125 ... 0.74930191 0.7144286  0.72766181]
[0.72304491 0.7390104  0.75314125 ... 0.74930191 0.7144286  0.72766181]


In [70]:
#Storing the predictions in a dataframe

results_ab = pd.DataFrame(pred_short_test,columns = ['short_term_memorobility_ab'])
results_ab['lond_term_memorobility_ab'] = pred_long_test
results_ab

Unnamed: 0,short_term_memorobility_ab,lond_term_memorobility_ab
0,0.723045,0.723045
1,0.739010,0.739010
2,0.753141,0.753141
3,0.749302,0.749302
4,0.723045,0.723045
...,...,...
1995,0.723045,0.723045
1996,0.727662,0.727662
1997,0.749302,0.749302
1998,0.714429,0.714429


In [71]:
#Exporting them into a csv

results_ab.to_csv("HMP_ADABoost.csv")

##**Neural Network Model**

Just checking only for getting the spearman's score

In [72]:
#Scaling the features

sc = StandardScaler()
short_X_train = sc.fit_transform(short_X_train)
short_X_test = sc.transform(short_X_test)

long_X_train = sc.fit_transform(long_X_train)
long_X_test = sc.transform(long_X_test)

In [80]:
#Building the neural network model

neuralNetModel = Sequential()
neuralNetModel.add(Dense(32, activation = 'relu', input_dim = 6075))
neuralNetModel.add(Dense(units = 32, activation = 'relu'))
neuralNetModel.add(Dense(units = 32, activation = 'relu'))
neuralNetModel.add(Dense(units = 1))

In [81]:
#Compiling the model

neuralNetModel.compile(optimizer = 'adam',loss = 'mean_squared_error')

In [82]:
#fitting the model

neuralNetModel.fit(short_X_train, short_Y_train, batch_size = 10, epochs = 51)
neuralNetModel.fit(long_X_train, long_Y_train, batch_size = 10, epochs = 51)

Epoch 1/51
Epoch 2/51
Epoch 3/51
Epoch 4/51
Epoch 5/51
Epoch 6/51
Epoch 7/51
Epoch 8/51
Epoch 9/51
Epoch 10/51
Epoch 11/51
Epoch 12/51
Epoch 13/51
Epoch 14/51
Epoch 15/51
Epoch 16/51
Epoch 17/51
Epoch 18/51
Epoch 19/51
Epoch 20/51
Epoch 21/51
Epoch 22/51
Epoch 23/51
Epoch 24/51
Epoch 25/51
Epoch 26/51
Epoch 27/51
Epoch 28/51
Epoch 29/51
Epoch 30/51
Epoch 31/51
Epoch 32/51
Epoch 33/51
Epoch 34/51
Epoch 35/51
Epoch 36/51
Epoch 37/51
Epoch 38/51
Epoch 39/51
Epoch 40/51
Epoch 41/51
Epoch 42/51
Epoch 43/51
Epoch 44/51
Epoch 45/51
Epoch 46/51
Epoch 47/51
Epoch 48/51
Epoch 49/51
Epoch 50/51
Epoch 51/51
Epoch 1/51
Epoch 2/51
Epoch 3/51
Epoch 4/51
Epoch 5/51
Epoch 6/51
Epoch 7/51
Epoch 8/51
Epoch 9/51
Epoch 10/51
Epoch 11/51
Epoch 12/51
Epoch 13/51
Epoch 14/51
Epoch 15/51
Epoch 16/51
Epoch 17/51
Epoch 18/51
Epoch 19/51
Epoch 20/51
Epoch 21/51
Epoch 22/51
Epoch 23/51
Epoch 24/51
Epoch 25/51
Epoch 26/51
Epoch 27/51
Epoch 28/51
Epoch 29/51
Epoch 30/51
Epoch 31/51
Epoch 32/51
Epoch 33/51
Epoch 34/5

<tensorflow.python.keras.callbacks.History at 0x7f36d8e9eb50>

In [84]:
#Getting the predictions and finding the spearmen's score

pred_short_nn = neuralNetModel.predict(short_X_test)
NN_short_scores = Get_score(pred_short_nn, short_Y_test)

pred_long_nn = neuralNetModel.predict(long_X_test)
NN_long_scores = Get_score(pred_long_nn, long_Y_test)

The Spearman's correlation coefficient is: -0.046
The Spearman's correlation coefficient is: 0.020


# **Gradiant Boost Regressor**

Just checking the spearmen's score

In [85]:
#Fitting the HMP feature values to the gradiant boost regressor

gbRegressor= GradientBoostingRegressor()
gbRegressor.fit(short_X_train,short_Y_train)
gbRegressor.fit(long_X_train,long_Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=100,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=None, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [86]:
#Predict values
 
pred_short_gb = gbRegressor.predict(short_X_test)
pred_long_gb = gbRegressor.predict(long_X_test)

In [87]:
#Get the spearmen's score

Get_score(pred_short_gb, short_Y_test)
Get_score(pred_long_gb, long_Y_test)

The Spearman's correlation coefficient is: 0.225
The Spearman's correlation coefficient is: 0.105


                                                         **End**

