 <center><h2> PREDICTING MOVIE RATINGS FROM AUDIENCE BEHAVIOURS ON MOVIE TRAILERS

### <center><h2> Base Model

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LinearRegression
from joblib import dump
import plotly.graph_objects as go
import math
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from os import path
from sklearn.model_selection import KFold
from statistics import mean, stdev 

<h4>Feature Selection

In [2]:
Selected_Features_10 = ['Motion_image_Avg', 'Motion_image_Std', 'Motion_image_25P', 'Motion_image_75P', 'Motion_image_Peaks',
                       'Motion_image_0', 'Motion_image_10', 'Motion_image_20', 'Motion_image_30', 'Motion_image_40',
                       'Motion_image_50', 'Motion_image_60', 'Motion_image_70', 'Motion_image_80', 'Motion_image_90']

Selected_Features_5 = ['Motion_image_Avg', 'Motion_image_Std', 'Motion_image_25P', 'Motion_image_75P', 'Motion_image_Peaks',
                       'Motion_image_0', 'Motion_image_20', 'Motion_image_40', 'Motion_image_60', 'Motion_image_80']

Selected_Features_4 = ['Motion_image_Avg', 'Motion_image_Std', 'Motion_image_25P', 'Motion_image_75P', 'Motion_image_Peaks',
                       'Motion_image_0', 'Motion_image_25', 'Motion_image_50', 'Motion_image_75']

 <h4>classifier set

In [3]:
classifierTitleList = [ 
                    'MLPClassifier-adam', 'KNeighborsClassifier', 'SVC(gamma=2, C=1)','LinearDiscriminant', 'LinearRegression'
                ]

# Define the model
classifiersList = [
               MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1),
               KNeighborsClassifier(),
               SVC(gamma=2, C=1),
               LinearDiscriminantAnalysis(),
               LinearRegression()
            ]

 <h4> Results File Setup

In [4]:
inputFilePath = 'Model_Input_Data/'

resultFilePath = 'Audience_Ratings/Results/'
modelFilePath = 'Audience_Ratings/Model_File/'
graphFilePath = 'Audience_Ratings/Research_Graphs/'


textFile = open(resultFilePath+'Prediction Score.txt', "w")
textFile.write("PREDICTING MOVIE RATINGS FROM AUDIENCE BEHAVIOURS ON MOVIE TRAILERS\n\n\n")

70

<h4> Input Setup

In [5]:
def inputSetup(iputFileType):
    
    inputFile = inputFilePath+'Model_Input_Data_'+iputFileType+'.csv'
    
    df_Input = pd.read_csv(inputFile)
    
    y = df_Input['Rating']
    y = df_Input['UserRating']
    X = df_Input
    
    if iputFileType == "4":
        X = df_Input[Selected_Features_4]
    if iputFileType == "5":    
        X = df_Input[Selected_Features_5]
    if iputFileType == "10":
        X = df_Input[Selected_Features_10]
    if iputFileType == "20":   
        X = df_Input[Selected_Features_20]
        
    return X,y

In [6]:
def inputSplit(windowSize):
    
    X,y = inputSetup(windowSize)
    
    train_features, test_features, train_labels, test_labels = train_test_split(X, y,test_size=0.33 ,  random_state=5)
    
    return  train_features, test_features, train_labels, test_labels

 <h4> Plot Graph files

In [7]:
def plotgraph(test_labels,predict_labels,modelResults):
    
    fig = go.Figure(data=go.Scatter(y=test_labels, name='Actual rating'))
    fig.add_trace(go.Scatter(y=predict_labels, name='Predicting rating'))
    
    fig.update_layout(title=modelResults['Classifier']+' RMSE: {:.2f}'.format(modelResults['RMSE']))
    
    fig.write_html( graphFilePath + modelResults['Classifier'] +'_bm.html', auto_open=False)
    fig.write_image(graphFilePath + modelResults['Classifier'] +'_bm.png')

 <h4> Evaluation Matrix to Text

In [8]:
def resultsToText(modelResults):
    
    textFile.writelines(modelResults['Classifier'] 
                        + "\t\t : {}".format(modelResults['MAE']) 
                        + "\t\t : {}".format(modelResults['MSE'])
                        + "\t\t : {}".format(modelResults['RMSE']) 
                        + "\n\n")

<h4> Evaluation Matrix to CSV

In [9]:
def resultsToCSV(modelResults):
    
    resultsToText(modelResults)
    
    df_results = pd.DataFrame(columns = ['Classifier','MAE', 'MSE' , 'RMSE']) 
    
    df_results = df_results.append(modelResults, ignore_index=True)
    
    return df_results

 <h4> Model Train, Test and Evaluation

In [10]:
def classifierTrain(modelInput):
        
    model = modelInput['classifier']
    model.fit(modelInput['train_features'], modelInput['train_labels'])
    predict_labels = model.predict(modelInput['test_features'])
    
    MAE = mean_absolute_error(modelInput['test_labels'], predict_labels)
    MSE = mean_squared_error(modelInput['test_labels'], predict_labels)
    RMSE = math.sqrt(MSE)
    
    modelResults = {'Classifier':modelInput['classifierName'],'MAE': MAE,'MSE': MSE,'RMSE': RMSE}
    
    plotgraph(modelInput['test_labels'],predict_labels,modelResults)

    dump(model, modelFilePath + modelInput['classifierName'] + '_basemodel.joblib')
    
    return resultsToCSV(modelResults)

<h2> Classifier modelling

In [11]:
def classifierModelling(fileNu):
    
    df_Output = pd.DataFrame(columns=['Classifier','MAE', 'MSE' , 'RMSE'])
    
    train_features, test_features, train_labels, test_labels = inputSplit(fileNu)
    
    for index in range(len(classifiersList)): 
        
        classifier = classifiersList[index]
        classifierName = classifierTitleList[index]        
        
        modelInput = {'classifier':classifier,'classifierName':classifierName +"_"+fileNu,
                      'train_features':train_features,'test_features':test_features,
                      'train_labels':train_labels,'test_labels':test_labels}
        
        df_Output = df_Output.append(classifierTrain(modelInput), ignore_index=True)
        
    return df_Output

<h2> Classifier model Testing

In [12]:
fileNum = ['4','5','10']

df_Output = pd.DataFrame(columns=['Classifier','MAE', 'MSE' , 'RMSE'])

for i in range(3):
    
    df_Output = df_Output.append(classifierModelling(fileNum[i]), ignore_index=True)
    
textFile.close()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated a

In [13]:
df_Output = df_Output.sort_values(by=['RMSE'],ascending=True).round(decimals=3)

In [14]:
df_Output

Unnamed: 0,Classifier,MAE,MSE,RMSE
4,LinearRegression_4,11.974744,214.152328,14.633944
9,LinearRegression_5,11.900757,214.218706,14.636212
14,LinearRegression_10,11.985056,218.902877,14.795367
10,MLPClassifier-adam_10,12.5,224.785714,14.992855
0,MLPClassifier-adam_4,11.65873,254.230159,15.944597
8,LinearDiscriminant_5,12.230159,259.325397,16.103583
2,"SVC(gamma=2, C=1)_4",11.81746,260.18254,16.130175
5,MLPClassifier-adam_5,11.97619,260.18254,16.130175
7,"SVC(gamma=2, C=1)_5",11.81746,260.18254,16.130175
12,"SVC(gamma=2, C=1)_10",11.81746,260.18254,16.130175


In [15]:
df_Output.to_csv(resultFilePath+'BaseResults.csv')

### <h1> Model Validation

In [16]:
df_VP = pd.DataFrame(columns=['Classifier','RMSE_Avg','RMSE_Max','RMSE_Min','StD','Test_RMSE'])
df_Output2 = df_Output.set_index('Classifier')['RMSE']

In [17]:
def ModelValidation(classifierTitle,model,X,y):    
    
    global df_VP    
    
    kf = KFold(n_splits=20, random_state=None, shuffle=True)
    
    X =  X.values
    
    kf.get_n_splits(X)
    ValidationScore = []
    

    for train_index, test_index in kf.split(X, y): 
        x_train_fold, x_test_fold = X[train_index], X[test_index] 
        y_train_fold, y_test_fold = y[train_index], y[test_index]
        
        model.fit(x_train_fold, y_train_fold)
        predict_labels = model.predict(x_test_fold)
        
        MSE = mean_squared_error(y_test_fold, predict_labels)
        RMSEScore = math.sqrt(MSE)
        ValidationScore.append(RMSEScore)
        
    modelResults = {'Classifier':classifierTitle,
                    'RMSE_Avg': mean(ValidationScore),
                    'RMSE_Max': max(ValidationScore),
                    'RMSE_Min': min(ValidationScore),
                    'StD':stdev(ValidationScore),
                    'Test_RMSE': df_Output2.at[classifierTitle],
                   }
    df_VP = df_VP.append(modelResults, ignore_index=True)

In [18]:
def datasetValidation(windowSize): 
    
    X,y = inputSetup(windowSize)
    
    for index in range(len(classifiersList)): 
        
        model = classifiersList[index]
        classifierName = classifierTitleList[index] +"_"+ windowSize
        
        ModelValidation(classifierName,model,X,y)

In [19]:
def validationTest():
    
    windowSize = ['4','5','10']

    for i in range(len(windowSize)):
        
        datasetValidation(windowSize[i])

In [20]:
validationTest()


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.




Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated a

In [21]:
df_VP = df_VP.sort_values(by=['Test_RMSE'],ascending=True).round(decimals=3)

In [22]:
df_VP.to_csv(resultFilePath+'BaseValidationResults.csv')