 <center><h2>PREDICTING MOVIE RATINGS FROM AUDIENCE BEHAVIOURS ON MOVIE TRAILERS</h2></center>

<center><h3>Research Model</h3></center>

In [None]:
import math
import os
from os import path

import pandas as pd
import plotly.graph_objects as go
from joblib import dump
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

<h4>Feature Selection</h4>

In [2]:
# Columns shared by all three feature sets: the behavioural measurements followed by
# the summary statistics of the Happiness signal.
_COMMON_FEATURES = ['motion_image_high', 'motion_image_low', 'blink_rate', 'lean_forward', 'lean_backward',
                    'head_rotation', 'eye_rotation', 'Happiness_Avg', 'Happiness_Std', 'Happiness_25P',
                    'Happiness_75P', 'Happiness_Peaks']

# Sadness summary columns, in the order they are appended to the "5" and "4" sets.
_SADNESS_FEATURES = ['Sadness_Avg', 'Sadness_Std', 'Sadness_25P', 'Sadness_75P']

# Every set holds exactly 20 columns. The numeric suffix matches the input file variant
# (Model_Input_Data_<suffix>.csv); the Happiness_<n> columns step by 10, 20 and 25
# respectively -- presumably positions along the trailer timeline, TODO confirm.
Selected_Features_10 = _COMMON_FEATURES + ['Happiness_{}'.format(step) for step in range(0, 80, 10)]

Selected_Features_5 = (_COMMON_FEATURES
                       + ['Happiness_{}'.format(step) for step in range(0, 100, 20)]
                       + _SADNESS_FEATURES[:3])

Selected_Features_4 = (_COMMON_FEATURES
                       + ['Happiness_{}'.format(step) for step in range(0, 100, 25)]
                       + _SADNESS_FEATURES)

 <h4>Classifier Set</h4>

In [3]:
# Each entry pairs the display title used in result rows and output file names with its
# configured scikit-learn estimator. Keeping both in one place guarantees that
# classifierTitle[i] always describes classifiers[i].
_MODEL_SPECS = [
    ('DecisionTreeClassifier', DecisionTreeClassifier(max_depth=5)),
    ('MLPClassifier-sgd', MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1,
                                        learning_rate_init=0.001, batch_size=64, max_iter=100, verbose=False)),
    ('MLPClassifier-adam', MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)),
    ('KNeighborsClassifier', KNeighborsClassifier(n_neighbors=2)),
    ('SVC-linear(C=0.025)', SVC(kernel="linear", C=0.025)),
    ('SVC(gamma=2, C=1)', SVC(gamma=2, C=1)),
    ('RandomForestClassifier', RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
    ('AdaBoostClassifier', AdaBoostClassifier()),
    ('GaussianNB', GaussianNB()),
    ('LinearDiscriminant', LinearDiscriminantAnalysis()),
    # The only regressor in the set; it is scored with the same MAE/MSE/RMSE metrics.
    ('LinearRegression', LinearRegression(n_jobs=-1)),
]

# Parallel lists consumed by classifierModelling (same order, same length).
classifierTitle = [title for title, _ in _MODEL_SPECS]

# Define the model
classifiers = [estimator for _, estimator in _MODEL_SPECS]

 <h4>Results File Setup</h4>

In [4]:
# Folder containing the Model_Input_Data_<n>.csv files read by inputSetup.
inputFilePath = 'C:/Users/Shehan Janeth/Documents/Pre_Processed_Data/Model_Input_Data/'


# Output locations: score text file and results CSV, fitted models, and prediction graphs.
resultFilePath = 'C:/Users/Shehan Janeth/Documents/Research Output/Audience Ratings/Research Model/'
modelFilePath = 'C:/Users/Shehan Janeth/Documents/Research Output/Audience Ratings/Research Model/Model_File/'
graphFilePath = 'C:/Users/Shehan Janeth/Documents/Research Output/Audience Ratings/Research Model/Research_Graphs/'

# Create the output folders up front so a missing directory fails here (or not at all)
# instead of after a model has already been trained and is about to be saved.
for outputDir in (resultFilePath, modelFilePath, graphFilePath):
    os.makedirs(outputDir, exist_ok=True)

# Module-level handle shared with resultsToText; it stays open across cells and is
# closed by the Main cell once every model has been evaluated.
textFile = open(resultFilePath+'Prediction Score.txt', "w")
textFile.write("PREDICTING MOVIE RATINGS FROM AUDIENCE BEHAVIOURS ON MOVIE TRAILERS\n\n\n")

70

<h4>Input Setup</h4>

In [5]:
def inputSetup(iputFileType, test_size=0.3, random_state=None):
    """Load one model-input CSV and split it into train and test partitions.

    Parameters
    ----------
    iputFileType : str
        Suffix of the input file, e.g. "4", "5" or "10". The file read is
        ``inputFilePath + 'Model_Input_Data_' + iputFileType + '.csv'`` and the
        feature columns come from the module-level list
        ``Selected_Features_<iputFileType>`` when such a list is defined.
    test_size : float, optional
        Fraction of rows held out for testing (default 0.3, the original value).
    random_state : int or None, optional
        Seed forwarded to ``train_test_split``. The default ``None`` keeps the
        original unseeded behaviour; pass an int for a reproducible split.

    Returns
    -------
    tuple
        ``(train_features, test_features, train_labels, test_labels)``.
    """
    target_column = 'UserRating'

    inputFile = inputFilePath + 'Model_Input_Data_' + iputFileType + '.csv'
    df_Input = pd.read_csv(inputFile)

    y = df_Input[target_column]

    # Resolve the feature list by name so that a variant whose list is not defined in
    # this notebook (e.g. "20") falls back gracefully instead of raising NameError.
    feature_columns = globals().get('Selected_Features_' + iputFileType)

    if feature_columns is not None:
        X = df_Input[feature_columns]
    else:
        # No curated list: use every column EXCEPT the target. Keeping the target among
        # the features would leak the label into training.
        # NOTE(review): if the CSV also carries another rating column (a 'Rating' target
        # was tried at some point), it remains a feature here -- confirm and drop if so.
        X = df_Input.drop(columns=[target_column])

    train_features, test_features, train_labels, test_labels = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    return train_features, test_features, train_labels, test_labels

 <h4>Plot Graph Files</h4>

In [6]:
def plotgraph(test_labels,predict_labels,modelResults):
    """Save a PNG line chart comparing actual and predicted ratings for one model.

    Parameters
    ----------
    test_labels : sequence
        Actual ratings of the test rows.
    predict_labels : sequence
        Ratings predicted by the model for the same rows.
    modelResults : dict
        Must provide 'Classifier' (used in the title and the file name) and
        'RMSE' (shown in the title with two decimals).

    The image is written to ``graphFilePath + <Classifier> + '.png'``; nothing is returned.
    """
    classifier_name = modelResults['Classifier']
    chart_title = classifier_name + ' RMSE: {:.2f}'.format(modelResults['RMSE'])

    # Both traces are plotted against their positional order (no explicit x values).
    actual_trace = go.Scatter(y=test_labels, name='Actual rating')
    predicted_trace = go.Scatter(y=predict_labels, name='Predicting rating')

    fig = go.Figure(data=actual_trace)
    fig.add_trace(predicted_trace)
    fig.update_layout(title=chart_title)

    # Static export only; plotly needs an image export engine installed for this call.
    fig.write_image(graphFilePath + classifier_name + '.png')

 <h4>Evaluation Metrics to Text</h4>

In [7]:
def resultsToText(modelResults):
    """Append one model's scores to the open score text file.

    Parameters
    ----------
    modelResults : dict
        Must provide 'Classifier', 'MAE', 'MSE' and 'RMSE'.

    Writes a single line ``<Classifier>\\t\\t : <MAE>\\t\\t : <MSE>\\t\\t : <RMSE>``
    followed by a blank line to the module-level ``textFile`` handle. Returns None.
    """
    # One write() of the whole record; writelines() on a plain string would iterate it
    # character by character to produce the same text.
    textFile.write("{}\t\t : {}\t\t : {}\t\t : {}\n\n".format(
        modelResults['Classifier'],
        modelResults['MAE'],
        modelResults['MSE'],
        modelResults['RMSE']))

<h4>Evaluation Metrics to CSV</h4>

In [8]:
def resultsToCSV(modelResults):
    """Log one model's scores to the text file and return them as a one-row DataFrame.

    Parameters
    ----------
    modelResults : dict
        Scores for a single model, keyed by 'Classifier', 'MAE', 'MSE' and 'RMSE'.

    Returns
    -------
    pandas.DataFrame
        A single row whose first columns are 'Classifier', 'MAE', 'MSE', 'RMSE'
        (missing keys become NaN); any additional keys follow as extra columns.
    """
    resultsToText(modelResults)

    base_columns = ['Classifier', 'MAE', 'MSE', 'RMSE']

    # Build the row directly: DataFrame.append was removed in pandas 2.0.
    df_results = pd.DataFrame([modelResults])
    extra_columns = [column for column in df_results.columns if column not in base_columns]

    return df_results.reindex(columns=base_columns + extra_columns)

 <h4>Model Train, Test and Evaluation</h4>

In [9]:
def classifierTrain(modelInput):
    """Fit one estimator, score it on the test split, and persist its artefacts.

    Parameters
    ----------
    modelInput : dict
        Expected keys: 'classifier' (estimator with fit/predict), 'classifierName',
        'train_features', 'train_labels', 'test_features', 'test_labels'.

    Side effects: saves the prediction graph via plotgraph and dumps the fitted
    estimator to ``modelFilePath + <classifierName> + '_model.joblib'``.

    Returns
    -------
    pandas.DataFrame
        The one-row score table produced by resultsToCSV (which also logs the
        scores to the text file).
    """
    name = modelInput['classifierName']
    actual = modelInput['test_labels']

    estimator = modelInput['classifier']
    estimator.fit(modelInput['train_features'], modelInput['train_labels'])
    predicted = estimator.predict(modelInput['test_features'])

    # Error metrics are computed on the raw predictions, whatever the estimator type.
    squared_error = mean_squared_error(actual, predicted)
    scores = {
        'Classifier': name,
        'MAE': mean_absolute_error(actual, predicted),
        'MSE': squared_error,
        'RMSE': math.sqrt(squared_error),
    }

    plotgraph(actual, predicted, scores)

    dump(estimator, modelFilePath + name + '_model.joblib')

    return resultsToCSV(scores)

<h2>Classifier Modelling</h2>

In [10]:
def classifierModelling(fileNu):
    """Train and evaluate every configured estimator on one input file variant.

    Parameters
    ----------
    fileNu : str
        Input file suffix passed to inputSetup; it is also appended to each
        classifier title (``<title>_<fileNu>``) to name result rows and output files.

    Returns
    -------
    pandas.DataFrame
        One row per estimator with columns 'Classifier', 'MAE', 'MSE', 'RMSE',
        indexed 0..n-1. Empty (columns only) when no estimators are configured.
    """
    # A single split is shared by all estimators so their scores are comparable.
    train_features, test_features, train_labels, test_labels = inputSetup(fileNu)

    result_frames = []

    for classifier_index, classifier in enumerate(classifiers):

        modelInput = {'classifier': classifier,
                      'classifierName': classifierTitle[classifier_index] + "_" + fileNu,
                      'train_features': train_features, 'test_features': test_features,
                      'train_labels': train_labels, 'test_labels': test_labels}

        result_frames.append(classifierTrain(modelInput))

    if not result_frames:
        # pd.concat rejects an empty list; keep the original empty-table result.
        return pd.DataFrame(columns=['Classifier','MAE', 'MSE' , 'RMSE'])

    # Concatenate once: DataFrame.append was removed in pandas 2.0, and growing a
    # frame inside the loop copies all previous rows on every iteration.
    return pd.concat(result_frames, ignore_index=True)

<h2>Main</h2>

In [11]:
# Input file variants to evaluate; each maps to Model_Input_Data_<n>.csv.
fileNum = ['4','5','10']

try:
    # One result table per variant, stacked into a single frame. pd.concat replaces the
    # per-iteration DataFrame.append, which was removed in pandas 2.0, and the loop now
    # follows fileNum instead of a hard-coded count.
    df_Output = pd.concat([classifierModelling(fileNu) for fileNu in fileNum], ignore_index=True)
finally:
    # Always release the score file, even if a model fails part-way through, so the
    # rows already written are flushed and the handle is not leaked.
    textFile.close()


Stochastic Optimizer: Maximum iterations (100) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (100) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (100) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.



In [12]:
# Display the combined evaluation table assembled by the Main cell.
df_Output

Unnamed: 0,Classifier,MAE,MSE,RMSE
0,DecisionTreeClassifier_4,15.307018,408.061404,20.20053
1,MLPClassifier-sgd_4,14.394737,348.72807,18.674262
2,MLPClassifier-adam_4,17.008772,477.324561,21.847759
3,KNeighborsClassifier_4,15.789474,392.438596,19.810063
4,SVC-linear(C=0.025)_4,14.254386,346.096491,18.603669
5,"SVC(gamma=2, C=1)_4",14.254386,346.096491,18.603669
6,RandomForestClassifier_4,14.184211,339.868421,18.435521
7,AdaBoostClassifier_4,14.605263,359.429825,18.958635
8,GaussianNB_4,20.929825,665.473684,25.796777
9,LinearDiscriminant_4,15.184211,397.675439,19.941801


In [13]:
# Persist the combined evaluation table next to the score text file
# (the row index is written as the first, unnamed column).
df_Output.to_csv("{}Results.csv".format(resultFilePath))