In [1]:
# initialisation
import pandas as pd
import numpy as np
import plotly.express as px
import itertools
import joblib
from scipy.stats import *
from pandasgui import show
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
# Load the saved model and the pickled data sets that the later cells read.
# NOTE(review): joblib and pickle files can run arbitrary code while loading;
# only open artefacts that come from a trusted source.
model = joblib.load('gbrmodel.joblib')
data, X, y, X_test, y_test = map(
    pd.read_pickle,
    (
        '../data/processed_data.pkl',
        '../data/X.pkl',
        '../data/y.pkl',
        '../data/X_test.pkl',
        '../data/y_test.pkl',
    ),
)

In [3]:
# Feature importances reported by the model, one row per column of X
importance = model.feature_importances_
# Build the frame directly with the feature names as the row index;
# the single value column keeps pandas' default label 0.
importances_df = pd.DataFrame(importance, index=X.columns)
importances_df.to_csv('../data/feature_importances.csv')  # save a CSV copy of the table
importances_df


Unnamed: 0,0
Reviewscore,0.604337
Launch Year,0.018757
Play Year,0.036532
DLC/ Major update played,0.029255
Remaster Reviewscore,0.002028
...,...
Any multiplayer,0.000665
Any racing,0.000000
Any sports,0.000000
Any simulation,0.000000


In [4]:
# Analyse performance using the test set.
# Join the game name back on to the test ratings, matching rows by index.
test_labels = pd.merge(y_test, data['Game'], left_index=True, right_index=True)

# model.predict returns a bare array in X_test's row order. Placing that array
# straight into the frame below would pair it with test_labels by position, so
# any difference in row order between the merge result and X_test would attach
# predictions to the wrong games. Keying the predictions by X_test's index lets
# pandas align them by label instead.
# NOTE(review): assumes X_test is a DataFrame with unique index labels -- confirm.
test_predictions = pd.Series(model.predict(X_test), index=X_test.index)

test_scores = pd.DataFrame({
    'Game': test_labels['Game'],
    'Predicted Rating': test_predictions.reindex(test_labels.index),
    'Actual Rating': test_labels['Rating'],
})
test_scores.to_csv('../data/test_scores_output.csv')

In [5]:
# Test set: predicted rating (y axis) against actual rating (x axis);
# hovering over a point shows the game's name.
fig = px.scatter(
    data_frame=test_scores,
    x="Actual Rating",
    y="Predicted Rating",
    hover_name='Game',
)
fig

In [6]:
# Build the table behind the predicted-rating vs review-score plot (all rows of X).
# Join the game name, then the review score, back on to the ratings by index.
all_labels = pd.merge(y, data['Game'], left_index=True, right_index=True)
scores_labels = pd.merge(all_labels, data['Reviewscore'], left_index=True, right_index=True)

# model.predict returns a bare array in X's row order. Key it by X's index so
# it is matched to the merged labels by label rather than by position; a
# positional match would mislabel rows if the merges changed the row order.
# NOTE(review): assumes X has unique index labels -- confirm.
all_predictions = pd.Series(model.predict(X), index=X.index)

# Combine names, predictions, actual ratings and review scores in one frame.
scores_comp = pd.DataFrame({
    'Game': scores_labels['Game'],
    'Predicted Rating': all_predictions.reindex(scores_labels.index),
    'Actual Rating': scores_labels['Rating'],
    'Review avg': scores_labels['Reviewscore'],
})

In [7]:
# Predicted rating (y axis) against review average (x axis) for every row of
# scores_comp; hovering over a point shows the game's name.
fig = px.scatter(
    data_frame=scores_comp,
    x="Review avg",
    y="Predicted Rating",
    hover_name='Game',
)
fig