In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

import warnings
warnings.filterwarnings(action='ignore')

In [3]:
data = pd.read_csv('../input/ps4-games/games_data.csv')
data

Unnamed: 0.1,Unnamed: 0,game,score,leaderbord,gamers,comp_perc,rating,url,min_comp_time,max_comp_time
0,0,A Boy and His Blob,638,2.02,2194,16.5,3.2,https://www.truetrophies.com/game/A-Boy-and-Hi...,15,20
1,1,A Hat in Time,1992,1.53,7062,35.9,4.2,https://www.truetrophies.com/game/A-Hat-in-Tim...,15,20
2,2,A Hero and a Garden,1364,1.01,503,97.6,5.0,https://www.truetrophies.com/game/A-Hero-and-a...,0,1
3,3,A Hero and a Garden (EU),1363,1.01,581,97.8,2.9,https://www.truetrophies.com/game/A-Hero-and-a...,0,1
4,4,A King's Tale: Final Fantasy XV,637,2.02,21914,14.1,3.3,https://www.truetrophies.com/game/A-Kings-Tale...,4,5
...,...,...,...,...,...,...,...,...,...,...
1579,1579,36 Fragments of Midnight,1367,1.06,8472,82.3,2.5,https://www.truetrophies.com/game/36-Fragments...,0,1
1580,1580,36 Fragments of Midnight (Asia),1335,1.03,2131,88.9,2.4,https://www.truetrophies.com/game/36-Fragments...,0,1
1581,1581,36 Fragments of Midnight (EU),1382,1.07,12273,79.2,2.4,https://www.truetrophies.com/game/36-Fragments...,0,1
1582,1582,428: Shibuya Scramble,1943,1.47,916,41.5,4.2,https://www.truetrophies.com/game/428-Shibuya-...,40,50


In [4]:
def preprocess_inputs(df):
    df = df.copy()
    
    # Drop unused columns
    df = df.drop(['Unnamed: 0', 'game', 'url'], axis=1)
    
    # Split df into X and y
    y = df['rating']
    X = df.drop('rating', axis=1)
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=1)
    
    # Scale X
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)
    
    return X_train, X_test, y_train, y_test

In [5]:
X_train, X_test, y_train, y_test = preprocess_inputs(data)

In [6]:
models = {
    "                     Linear Regression": LinearRegression(),
    " Linear Regression (L2 Regularization)": Ridge(),
    " Linear Regression (L1 Regularization)": Lasso(),
    "                   K-Nearest Neighbors": KNeighborsRegressor(),
    "                        Neural Network": MLPRegressor(),
    "                         Decision Tree": DecisionTreeRegressor(),
    "                         Random Forest": RandomForestRegressor(),
    "                     Gradient Boosting": GradientBoostingRegressor(),
    "                               XGBoost": XGBRegressor(),
    "                              LightGBM": LGBMRegressor(),
    "                              CatBoost": CatBoostRegressor(verbose=0)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    print(name + " trained.")

                     Linear Regression trained.
 Linear Regression (L2 Regularization) trained.
 Linear Regression (L1 Regularization) trained.
                   K-Nearest Neighbors trained.
                        Neural Network trained.
                         Decision Tree trained.
                         Random Forest trained.
                     Gradient Boosting trained.
                               XGBoost trained.
                              LightGBM trained.
                              CatBoost trained.


In [7]:
for name, model in models.items():
    y_pred = model.predict(X_test)
    rmse = np.sqrt(np.mean((y_test - y_pred)**2))
    print(name + " RMSE: {:.4f}".format(rmse))

                     Linear Regression RMSE: 0.6761
 Linear Regression (L2 Regularization) RMSE: 0.6759
 Linear Regression (L1 Regularization) RMSE: 0.9657
                   K-Nearest Neighbors RMSE: 0.6672
                        Neural Network RMSE: 0.6361
                         Decision Tree RMSE: 0.8168
                         Random Forest RMSE: 0.6301
                     Gradient Boosting RMSE: 0.6253
                               XGBoost RMSE: 0.7027
                              LightGBM RMSE: 0.6457
                              CatBoost RMSE: 0.6454


In [8]:
for name, model in models.items():
    y_pred = model.predict(X_test)
    r2 = 1 - (np.sum((y_test - y_pred)**2) / np.sum((y_test - y_test.mean())**2))
    print(name + " R^2: {:.5f}".format(r2))

                     Linear Regression R^2: 0.50952
 Linear Regression (L2 Regularization) R^2: 0.50971
 Linear Regression (L1 Regularization) R^2: -0.00069
                   K-Nearest Neighbors R^2: 0.52226
                        Neural Network R^2: 0.56586
                         Decision Tree R^2: 0.28404
                         Random Forest R^2: 0.57398
                     Gradient Boosting R^2: 0.58044
                               XGBoost R^2: 0.47011
                              LightGBM R^2: 0.55260
                              CatBoost R^2: 0.55300


In [12]:
modell = []
rmsel = []
r2l = []
for name, model in models.items():
    y_pred = model.predict(X_test)
    rmse = np.sqrt(np.mean((y_test - y_pred)**2))
#     print(name + " RMSE: {:.4f}".format(rmse))
    r2 = 1 - (np.sum((y_test - y_pred)**2) / np.sum((y_test - y_test.mean())**2))
#     print(name + " R^2: {:.5f}".format(r2))
    modell.append(name)
    rmsel.append(rmse)
    r2l.append(r2)
df = pd.DataFrame({'ModelName': modell, 'RMSE': rmsel, 'R_squared': r2l})
df

Unnamed: 0,ModelName,RMSE,R_squared
0,Linear Regression,0.676078,0.50952
1,Linear Regression (L2 Regularization),0.675945,0.509711
2,Linear Regression (L1 Regularization),0.965687,-0.000693
3,K-Nearest Neighbors,0.667241,0.522257
4,Neural Network,0.636063,0.565861
5,Decision Tree,0.816827,0.284041
6,Random Forest,0.630087,0.573981
7,Gradient Boosting,0.625294,0.580437
8,XGBoost,0.702712,0.470113
9,LightGBM,0.645702,0.552603
