# Chess Game Dataset (Lichess)

# Machine Learning Model Training

# 1] Import Section

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)

# 2] Data Load

In [2]:
path = r'C:\Users\Mudar Hussain\Documents\Chess Winner Prediction\dataset\Preprocessed_chess_dataset.csv'
df = pd.read_csv(path)

In [3]:
df.head()

Unnamed: 0,rated,turns,winner,opening_ply,victory_status_draw,victory_status_mate,victory_status_outoftime,victory_status_resign,rating_diff_wrt_white
0,0.0,13.0,1.0,5.0,0.0,0.0,1.0,0.0,309.0
1,1.0,16.0,0.0,4.0,0.0,0.0,0.0,1.0,61.0
2,1.0,61.0,1.0,3.0,0.0,1.0,0.0,0.0,-4.0
3,1.0,61.0,1.0,3.0,0.0,1.0,0.0,0.0,-15.0
4,1.0,95.0,1.0,5.0,0.0,1.0,0.0,0.0,54.0


# 3] Split Data

In [4]:
X = df.drop('winner', axis= 1)
y = df['winner']

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2020)
 
print('Shape of X_train = ', X_train.shape)
print('Shape of y_train = ', y_train.shape)
print('Shape of X_test = ', X_test.shape)
print('Shape of y_test = ', y_test.shape)

Shape of X_train =  (16046, 8)
Shape of y_train =  (16046,)
Shape of X_test =  (4012, 8)
Shape of y_test =  (4012,)


# 4] Feature Scaling

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
sc = StandardScaler()

In [8]:
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# 5] Model Creation

5.1] Import Section

In [9]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

In [10]:
xgb = XGBClassifier()
model_xgb = xgb.fit(X_train, y_train)
model_xgb.score(X_test, y_test)

0.8900797607178464

In [11]:
RFC = RandomForestClassifier()
model_rfc = RFC.fit(X_train, y_train)
model_rfc.score(X_test, y_test)

0.6944167497507477

In [12]:
DTC = DecisionTreeClassifier()
model_dtc = DTC.fit(X_train, y_train)
model_dtc.score(X_test, y_test)

0.7554835493519442

In [13]:
LGBM = LGBMClassifier()
model_lgbm = LGBM.fit(X_train, y_train)
model_lgbm.score(X_test, y_test)

0.8778664007976071

In [14]:
BAG = BaggingClassifier()
model_bag = BAG.fit(X_train, y_train)
model_bag.score(X_test, y_test)

0.7734297108673978

In [15]:
KNC = KNeighborsClassifier()
model_knc = KNC.fit(X_train, y_train)
model_knc.score(X_test, y_test)

0.6296111665004985

# 6] Model Evaluation

In [16]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

Accuracy

In [17]:
def get_accuracy(model):
    return model.score(X_test, y_test)

Root Mean Square Error

In [18]:
def get_rmse(model):
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    return rmse

R-Squared

In [19]:
def get_rscore(model):
    y_pred = model.predict(X_test)
    return r2_score(y_test, y_pred)

Score Card

In [20]:
score_card = pd.DataFrame(columns = ['Model_Name', 'Accuracy', 'Root Mean Sq. Error', 'R-Squared'])

In [21]:
def update_score_card(model_name, model):
    global score_card
    score_card = score_card.append({'Model_Name': model_name, 'Accuracy': get_accuracy(model), 'Root Mean Sq. Error': get_rmse(model), 'R-Squared': get_rscore(model)}, ignore_index=True)

Updating Score Card for all models

In [22]:
update_score_card('XGB Classifier', model_xgb)
update_score_card('Random Forest Classifier', model_rfc)
update_score_card('Decision Tree Classifier', model_dtc)
update_score_card('LGBM Classifier', model_lgbm)
update_score_card('Bagging Classifier', model_bag)
update_score_card('K Neighbors Classifier', model_knc)

In [23]:
score_card

Unnamed: 0,Model_Name,Accuracy,Root Mean Sq. Error,R-Squared
0,XGB Classifier,0.89008,0.331542,0.559988
1,Random Forest Classifier,0.694417,0.552796,-0.223253
2,Decision Tree Classifier,0.755484,0.494486,0.021198
3,LGBM Classifier,0.877866,0.349476,0.511098
4,Bagging Classifier,0.77343,0.475994,0.093037
5,K Neighbors Classifier,0.629611,0.608596,-0.48267


In [24]:
score_card.sort_values("Accuracy", ascending = False, ignore_index=True)

Unnamed: 0,Model_Name,Accuracy,Root Mean Sq. Error,R-Squared
0,XGB Classifier,0.89008,0.331542,0.559988
1,LGBM Classifier,0.877866,0.349476,0.511098
2,Bagging Classifier,0.77343,0.475994,0.093037
3,Decision Tree Classifier,0.755484,0.494486,0.021198
4,Random Forest Classifier,0.694417,0.552796,-0.223253
5,K Neighbors Classifier,0.629611,0.608596,-0.48267


# 7] Save ML Models using Joblib

In [25]:
import joblib

To save ML Model to file

In [26]:
joblib.dump(model_xgb,r'.\ML_models\XGB Classifier')
joblib.dump(model_rfc,r'.\ML_models\Random Forest Classifier')
joblib.dump(model_dtc,r'.\ML_models\Decision Tree Classifier')
joblib.dump(model_lgbm,r'.\ML_models\LGBM Classifier')
joblib.dump(model_bag,r'.\ML_models\Bagging Classifier')
joblib.dump(model_knc,r'.\ML_models\K Neighbors Classifier')

['.\\ML_models\\K Neighbors Classifier']

To load the trained model from file

In [27]:
xgb_model_path = r'.\ML_models\XGB Classifier'
xgb_model = joblib.load(xgb_model_path)

In [28]:
xgb_model.score(X_test, y_test)

0.8900797607178464