In [1]:
import pandas as pd
import numpy as np

# Open database

In [2]:
from utils import (
    read_database,
    train_test_split_by_game_id,
    filter_dataset,
    get_features_and_labels
)

In [3]:
# Threshold handed to filter_dataset for both colours
# (its unit/meaning is defined by that helper — TODO confirm).
score_threshold = 100
# Directory that read_database is pointed at when loading the data.
database_directory = "/home/database"

In [4]:
# Load the white and black datasets from the configured directory.
white_df, black_df = read_database(database_directory)

# Apply the same score threshold to each colour (white first, then black).
white_df_filtered, black_df_filtered = (
    filter_dataset(colour_df, score_threshold) for colour_df in (white_df, black_df)
)

# Split both filtered datasets into train/test partitions in a single call
# (presumably grouped by game id, going by the helper's name — verify in utils).
trn_w, tst_w, trn_b, tst_b = train_test_split_by_game_id(
    white_df_filtered, black_df_filtered
)

In [5]:
# Row counts of the filtered datasets and of every train/test partition,
# listed in the order they should be printed.
size_report = [
    ("white dataset:", white_df_filtered),
    ("black dataset:", black_df_filtered),
    ("white train  :", trn_w),
    ("black train  :", trn_b),
    ("white test   :", tst_w),
    ("black test   :", tst_b),
]
print(*(f"{label} {frame.shape[0]}" for label, frame in size_report), sep="\n")

white dataset: 106268
black dataset: 104345
white train  : 75602
black train  : 74199
white test   : 30666
black test   : 30146


# Classification

### Logistic Regression

In [6]:
from sklearn.linear_model import LogisticRegression

def test_lr(trn, tst, **kwargs):
    """Fit a logistic regression on ``trn`` and score it on ``tst``.

    Args:
        trn: Training dataset, passed to ``get_features_and_labels``.
        tst: Held-out dataset, passed to ``get_features_and_labels``.
        **kwargs: Extra keyword arguments forwarded to ``LogisticRegression``
            (``random_state`` is already fixed to 1 here).

    Returns:
        The value of the fitted model's ``score`` on the test features and
        labels (mean accuracy for scikit-learn classifiers).
    """
    train_features, train_labels = get_features_and_labels(trn)
    test_features, test_labels = get_features_and_labels(tst)

    classifier = LogisticRegression(random_state=1, **kwargs)
    classifier.fit(train_features, train_labels)
    return classifier.score(test_features, test_labels)

In [7]:
# Same solver settings for both colours; the raised iteration cap is passed
# straight through to LogisticRegression.
lr_params = {"max_iter": 400}
print(f"white: {test_lr(trn_w, tst_w, **lr_params):.2f}")
print(f"black: {test_lr(trn_b, tst_b, **lr_params):.2f}")

white: 0.87
black: 0.86


### Random Forest

In [8]:
from sklearn.ensemble import RandomForestClassifier

def test_rf(trn, tst, **kwargs):
    """Fit a random forest on ``trn`` and score it on ``tst``.

    Args:
        trn: Training dataset, passed to ``get_features_and_labels``.
        tst: Held-out dataset, passed to ``get_features_and_labels``.
        **kwargs: Extra keyword arguments forwarded to
            ``RandomForestClassifier`` (``random_state`` is already fixed
            to 1 here).

    Returns:
        The value of the fitted model's ``score`` on the test features and
        labels (mean accuracy for scikit-learn classifiers).
    """
    train_features, train_labels = get_features_and_labels(trn)
    test_features, test_labels = get_features_and_labels(tst)

    forest = RandomForestClassifier(random_state=1, **kwargs)
    forest.fit(train_features, train_labels)
    return forest.score(test_features, test_labels)

In [9]:
# Same forest size for both colours.
rf_params = {"n_estimators": 200}
print(f"white: {test_rf(trn_w, tst_w, **rf_params):.2f}")
print(f"black: {test_rf(trn_b, tst_b, **rf_params):.2f}")

white: 0.69
black: 0.68


### XGBoost

In [10]:
from xgboost import XGBClassifier

def test_xgb(trn, tst, **kwargs):
    """Fit an XGBoost classifier on ``trn`` and score it on ``tst``.

    Mirrors ``test_lr`` / ``test_rf``: the seed is pinned and extra
    hyperparameters can be supplied by the caller. Calling it with only
    ``trn`` and ``tst`` keeps working as before.

    Args:
        trn: Training dataset, passed to ``get_features_and_labels``.
        tst: Held-out dataset, passed to ``get_features_and_labels``.
        **kwargs: Extra keyword arguments forwarded to ``XGBClassifier``
            (``random_state`` is already fixed to 1 here, as in the
            sibling helpers).

    Returns:
        The value of the fitted model's ``score`` on the test features and
        labels (mean accuracy via the scikit-learn classifier interface).
    """
    X_trn, y_trn = get_features_and_labels(trn)
    X_tst, y_tst = get_features_and_labels(tst)

    # Pin the seed so runs stay reproducible once stochastic options
    # (e.g. row/column subsampling) are passed in through kwargs.
    model = XGBClassifier(random_state=1, **kwargs)
    model.fit(X_trn, y_trn)
    return model.score(X_tst, y_tst)

In [11]:
# Evaluate each colour, then report its score rounded to two decimals.
white_xgb_score = test_xgb(trn_w, tst_w)
print(f"white: {white_xgb_score:.2f}")
black_xgb_score = test_xgb(trn_b, tst_b)
print(f"black: {black_xgb_score:.2f}")

white: 0.71
black: 0.70


### SVC

In [None]:
from sklearn.svm import SVC

def test_svc(trn, tst, **kwargs):
    """Fit a support vector classifier on ``trn`` and score it on ``tst``.

    Args:
        trn: Training dataset, passed to ``get_features_and_labels``.
        tst: Held-out dataset, passed to ``get_features_and_labels``.
        **kwargs: Keyword arguments forwarded unchanged to ``SVC``
            (kernel, degree, gamma, C, ...).

    Returns:
        The value of the fitted model's ``score`` on the test features and
        labels (mean accuracy for scikit-learn classifiers).
    """
    train_features, train_labels = get_features_and_labels(trn)
    test_features, test_labels = get_features_and_labels(tst)

    svm = SVC(**kwargs)
    svm.fit(train_features, train_labels)
    return svm.score(test_features, test_labels)

In [None]:
# Degree-2 polynomial kernel, shared by both colours.
poly_params = {"kernel": "poly", "degree": 2}
print(f"white: {test_svc(trn_w, tst_w, **poly_params):.2f}")
print(f"black: {test_svc(trn_b, tst_b, **poly_params):.2f}")

In [None]:
# RBF kernel with gamma="scale" and C=30, shared by both colours.
rbf_params = {"kernel": "rbf", "gamma": "scale", "C": 30}
print(f"white: {test_svc(trn_w, tst_w, **rbf_params):.2f}")
print(f"black: {test_svc(trn_b, tst_b, **rbf_params):.2f}")