In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [5]:
df_sample = pd.read_csv('../csv/bez_remisow.csv')

In [6]:
df_sample.head()

Unnamed: 0,Result,WhiteElo,BlackElo,Event_enc,ECO_enc,Termination_enc,TimeControl_enc
0,1,1721,1746,1,311,1,5
1,1,1319,1182,2,101,1,13
2,1,2007,1999,0,291,0,6
3,1,1734,1546,1,200,0,3
4,1,2148,1580,0,46,0,6


In [7]:
df_sample.shape

(5802056, 7)

In [8]:
df_sample_features = df_sample.drop('Result', axis = 1)

In [9]:
X = df_sample_features.values
y = df_sample['Result'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 42)

# Podział

In [10]:
df_blitz = df_sample[df_sample['Event_enc'] == 0]
df_bullet = df_sample[df_sample['Event_enc'] == 1]
df_classical = df_sample[df_sample['Event_enc'] == 2]
df_correspondence = df_sample[df_sample['Event_enc'] == 3]

In [11]:
df_blitz_features = df_blitz.drop('Result', axis = 1)
df_bullet_features = df_bullet.drop('Result', axis = 1)
df_classical_features = df_classical.drop('Result', axis = 1)
df_correspondence_features = df_correspondence.drop('Result', axis = 1)

# DT

### BASIC

In [12]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_train, y_train)
classifier_accuracy = decision_tree_classifier.score(X_test, y_test)

y_pred = decision_tree_classifier.predict(X_test)
print(decision_tree_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, decision_tree_classifier.feature_importances_*100))

57.3106793104518
              precision    recall  f1-score   support

           1    0.57334   0.57574   0.57454    290470
           2    0.57287   0.57046   0.57167    289736

    accuracy                        0.57311    580206
   macro avg    0.57311   0.57310   0.57310    580206
weighted avg    0.57311   0.57311   0.57310    580206



[(array([1817, 1697,    1,  305,    0,    5]), 32.161830671810414),
 (array([2279, 2221,    0,  103,    0,    6]), 27.7314936364096),
 (array([1998, 1937,    1,  203,    1,    3]), 2.7202926287137124),
 (array([1802, 1894,    2,  107,    0,    8]), 24.018812549100442),
 (array([1883, 1604,    2,  265,    0,   13]), 2.212548351513568),
 (array([1857, 1807,    2,  262,    1,   18]), 11.155022162452259)]

### BLITZ

In [13]:
X_blitz = df_blitz_features.values
y_blitz = df_blitz['Result'].values

X_blitz_train, X_blitz_test, y_blitz_train, y_blitz_test = train_test_split(X_blitz, y_blitz, test_size=0.1, random_state = 42)

In [14]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_blitz_train, y_blitz_train)
classifier_accuracy = decision_tree_classifier.score(X_blitz_test, y_blitz_test)
y_blitz_pred = decision_tree_classifier.predict(X_blitz_test)
print(decision_tree_classifier.score(X_blitz_test, y_blitz_test)*100)
print(classification_report(y_blitz_test, y_blitz_pred,digits = 5))
list(zip(X_train, decision_tree_classifier.feature_importances_*100))

56.46620849854435
              precision    recall  f1-score   support

           1    0.56406   0.56814   0.56609    129969
           2    0.56527   0.56119   0.56322    130052

    accuracy                        0.56466    260021
   macro avg    0.56467   0.56466   0.56466    260021
weighted avg    0.56467   0.56466   0.56466    260021



[(array([1817, 1697,    1,  305,    0,    5]), 33.36810599549453),
 (array([2279, 2221,    0,  103,    0,    6]), 29.838830692271372),
 (array([1998, 1937,    1,  203,    1,    3]), 0.0),
 (array([1802, 1894,    2,  107,    0,    8]), 27.24062607711576),
 (array([1883, 1604,    2,  265,    0,   13]), 2.5662159626891468),
 (array([1857, 1807,    2,  262,    1,   18]), 6.986221272429194)]

### BULLET

In [15]:
X_bullet = df_bullet_features.values
y_bullet = df_bullet['Result'].values

X_bullet_train, X_bullet_test, y_bullet_train, y_bullet_test = train_test_split(X_bullet, y_bullet, test_size=0.1, random_state = 42)

In [16]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_bullet_train, y_bullet_train)
classifier_accuracy = decision_tree_classifier.score(X_bullet_test, y_bullet_test)
y_bullet_pred = decision_tree_classifier.predict(X_bullet_test)
print(decision_tree_classifier.score(X_bullet_test, y_bullet_test)*100)
print(classification_report(y_bullet_test, y_bullet_pred,digits = 5))

list(zip(X_train, decision_tree_classifier.feature_importances_*100))

59.34103635450234
              precision    recall  f1-score   support

           1    0.58860   0.59604   0.59230     80879
           2    0.59825   0.59083   0.59452     82347

    accuracy                        0.59341    163226
   macro avg    0.59343   0.59343   0.59341    163226
weighted avg    0.59347   0.59341   0.59342    163226



[(array([1817, 1697,    1,  305,    0,    5]), 36.41238365728933),
 (array([2279, 2221,    0,  103,    0,    6]), 31.071920896428896),
 (array([1998, 1937,    1,  203,    1,    3]), 0.0),
 (array([1802, 1894,    2,  107,    0,    8]), 23.258094188306515),
 (array([1883, 1604,    2,  265,    0,   13]), 2.4875094049017554),
 (array([1857, 1807,    2,  262,    1,   18]), 6.77009185307349)]

### CLASSICAL

In [17]:
X_classical = df_classical_features.values
y_classical = df_classical['Result'].values

X_classical_train, X_classical_test, y_classical_train, y_classical_test = train_test_split(X_classical, y_classical, test_size=0.1, random_state = 42)

In [18]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_classical_train, y_classical_train)
classifier_accuracy = decision_tree_classifier.score(X_classical_test, y_classical_test)
y_classical_pred = decision_tree_classifier.predict(X_classical_test)
print(decision_tree_classifier.score(X_classical_test, y_classical_test)*100)
print(classification_report(y_classical_test, y_classical_pred,digits = 5))
list(zip(X_train, decision_tree_classifier.feature_importances_*100))

56.94166548336711
              precision    recall  f1-score   support

           1    0.57017   0.57385   0.57200     77684
           2    0.56865   0.56496   0.56680     77250

    accuracy                        0.56942    154934
   macro avg    0.56941   0.56940   0.56940    154934
weighted avg    0.56941   0.56942   0.56941    154934



[(array([1817, 1697,    1,  305,    0,    5]), 33.44649647938824),
 (array([2279, 2221,    0,  103,    0,    6]), 29.478500498764397),
 (array([1998, 1937,    1,  203,    1,    3]), 0.0),
 (array([1802, 1894,    2,  107,    0,    8]), 23.071756027363737),
 (array([1883, 1604,    2,  265,    0,   13]), 1.9691580143350218),
 (array([1857, 1807,    2,  262,    1,   18]), 12.034088980148617)]

### CORRESPONDENCE

In [19]:
X_correspondence = df_correspondence_features.values
y_correspondence = df_correspondence['Result'].values

X_correspondence_train, X_correspondence_test, y_correspondence_train, y_correspondence_test = train_test_split(X_correspondence, y_correspondence, test_size=0.1, random_state = 42)

In [20]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_correspondence_train, y_correspondence_train)
classifier_accuracy = decision_tree_classifier.score(X_correspondence_test, y_correspondence_test)
y_correspondence_pred = decision_tree_classifier.predict(X_correspondence_test)
print(decision_tree_classifier.score(X_correspondence_test, y_correspondence_test)*100)
print(classification_report(y_correspondence_test, y_correspondence_pred,digits = 5))
list(zip(X_train, decision_tree_classifier.feature_importances_*100))

57.22742969906266
              precision    recall  f1-score   support

           1    0.57143   0.55611   0.56366      1007
           2    0.57307   0.58824   0.58055      1020

    accuracy                        0.57227      2027
   macro avg    0.57225   0.57217   0.57211      2027
weighted avg    0.57225   0.57227   0.57216      2027



[(array([1817, 1697,    1,  305,    0,    5]), 34.89910771524649),
 (array([2279, 2221,    0,  103,    0,    6]), 36.22230351762351),
 (array([1998, 1937,    1,  203,    1,    3]), 0.0),
 (array([1802, 1894,    2,  107,    0,    8]), 26.442261634251864),
 (array([1883, 1604,    2,  265,    0,   13]), 2.436327132878157),
 (array([1857, 1807,    2,  262,    1,   18]), 0.0)]

### DT - STROJONY

In [21]:
decision_tree_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=7, max_features=6)
decision_tree_classifier.fit(X_train, y_train)
classifier_accuracy = decision_tree_classifier.score(X_test, y_test)

y_pred = decision_tree_classifier.predict(X_test)
print(decision_tree_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, decision_tree_classifier.feature_importances_*100))

64.87764690472004
              precision    recall  f1-score   support

           1    0.64770   0.65436   0.65101    290470
           2    0.64988   0.64318   0.64651    289736

    accuracy                        0.64878    580206
   macro avg    0.64879   0.64877   0.64876    580206
weighted avg    0.64879   0.64878   0.64876    580206



[(array([1817, 1697,    1,  305,    0,    5]), 64.44711388193683),
 (array([2279, 2221,    0,  103,    0,    6]), 35.54357670129066),
 (array([1998, 1937,    1,  203,    1,    3]), 0.0),
 (array([1802, 1894,    2,  107,    0,    8]), 0.0),
 (array([1883, 1604,    2,  265,    0,   13]), 0.009309416772510647),
 (array([1857, 1807,    2,  262,    1,   18]), 0.0)]

# RF

In [22]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

### BASIC

In [23]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)
classifier_accuracy = rf_classifier.score(X_test, y_test)

y_pred = rf_classifier.predict(X_test)
print(rf_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, rf_classifier.feature_importances_*100))

62.451956718820554
              precision    recall  f1-score   support

           1    0.62371   0.63018   0.62693    290470
           2    0.62535   0.61884   0.62208    289736

    accuracy                        0.62452    580206
   macro avg    0.62453   0.62451   0.62450    580206
weighted avg    0.62453   0.62452   0.62451    580206



[(array([1817, 1697,    1,  305,    0,    5]), 36.88807169030987),
 (array([2279, 2221,    0,  103,    0,    6]), 36.39559384872172),
 (array([1998, 1937,    1,  203,    1,    3]), 0.8450858724376743),
 (array([1802, 1894,    2,  107,    0,    8]), 19.59930457014486),
 (array([1883, 1604,    2,  265,    0,   13]), 0.5271263038201085),
 (array([1857, 1807,    2,  262,    1,   18]), 5.744817714565775)]

### BLITZ

In [24]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_blitz_train, y_blitz_train)
classifier_accuracy = rf_classifier.score(X_blitz_test, y_blitz_test)
y_blitz_pred = rf_classifier.predict(X_blitz_test)
print(rf_classifier.score(X_blitz_test, y_blitz_test)*100)
print(classification_report(y_blitz_test, y_blitz_pred,digits = 5))
list(zip(X_train, rf_classifier.feature_importances_*100))

61.08083577864865
              precision    recall  f1-score   support

           1    0.60906   0.61812   0.61356    129969
           2    0.61261   0.60350   0.60802    130052

    accuracy                        0.61081    260021
   macro avg    0.61083   0.61081   0.61079    260021
weighted avg    0.61083   0.61081   0.61079    260021



[(array([1817, 1697,    1,  305,    0,    5]), 37.35003431292076),
 (array([2279, 2221,    0,  103,    0,    6]), 37.54115017855261),
 (array([1998, 1937,    1,  203,    1,    3]), 0.0),
 (array([1802, 1894,    2,  107,    0,    8]), 21.971243037306934),
 (array([1883, 1604,    2,  265,    0,   13]), 0.3938233746889257),
 (array([1857, 1807,    2,  262,    1,   18]), 2.7437490965307676)]

### BULLET

In [25]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_bullet_train, y_bullet_train)
classifier_accuracy = rf_classifier.score(X_bullet_test, y_bullet_test)
y_bullet_pred = rf_classifier.predict(X_bullet_test)
print(rf_classifier.score(X_bullet_test, y_bullet_test)*100)
print(classification_report(y_bullet_test, y_bullet_pred,digits = 5))

list(zip(X_train, rf_classifier.feature_importances_*100))

64.93021945033267
              precision    recall  f1-score   support

           1    0.64480   0.65066   0.64772     80879
           2    0.65380   0.64797   0.65087     82347

    accuracy                        0.64930    163226
   macro avg    0.64930   0.64931   0.64930    163226
weighted avg    0.64934   0.64930   0.64931    163226



[(array([1817, 1697,    1,  305,    0,    5]), 39.75061670714437),
 (array([2279, 2221,    0,  103,    0,    6]), 39.128000759980644),
 (array([1998, 1937,    1,  203,    1,    3]), 0.0),
 (array([1802, 1894,    2,  107,    0,    8]), 18.58153693828725),
 (array([1883, 1604,    2,  265,    0,   13]), 0.39159570184980713),
 (array([1857, 1807,    2,  262,    1,   18]), 2.148249892737938)]

### CLASSICAL

In [26]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_classical_train, y_classical_train)
classifier_accuracy = rf_classifier.score(X_classical_test, y_classical_test)
y_classical_pred = rf_classifier.predict(X_classical_test)
print(rf_classifier.score(X_classical_test, y_classical_test)*100)
print(classification_report(y_classical_test, y_classical_pred,digits = 5))
list(zip(X_train, rf_classifier.feature_importances_*100))

61.96767655905096
              precision    recall  f1-score   support

           1    0.61799   0.63237   0.62510     77684
           2    0.62145   0.60691   0.61409     77250

    accuracy                        0.61968    154934
   macro avg    0.61972   0.61964   0.61960    154934
weighted avg    0.61972   0.61968   0.61961    154934



[(array([1817, 1697,    1,  305,    0,    5]), 37.10450774853671),
 (array([2279, 2221,    0,  103,    0,    6]), 35.67125933687863),
 (array([1998, 1937,    1,  203,    1,    3]), 0.0),
 (array([1802, 1894,    2,  107,    0,    8]), 18.49620709992189),
 (array([1883, 1604,    2,  265,    0,   13]), 0.4324550426685643),
 (array([1857, 1807,    2,  262,    1,   18]), 8.29557077199419)]

### CORRESPONDENCE

In [27]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_correspondence_train, y_correspondence_train)
classifier_accuracy = rf_classifier.score(X_correspondence_test, y_correspondence_test)
y_correspondence_pred = rf_classifier.predict(X_correspondence_test)
print(rf_classifier.score(X_correspondence_test, y_correspondence_test)*100)
print(classification_report(y_correspondence_test, y_correspondence_pred,digits = 5))
list(zip(X_train, rf_classifier.feature_importances_*100))

60.286137148495314
              precision    recall  f1-score   support

           1    0.59768   0.61370   0.60559      1007
           2    0.60826   0.59216   0.60010      1020

    accuracy                        0.60286      2027
   macro avg    0.60297   0.60293   0.60284      2027
weighted avg    0.60300   0.60286   0.60282      2027



[(array([1817, 1697,    1,  305,    0,    5]), 37.36238101481473),
 (array([2279, 2221,    0,  103,    0,    6]), 36.38615208394349),
 (array([1998, 1937,    1,  203,    1,    3]), 0.0),
 (array([1802, 1894,    2,  107,    0,    8]), 25.13391805441622),
 (array([1883, 1604,    2,  265,    0,   13]), 1.1175488468255508),
 (array([1857, 1807,    2,  262,    1,   18]), 0.0)]

### RF - STROJONY

In [28]:
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=6, min_samples_leaf=8, min_samples_split=15, n_jobs=-1, bootstrap=True)
rf_classifier.fit(X_train, y_train)
classifier_accuracy = rf_classifier.score(X_test, y_test)

y_pred = rf_classifier.predict(X_test)
print(rf_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, rf_classifier.feature_importances_*100))

65.27957311713426
              precision    recall  f1-score   support

           1    0.65023   0.66324   0.65667    290470
           2    0.65548   0.64232   0.64883    289736

    accuracy                        0.65280    580206
   macro avg    0.65285   0.65278   0.65275    580206
weighted avg    0.65285   0.65280   0.65276    580206



[(array([1817, 1697,    1,  305,    0,    5]), 50.21877957522495),
 (array([2279, 2221,    0,  103,    0,    6]), 46.6144242736865),
 (array([1998, 1937,    1,  203,    1,    3]), 0.14694185824874162),
 (array([1802, 1894,    2,  107,    0,    8]), 2.0089063017708155),
 (array([1883, 1604,    2,  265,    0,   13]), 0.3630295809237678),
 (array([1857, 1807,    2,  262,    1,   18]), 0.6479184101452297)]