In [83]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [84]:
df_sample = pd.read_csv('../csv/df_sample.csv')

In [85]:
df_sample['EloDiff'] = df_sample['WhiteElo'] - df_sample['BlackElo']
df_sample.drop(['BlackElo','WhiteElo'],axis=1,inplace=True)

In [86]:
df_sample.head()

Unnamed: 0,Result,Event_enc,ECO_enc,Termination_enc,TimeControl_enc,EloDiff
0,1,1,311,1,5,-25
1,1,2,101,1,13,137
2,1,0,291,0,6,8
3,1,1,200,0,3,188
4,1,0,46,0,6,568


In [87]:
df_sample.shape

(561486, 6)

In [88]:
df_sample_features = df_sample.drop('Result', axis = 1)

In [89]:
X = df_sample_features.values
y = df_sample['Result'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 42)

# Podział

In [90]:
df_blitz = df_sample[df_sample['Event_enc'] == 0]
df_bullet = df_sample[df_sample['Event_enc'] == 1]
df_classical = df_sample[df_sample['Event_enc'] == 2]
df_correspondence = df_sample[df_sample['Event_enc'] == 3]

In [91]:
df_blitz_features = df_blitz.drop('Result', axis = 1)
df_bullet_features = df_bullet.drop('Result', axis = 1)
df_classical_features = df_classical.drop('Result', axis = 1)
df_correspondence_features = df_correspondence.drop('Result', axis = 1)

# DT

### BASIC

In [92]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_train, y_train)
classifier_accuracy = decision_tree_classifier.score(X_test, y_test)

y_pred = decision_tree_classifier.predict(X_test)
print(decision_tree_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, decision_tree_classifier.feature_importances_*100))

41.85826996028425
              precision    recall  f1-score   support

           1    0.40370   0.45298   0.42692     18716
           2    0.41823   0.41092   0.41454     18651
           3    0.43756   0.39192   0.41348     18782

    accuracy                        0.41858     56149
   macro avg    0.41983   0.41861   0.41831     56149
weighted avg    0.41985   0.41858   0.41831     56149



[(array([  0, 304,   1,   8, 242]), 3.4773348223767835),
 (array([  1, 100,   0,   3,  71]), 30.32440100521802),
 (array([  2, 101,   0,   8,  40]), 9.746563836700432),
 (array([   1,    0,    0,    1, -222]), 16.026421821316198),
 (array([  1, 302,   0,   3,  -8]), 40.42527851438855)]

### BLITZ

In [93]:
X_blitz = df_blitz_features.values
y_blitz = df_blitz['Result'].values

X_blitz_train, X_blitz_test, y_blitz_train, y_blitz_test = train_test_split(X_blitz, y_blitz, test_size=0.1, random_state = 42)

In [94]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_blitz_train, y_blitz_train)
classifier_accuracy = decision_tree_classifier.score(X_blitz_test, y_blitz_test)
y_blitz_pred = decision_tree_classifier.predict(X_blitz_test)
print(decision_tree_classifier.score(X_blitz_test, y_blitz_test)*100)
print(classification_report(y_blitz_test, y_blitz_pred,digits = 5))
list(zip(X_train, decision_tree_classifier.feature_importances_*100))

41.0097192224622
              precision    recall  f1-score   support

           1    0.38457   0.43765   0.40940      8340
           2    0.40835   0.39698   0.40258      8406
           3    0.44114   0.39708   0.41795      9182

    accuracy                        0.41010     25928
   macro avg    0.41135   0.41057   0.40998     25928
weighted avg    0.41231   0.41010   0.41022     25928



[(array([  0, 304,   1,   8, 242]), 0.0),
 (array([  1, 100,   0,   3,  71]), 37.479382453287805),
 (array([  2, 101,   0,   8,  40]), 9.621008985858497),
 (array([   1,    0,    0,    1, -222]), 9.713009021730738),
 (array([  1, 302,   0,   3,  -8]), 43.186599539122966)]

### BULLET

In [95]:
X_bullet = df_bullet_features.values
y_bullet = df_bullet['Result'].values

X_bullet_train, X_bullet_test, y_bullet_train, y_bullet_test = train_test_split(X_bullet, y_bullet, test_size=0.1, random_state = 42)

In [96]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_bullet_train, y_bullet_train)
classifier_accuracy = decision_tree_classifier.score(X_bullet_test, y_bullet_test)
y_bullet_pred = decision_tree_classifier.predict(X_bullet_test)
print(decision_tree_classifier.score(X_bullet_test, y_bullet_test)*100)
print(classification_report(y_bullet_test, y_bullet_pred,digits = 5))

list(zip(X_train, decision_tree_classifier.feature_importances_*100))

46.87477208081103
              precision    recall  f1-score   support

           1    0.47053   0.52376   0.49572      5197
           2    0.49413   0.47366   0.48368      5240
           3    0.42129   0.37355   0.39599      3274

    accuracy                        0.46875     13711
   macro avg    0.46198   0.45699   0.45846     13711
weighted avg    0.46779   0.46875   0.46730     13711



[(array([  0, 304,   1,   8, 242]), 0.0),
 (array([  1, 100,   0,   3,  71]), 33.363626241207065),
 (array([  2, 101,   0,   8,  40]), 11.614487787925274),
 (array([   1,    0,    0,    1, -222]), 7.046670682517539),
 (array([  1, 302,   0,   3,  -8]), 47.97521528835013)]

### CLASSICAL

In [97]:
X_classical = df_classical_features.values
y_classical = df_classical['Result'].values

X_classical_train, X_classical_test, y_classical_train, y_classical_test = train_test_split(X_classical, y_classical, test_size=0.1, random_state = 42)

In [98]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_classical_train, y_classical_train)
classifier_accuracy = decision_tree_classifier.score(X_classical_test, y_classical_test)
y_classical_pred = decision_tree_classifier.predict(X_classical_test)
print(decision_tree_classifier.score(X_classical_test, y_classical_test)*100)
print(classification_report(y_classical_test, y_classical_pred,digits = 5))
list(zip(X_train, decision_tree_classifier.feature_importances_*100))

39.911417322834644
              precision    recall  f1-score   support

           1    0.35791   0.41791   0.38559      4879
           2    0.38179   0.37891   0.38034      5025
           3    0.45675   0.40066   0.42687      6352

    accuracy                        0.39911     16256
   macro avg    0.39882   0.39916   0.39760     16256
weighted avg    0.40391   0.39911   0.40010     16256



[(array([  0, 304,   1,   8, 242]), 0.0),
 (array([  1, 100,   0,   3,  71]), 32.57192185670029),
 (array([  2, 101,   0,   8,  40]), 5.151399555498486),
 (array([   1,    0,    0,    1, -222]), 18.79230760837022),
 (array([  1, 302,   0,   3,  -8]), 43.484370979431006)]

### CORRESPONDENCE

In [99]:
X_correspondence = df_correspondence_features.values
y_correspondence = df_correspondence['Result'].values

X_correspondence_train, X_correspondence_test, y_correspondence_train, y_correspondence_test = train_test_split(X_correspondence, y_correspondence, test_size=0.1, random_state = 42)

In [100]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_correspondence_train, y_correspondence_train)
classifier_accuracy = decision_tree_classifier.score(X_correspondence_test, y_correspondence_test)
y_correspondence_pred = decision_tree_classifier.predict(X_correspondence_test)
print(decision_tree_classifier.score(X_correspondence_test, y_correspondence_test)*100)
print(classification_report(y_correspondence_test, y_correspondence_pred,digits = 5))
list(zip(X_train, decision_tree_classifier.feature_importances_*100))

45.703125
              precision    recall  f1-score   support

           1    0.28125   0.32143   0.30000        56
           2    0.39706   0.40909   0.40299        66
           3    0.58065   0.53731   0.55814       134

    accuracy                        0.45703       256
   macro avg    0.41965   0.42261   0.42037       256
weighted avg    0.46782   0.45703   0.46167       256



[(array([  0, 304,   1,   8, 242]), 0.0),
 (array([  1, 100,   0,   3,  71]), 33.53261719776274),
 (array([  2, 101,   0,   8,  40]), 8.242481243013797),
 (array([   1,    0,    0,    1, -222]), 0.0),
 (array([  1, 302,   0,   3,  -8]), 58.224901559223476)]

### DT - STROJONY

In [101]:
decision_tree_classifier = DecisionTreeClassifier(max_depth=6, max_features=5)
decision_tree_classifier.fit(X_train, y_train)
classifier_accuracy = decision_tree_classifier.score(X_test, y_test)

y_pred = decision_tree_classifier.predict(X_test)
print(decision_tree_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, decision_tree_classifier.feature_importances_*100))

51.44348073874868
              precision    recall  f1-score   support

           1    0.59594   0.35793   0.44724     18716
           2    0.58504   0.41000   0.48213     18651
           3    0.45667   0.77409   0.57445     18782

    accuracy                        0.51443     56149
   macro avg    0.54588   0.51401   0.50127     56149
weighted avg    0.54573   0.51443   0.50138     56149



[(array([  0, 304,   1,   8, 242]), 0.05834667025597098),
 (array([  1, 100,   0,   3,  71]), 0.23350587817253804),
 (array([  2, 101,   0,   8,  40]), 51.71366875063267),
 (array([   1,    0,    0,    1, -222]), 0.2663722752298598),
 (array([  1, 302,   0,   3,  -8]), 47.728106425708965)]

# RF

In [102]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

### BASIC

In [103]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)
classifier_accuracy = rf_classifier.score(X_test, y_test)

y_pred = rf_classifier.predict(X_test)
print(rf_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, rf_classifier.feature_importances_*100))

44.36410265543464
              precision    recall  f1-score   support

           1    0.44111   0.41943   0.43000     18716
           2    0.44788   0.42711   0.43725     18651
           3    0.44216   0.48419   0.46222     18782

    accuracy                        0.44364     56149
   macro avg    0.44372   0.44357   0.44316     56149
weighted avg    0.44371   0.44364   0.44318     56149



[(array([  0, 304,   1,   8, 242]), 0.5919342062481115),
 (array([  1, 100,   0,   3,  71]), 33.77401994783949),
 (array([  2, 101,   0,   8,  40]), 8.642019036112437),
 (array([   1,    0,    0,    1, -222]), 6.423415926538392),
 (array([  1, 302,   0,   3,  -8]), 50.56861088326158)]

### BLITZ

In [104]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_blitz_train, y_blitz_train)
classifier_accuracy = rf_classifier.score(X_blitz_test, y_blitz_test)
y_blitz_pred = rf_classifier.predict(X_blitz_test)
print(rf_classifier.score(X_blitz_test, y_blitz_test)*100)
print(classification_report(y_blitz_test, y_blitz_pred,digits = 5))
list(zip(X_train, rf_classifier.feature_importances_*100))

43.9370564640543
              precision    recall  f1-score   support

           1    0.42055   0.39317   0.40640      8340
           2    0.44000   0.41744   0.42842      8406
           3    0.45333   0.50142   0.47616      9182

    accuracy                        0.43937     25928
   macro avg    0.43796   0.43734   0.43699     25928
weighted avg    0.43846   0.43937   0.43824     25928



[(array([  0, 304,   1,   8, 242]), 0.0),
 (array([  1, 100,   0,   3,  71]), 36.35480626682027),
 (array([  2, 101,   0,   8,  40]), 8.5858991626655),
 (array([   1,    0,    0,    1, -222]), 2.6936550013827736),
 (array([  1, 302,   0,   3,  -8]), 52.36563956913145)]

### BULLET

In [105]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_bullet_train, y_bullet_train)
classifier_accuracy = rf_classifier.score(X_bullet_test, y_bullet_test)
y_bullet_pred = rf_classifier.predict(X_bullet_test)
print(rf_classifier.score(X_bullet_test, y_bullet_test)*100)
print(classification_report(y_bullet_test, y_bullet_pred,digits = 5))

list(zip(X_train, rf_classifier.feature_importances_*100))

49.20866457588797
              precision    recall  f1-score   support

           1    0.50261   0.50087   0.50173      5197
           2    0.52212   0.50897   0.51546      5240
           3    0.43137   0.45113   0.44103      3274

    accuracy                        0.49209     13711
   macro avg    0.48537   0.48699   0.48607     13711
weighted avg    0.49305   0.49209   0.49248     13711



[(array([  0, 304,   1,   8, 242]), 0.0),
 (array([  1, 100,   0,   3,  71]), 30.06366319145201),
 (array([  2, 101,   0,   8,  40]), 10.858970597465197),
 (array([   1,    0,    0,    1, -222]), 1.737075471813277),
 (array([  1, 302,   0,   3,  -8]), 57.340290739269506)]

### CLASSICAL

In [106]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_classical_train, y_classical_train)
classifier_accuracy = rf_classifier.score(X_classical_test, y_classical_test)
y_classical_pred = rf_classifier.predict(X_classical_test)
print(rf_classifier.score(X_classical_test, y_classical_test)*100)
print(classification_report(y_classical_test, y_classical_pred,digits = 5))
list(zip(X_train, rf_classifier.feature_importances_*100))

42.384350393700785
              precision    recall  f1-score   support

           1    0.39405   0.38307   0.38848      4879
           2    0.40556   0.37174   0.38791      5025
           3    0.45649   0.49638   0.47560      6352

    accuracy                        0.42384     16256
   macro avg    0.41870   0.41706   0.41733     16256
weighted avg    0.42201   0.42384   0.42235     16256



[(array([  0, 304,   1,   8, 242]), 0.0),
 (array([  1, 100,   0,   3,  71]), 29.141291507204954),
 (array([  2, 101,   0,   8,  40]), 4.725333608062803),
 (array([   1,    0,    0,    1, -222]), 10.86909646084928),
 (array([  1, 302,   0,   3,  -8]), 55.26427842388296)]

### CORRESPONDENCE

In [107]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_correspondence_train, y_correspondence_train)
classifier_accuracy = rf_classifier.score(X_correspondence_test, y_correspondence_test)
y_correspondence_pred = rf_classifier.predict(X_correspondence_test)
print(rf_classifier.score(X_correspondence_test, y_correspondence_test)*100)
print(classification_report(y_correspondence_test, y_correspondence_pred,digits = 5))
list(zip(X_train, rf_classifier.feature_importances_*100))

48.828125
              precision    recall  f1-score   support

           1    0.33929   0.33929   0.33929        56
           2    0.43103   0.37879   0.40323        66
           3    0.57042   0.60448   0.58696       134

    accuracy                        0.48828       256
   macro avg    0.44691   0.44085   0.44316       256
weighted avg    0.48393   0.48828   0.48541       256



[(array([  0, 304,   1,   8, 242]), 0.0),
 (array([  1, 100,   0,   3,  71]), 32.43607155078441),
 (array([  2, 101,   0,   8,  40]), 8.283793900682069),
 (array([   1,    0,    0,    1, -222]), 0.0),
 (array([  1, 302,   0,   3,  -8]), 59.28013454853351)]

### RF - STROJONY

In [108]:
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=6, min_samples_leaf=8, min_samples_split=15, n_jobs=-1, bootstrap=True)
rf_classifier.fit(X_train, y_train)
classifier_accuracy = rf_classifier.score(X_test, y_test)

y_pred = rf_classifier.predict(X_test)
print(rf_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, rf_classifier.feature_importances_*100))

51.4114231776167
              precision    recall  f1-score   support

           1    0.60550   0.35510   0.44766     18716
           2    0.61613   0.35430   0.44989     18651
           3    0.45323   0.83127   0.58662     18782

    accuracy                        0.51411     56149
   macro avg    0.55829   0.51356   0.49473     56149
weighted avg    0.55810   0.51411   0.49489     56149



[(array([  0, 304,   1,   8, 242]), 0.6902447006415262),
 (array([  1, 100,   0,   3,  71]), 0.572605684739938),
 (array([  2, 101,   0,   8,  40]), 54.33988864804176),
 (array([   1,    0,    0,    1, -222]), 2.279920713731898),
 (array([  1, 302,   0,   3,  -8]), 42.11734025284489)]

# D2


In [109]:
df2 = pd.read_csv('../csv/d2_final.csv')
df2.head()

Unnamed: 0.1,Unnamed: 0,Result,WhiteElo,BlackElo,EloDiff,Event,ECO_enc,Termination,TimeControl_enc
0,0,1,2460,2218,242,0,200,1,6
1,1,1,2428,2424,4,0,483,0,6
2,2,1,2441,2300,141,4,106,0,13
3,3,2,2280,2667,-387,0,464,0,6
4,4,1,2557,2682,-125,0,41,0,6


In [110]:
df2.drop(['Unnamed: 0','BlackElo','WhiteElo'], axis = 1, inplace = True)
df2.rename(columns={'Event': 'Event_enc', 'Termination': 'Termination_enc'}, inplace = True)
df2.head()

Unnamed: 0,Result,EloDiff,Event_enc,ECO_enc,Termination_enc,TimeControl_enc
0,1,242,0,200,1,6
1,1,4,0,483,0,6
2,1,141,4,106,0,13
3,2,-387,0,464,0,6
4,1,-125,0,41,0,6


In [111]:
print(df2.shape)

(98756, 6)


In [112]:
feature_names = ['Event_enc', 'ECO_enc', 'Termination_enc', 'TimeControl_enc','EloDiff']
X2 = df2[feature_names].values
y2 = df2['Result'].values
print(X2.shape)
print(y2.shape)

(98756, 5)
(98756,)


In [113]:
decision_tree_classifier = DecisionTreeClassifier()

decision_tree_classifier.fit(X_train, y_train)

y_pred = decision_tree_classifier.predict(X2)
# print(decision_tree_classifier.score(y2, y_test))
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, decision_tree_classifier.feature_importances_*100))

37.40937259508283
              precision    recall  f1-score   support

           1    0.52574   0.39379   0.45030     47170
           2    0.48388   0.33872   0.39849     42549
           3    0.11762   0.43787   0.18544      9037

    accuracy                        0.37409     98756
   macro avg    0.37575   0.39012   0.34474     98756
weighted avg    0.47036   0.37409   0.40374     98756



[(array([  0, 200,   1,   6, 242]), 3.4770650146641784),
 (array([  0, 483,   0,   6,   4]), 30.35480349206078),
 (array([  4, 106,   0,  13, 141]), 9.746563836700448),
 (array([   0,  464,    0,    6, -387]), 16.022833788447453),
 (array([   0,   41,    0,    6, -125]), 40.39873386812716)]

In [114]:
decision_tree_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=6, max_features=5)

decision_tree_classifier.fit(X_train, y_train)

y_pred = decision_tree_classifier.predict(X2)
# print(decision_tree_classifier.score(y2, y_test))
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, decision_tree_classifier.feature_importances_*100))

29.638705496374907
              precision    recall  f1-score   support

           1    0.65285   0.24511   0.35641     47170
           2    0.63447   0.23359   0.34146     42549
           3    0.11883   0.85969   0.20879      9037

    accuracy                        0.29639     98756
   macro avg    0.46872   0.44613   0.30222     98756
weighted avg    0.59606   0.29639   0.33646     98756



[(array([  0, 200,   1,   6, 242]), 0.031271540612088006),
 (array([  0, 483,   0,   6,   4]), 0.15648689833519305),
 (array([  4, 106,   0,  13, 141]), 62.69565964928594),
 (array([   0,  464,    0,    6, -387]), 0.21396986475418628),
 (array([   0,   41,    0,    6, -125]), 36.902612047012596)]

In [115]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X2)
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, rf_classifier.feature_importances_*100))

34.85762890356029
              precision    recall  f1-score   support

           1    0.54469   0.32711   0.40875     47170
           2    0.50287   0.32894   0.39772     42549
           3    0.11733   0.55306   0.19360      9037

    accuracy                        0.34858     98756
   macro avg    0.38830   0.40304   0.33336     98756
weighted avg    0.48757   0.34858   0.38431     98756



[(array([  0, 200,   1,   6, 242]), 0.6134995390997221),
 (array([  0, 483,   0,   6,   4]), 33.40314130195305),
 (array([  4, 106,   0,  13, 141]), 8.607713524046003),
 (array([   0,  464,    0,    6, -387]), 6.722465451142663),
 (array([   0,   41,    0,    6, -125]), 50.653180183758565)]

In [116]:
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=6, min_samples_leaf=8, min_samples_split=15, n_jobs=-1, bootstrap=True)
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X2)
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, rf_classifier.feature_importances_*100))

25.780712058001537
              precision    recall  f1-score   support

           1    0.64985   0.18599   0.28920     47170
           2    0.62187   0.19338   0.29502     42549
           3    0.11745   0.93604   0.20870      9037

    accuracy                        0.25781     98756
   macro avg    0.46306   0.43847   0.26431     98756
weighted avg    0.58908   0.25781   0.28434     98756



[(array([  0, 200,   1,   6, 242]), 0.7279123324177673),
 (array([  0, 483,   0,   6,   4]), 0.5748935635959024),
 (array([  4, 106,   0,  13, 141]), 54.647962936537375),
 (array([   0,  464,    0,    6, -387]), 2.8025089355974133),
 (array([   0,   41,    0,    6, -125]), 41.24672223185154)]

# DS2

In [117]:
df2_sample = pd.read_csv('../csv/df2_sample.csv')
df2_sample.head()

Unnamed: 0,Result,WhiteElo,BlackElo,EloDiff,Event,ECO_enc,Termination,TimeControl_enc
0,1,2669,2625,44,0,101,0,6
1,1,2516,2337,179,0,120,1,6
2,1,2468,2435,33,0,106,0,6
3,1,2404,2337,67,0,10,0,6
4,1,2295,2438,-143,0,492,0,6


In [118]:
df2_sample.drop(['BlackElo', 'WhiteElo'], axis = 1, inplace = True)
df2_sample.rename(columns={'Event': 'Event_enc', 'Termination': 'Termination_enc'}, inplace = True)
df2_sample.head()

Unnamed: 0,Result,EloDiff,Event_enc,ECO_enc,Termination_enc,TimeControl_enc
0,1,44,0,101,0,6
1,1,179,0,120,1,6
2,1,33,0,106,0,6
3,1,67,0,10,0,6
4,1,-143,0,492,0,6


In [119]:
print(df2_sample.shape)

(27111, 6)


In [120]:
feature_names = ['Event_enc', 'ECO_enc', 'Termination_enc', 'TimeControl_enc','EloDiff']
X2 = df2_sample[feature_names].values
y2 = df2_sample['Result'].values
print(X2.shape)
print(y2.shape)

(27111, 5)
(27111,)


In [121]:
decision_tree_classifier = DecisionTreeClassifier()

decision_tree_classifier.fit(X_train, y_train)

y_pred = decision_tree_classifier.predict(X2)
# print(decision_tree_classifier.score(y2, y_test))
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, decision_tree_classifier.feature_importances_*100))

39.18704584854856
              precision    recall  f1-score   support

           1    0.38383   0.39759   0.39059      9037
           2    0.39227   0.34126   0.36499      9037
           3    0.39917   0.43676   0.41712      9037

    accuracy                        0.39187     27111
   macro avg    0.39175   0.39187   0.39090     27111
weighted avg    0.39175   0.39187   0.39090     27111



[(array([  0, 101,   0,   6,  44]), 3.4709660661798756),
 (array([  0, 120,   1,   6, 179]), 30.37437204393821),
 (array([  0, 106,   0,   6,  33]), 9.74656383670042),
 (array([ 0, 10,  0,  6, 67]), 16.021918928378),
 (array([   0,  492,    0,    6, -143]), 40.3861791248035)]

In [122]:
decision_tree_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=6, max_features=5)
decision_tree_classifier.fit(X_train, y_train)
y_pred = decision_tree_classifier.predict(X2)
# print(decision_tree_classifier.score(y2, y_test))
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, decision_tree_classifier.feature_importances_*100))

44.43214931208735
              precision    recall  f1-score   support

           1    0.54496   0.24145   0.33464      9037
           2    0.56622   0.23182   0.32896      9037
           3    0.40032   0.85969   0.54627      9037

    accuracy                        0.44432     27111
   macro avg    0.50383   0.44432   0.40329     27111
weighted avg    0.50383   0.44432   0.40329     27111



[(array([  0, 101,   0,   6,  44]), 0.031271540612088006),
 (array([  0, 120,   1,   6, 179]), 0.15648689833519305),
 (array([  0, 106,   0,   6,  33]), 62.69565964928594),
 (array([ 0, 10,  0,  6, 67]), 0.21396986475418628),
 (array([   0,  492,    0,    6, -143]), 36.902612047012596)]

In [123]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X2)
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, rf_classifier.feature_importances_*100))

40.61451071520785
              precision    recall  f1-score   support

           1    0.40644   0.32832   0.36322      9037
           2    0.41426   0.33230   0.36878      9037
           3    0.40129   0.55782   0.46678      9037

    accuracy                        0.40615     27111
   macro avg    0.40733   0.40615   0.39960     27111
weighted avg    0.40733   0.40615   0.39960     27111



[(array([  0, 101,   0,   6,  44]), 0.5922330533908106),
 (array([  0, 120,   1,   6, 179]), 34.02854511594214),
 (array([  0, 106,   0,   6,  33]), 8.57256136603211),
 (array([ 0, 10,  0,  6, 67]), 6.407619836798287),
 (array([   0,  492,    0,    6, -143]), 50.39904062783667)]

In [124]:
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=6, min_samples_leaf=8, min_samples_split=15, n_jobs=-1, bootstrap=True)
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X2)
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, rf_classifier.feature_importances_*100))

43.690752830954224
              precision    recall  f1-score   support

           1    0.56973   0.18037   0.27400      9037
           2    0.58755   0.19531   0.29317      9037
           3    0.39772   0.93504   0.55807      9037

    accuracy                        0.43691     27111
   macro avg    0.51833   0.43691   0.37508     27111
weighted avg    0.51833   0.43691   0.37508     27111



[(array([  0, 101,   0,   6,  44]), 0.7892352664859609),
 (array([  0, 120,   1,   6, 179]), 0.6161250332754953),
 (array([  0, 106,   0,   6,  33]), 54.28283934435403),
 (array([ 0, 10,  0,  6, 67]), 2.6173534060912984),
 (array([   0,  492,    0,    6, -143]), 41.694446949793225)]