In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [2]:
df_sample = pd.read_csv('../csv/df_sample.csv')

In [3]:
df_sample.head()

Unnamed: 0,Result,WhiteElo,BlackElo,Event_enc,ECO_enc,Termination_enc,TimeControl_enc
0,1,1721,1746,1,311,1,5
1,1,1319,1182,2,101,1,13
2,1,2007,1999,0,291,0,6
3,1,1734,1546,1,200,0,3
4,1,2148,1580,0,46,0,6


In [4]:
df_sample.shape

(561486, 7)

In [5]:
df_sample_features = df_sample.drop('Result', axis = 1)

In [6]:
X = df_sample_features.values
y = df_sample['Result'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 42)

# Split into smaller dataframes

In [7]:
df_blitz = df_sample[df_sample['Event_enc'] == 0]
df_bullet = df_sample[df_sample['Event_enc'] == 1]
df_classical = df_sample[df_sample['Event_enc'] == 2]
df_correspondence = df_sample[df_sample['Event_enc'] == 3]

In [8]:
df_blitz_features = df_blitz.drop('Result', axis = 1)
df_bullet_features = df_bullet.drop('Result', axis = 1)
df_classical_features = df_classical.drop('Result', axis = 1)
df_correspondence_features = df_correspondence.drop('Result', axis = 1)

# Decision Tree

### BASIC

In [9]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_train, y_train)
classifier_accuracy = decision_tree_classifier.score(X_test, y_test)

y_pred = decision_tree_classifier.predict(X_test)
print(decision_tree_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, decision_tree_classifier.feature_importances_*100))

42.30707581613208
              precision    recall  f1-score   support

           1    0.41645   0.42183   0.41912     18716
           2    0.41179   0.41901   0.41537     18651
           3    0.44172   0.42834   0.43492     18782

    accuracy                        0.42307     56149
   macro avg    0.42332   0.42306   0.42314     56149
weighted avg    0.42335   0.42307   0.42316     56149



[(array([1691, 1449,    0,  304,    1,    8]), 29.404051666051274),
 (array([1998, 1927,    1,  100,    0,    3]), 27.951483977990677),
 (array([1501, 1461,    2,  101,    0,    8]), 2.6645179419782226),
 (array([1545, 1767,    1,    0,    0,    1]), 23.1579083870836),
 (array([1895, 1903,    1,  302,    0,    3]), 6.977803801469499),
 (array([1811, 1987,    0,   43,    0,    6]), 9.84423422542671)]

### BLITZ

In [10]:
X_blitz = df_blitz_features.values
y_blitz = df_blitz['Result'].values

X_blitz_train, X_blitz_test, y_blitz_train, y_blitz_test = train_test_split(X_blitz, y_blitz, test_size=0.1, random_state = 42)

In [11]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_blitz_train, y_blitz_train)
classifier_accuracy = decision_tree_classifier.score(X_blitz_test, y_blitz_test)
y_blitz_pred = decision_tree_classifier.predict(X_blitz_test)
print(decision_tree_classifier.score(X_blitz_test, y_blitz_test)*100)
print(classification_report(y_blitz_test, y_blitz_pred,digits = 5))
list(zip(X_train, decision_tree_classifier.feature_importances_*100))

41.42240049367479
              precision    recall  f1-score   support

           1    0.39217   0.39532   0.39374      8340
           2    0.40056   0.40757   0.40403      8406
           3    0.44793   0.43749   0.44264      9182

    accuracy                        0.41422     25928
   macro avg    0.41355   0.41346   0.41347     25928
weighted avg    0.41464   0.41422   0.41440     25928



[(array([1691, 1449,    0,  304,    1,    8]), 31.86321162244285),
 (array([1998, 1927,    1,  100,    0,    3]), 29.40375900970459),
 (array([1501, 1461,    2,  101,    0,    8]), 0.0),
 (array([1545, 1767,    1,    0,    0,    1]), 25.808329014963395),
 (array([1895, 1903,    1,  302,    0,    3]), 6.303193495874447),
 (array([1811, 1987,    0,   43,    0,    6]), 6.62150685701473)]

### BULLET

In [12]:
X_bullet = df_bullet_features.values
y_bullet = df_bullet['Result'].values

X_bullet_train, X_bullet_test, y_bullet_train, y_bullet_test = train_test_split(X_bullet, y_bullet, test_size=0.1, random_state = 42)

In [13]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_bullet_train, y_bullet_train)
classifier_accuracy = decision_tree_classifier.score(X_bullet_test, y_bullet_test)
y_bullet_pred = decision_tree_classifier.predict(X_bullet_test)
print(decision_tree_classifier.score(X_bullet_test, y_bullet_test)*100)
print(classification_report(y_bullet_test, y_bullet_pred,digits = 5))

list(zip(X_train, decision_tree_classifier.feature_importances_*100))

47.44365837648603
              precision    recall  f1-score   support

           1    0.48359   0.48470   0.48414      5197
           2    0.49443   0.49943   0.49691      5240
           3    0.42661   0.41814   0.42234      3274

    accuracy                        0.47444     13711
   macro avg    0.46821   0.46742   0.46780     13711
weighted avg    0.47412   0.47444   0.47427     13711



[(array([1691, 1449,    0,  304,    1,    8]), 31.134148409069244),
 (array([1998, 1927,    1,  100,    0,    3]), 32.425391219571665),
 (array([1501, 1461,    2,  101,    0,    8]), 0.0),
 (array([1545, 1767,    1,    0,    0,    1]), 21.510034147492995),
 (array([1895, 1903,    1,  302,    0,    3]), 8.987350877500122),
 (array([1811, 1987,    0,   43,    0,    6]), 5.94307534636596)]

### CLASSICAL

In [14]:
X_classical = df_classical_features.values
y_classical = df_classical['Result'].values

X_classical_train, X_classical_test, y_classical_train, y_classical_test = train_test_split(X_classical, y_classical, test_size=0.1, random_state = 42)

In [15]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_classical_train, y_classical_train)
classifier_accuracy = decision_tree_classifier.score(X_classical_test, y_classical_test)
y_classical_pred = decision_tree_classifier.predict(X_classical_test)
print(decision_tree_classifier.score(X_classical_test, y_classical_test)*100)
print(classification_report(y_classical_test, y_classical_pred,digits = 5))
list(zip(X_train, decision_tree_classifier.feature_importances_*100))

39.54847440944882
              precision    recall  f1-score   support

           1    0.35591   0.36831   0.36201      4879
           2    0.37379   0.37692   0.37535      5025
           3    0.44593   0.43105   0.43836      6352

    accuracy                        0.39548     16256
   macro avg    0.39188   0.39209   0.39190     16256
weighted avg    0.39661   0.39548   0.39597     16256



[(array([1691, 1449,    0,  304,    1,    8]), 31.340464922308108),
 (array([1998, 1927,    1,  100,    0,    3]), 29.707449370450167),
 (array([1501, 1461,    2,  101,    0,    8]), 0.0),
 (array([1545, 1767,    1,    0,    0,    1]), 23.53522626699226),
 (array([1895, 1903,    1,  302,    0,    3]), 3.8732112473776876),
 (array([1811, 1987,    0,   43,    0,    6]), 11.543648192871764)]

### CORRESPONDENCE

In [16]:
X_correspondence = df_correspondence_features.values
y_correspondence = df_correspondence['Result'].values

X_correspondence_train, X_correspondence_test, y_correspondence_train, y_correspondence_test = train_test_split(X_correspondence, y_correspondence, test_size=0.1, random_state = 42)

In [17]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_correspondence_train, y_correspondence_train)
classifier_accuracy = decision_tree_classifier.score(X_correspondence_test, y_correspondence_test)
y_correspondence_pred = decision_tree_classifier.predict(X_correspondence_test)
print(decision_tree_classifier.score(X_correspondence_test, y_correspondence_test)*100)
print(classification_report(y_correspondence_test, y_correspondence_pred,digits = 5))
list(zip(X_train, decision_tree_classifier.feature_importances_*100))

50.0
              precision    recall  f1-score   support

           1    0.38710   0.42857   0.40678        56
           2    0.33898   0.30303   0.32000        66
           3    0.62222   0.62687   0.62454       134

    accuracy                        0.50000       256
   macro avg    0.44943   0.45282   0.45044       256
weighted avg    0.49777   0.50000   0.49839       256



[(array([1691, 1449,    0,  304,    1,    8]), 31.10840258336982),
 (array([1998, 1927,    1,  100,    0,    3]), 34.258693611569),
 (array([1501, 1461,    2,  101,    0,    8]), 0.0),
 (array([1545, 1767,    1,    0,    0,    1]), 26.610060860145385),
 (array([1895, 1903,    1,  302,    0,    3]), 8.022842944915789),
 (array([1811, 1987,    0,   43,    0,    6]), 0.0)]

### DT - STROJONY

In [18]:
decision_tree_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=7, max_features=6)
decision_tree_classifier.fit(X_train, y_train)
classifier_accuracy = decision_tree_classifier.score(X_test, y_test)

y_pred = decision_tree_classifier.predict(X_test)
print(decision_tree_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, decision_tree_classifier.feature_importances_*100))

51.03741829774351
              precision    recall  f1-score   support

           1    0.55832   0.40639   0.47039     18716
           2    0.61216   0.33462   0.43271     18651
           3    0.45807   0.78852   0.57950     18782

    accuracy                        0.51037     56149
   macro avg    0.54285   0.50984   0.49420     56149
weighted avg    0.54267   0.51037   0.49437     56149



[(array([1691, 1449,    0,  304,    1,    8]), 23.341172856858034),
 (array([1998, 1927,    1,  100,    0,    3]), 13.010215755725627),
 (array([1501, 1461,    2,  101,    0,    8]), 0.0),
 (array([1545, 1767,    1,    0,    0,    1]), 0.008994919646675197),
 (array([1895, 1903,    1,  302,    0,    3]), 63.513841951063874),
 (array([1811, 1987,    0,   43,    0,    6]), 0.12577451670577747)]

# Random Forest

In [19]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

### BASIC

In [20]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)
classifier_accuracy = rf_classifier.score(X_test, y_test)

y_pred = rf_classifier.predict(X_test)
print(rf_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, rf_classifier.feature_importances_*100))

47.48793389018504
              precision    recall  f1-score   support

           1    0.48473   0.43241   0.45708     18716
           2    0.48566   0.43504   0.45896     18651
           3    0.45973   0.55676   0.50361     18782

    accuracy                        0.47488     56149
   macro avg    0.47671   0.47474   0.47322     56149
weighted avg    0.47668   0.47488   0.47327     56149



[(array([1691, 1449,    0,  304,    1,    8]), 32.50476429645958),
 (array([1998, 1927,    1,  100,    0,    3]), 32.45256481040193),
 (array([1501, 1461,    2,  101,    0,    8]), 0.8566150767099219),
 (array([1545, 1767,    1,    0,    0,    1]), 21.421466641581556),
 (array([1895, 1903,    1,  302,    0,    3]), 6.6801947925458),
 (array([1811, 1987,    0,   43,    0,    6]), 6.084394382301224)]

### BLITZ

In [21]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_blitz_train, y_blitz_train)
classifier_accuracy = rf_classifier.score(X_blitz_test, y_blitz_test)
y_blitz_pred = rf_classifier.predict(X_blitz_test)
print(rf_classifier.score(X_blitz_test, y_blitz_test)*100)
print(classification_report(y_blitz_test, y_blitz_pred,digits = 5))
list(zip(X_train, rf_classifier.feature_importances_*100))

46.25501388460352
              precision    recall  f1-score   support

           1    0.45475   0.41331   0.43304      8340
           2    0.46799   0.40876   0.43637      8406
           3    0.46429   0.55652   0.50624      9182

    accuracy                        0.46255     25928
   macro avg    0.46234   0.45953   0.45855     25928
weighted avg    0.46242   0.46255   0.46004     25928



[(array([1691, 1449,    0,  304,    1,    8]), 34.11840243116681),
 (array([1998, 1927,    1,  100,    0,    3]), 33.8200657697131),
 (array([1501, 1461,    2,  101,    0,    8]), 0.0),
 (array([1545, 1767,    1,    0,    0,    1]), 22.513121571530284),
 (array([1895, 1903,    1,  302,    0,    3]), 6.341912623141237),
 (array([1811, 1987,    0,   43,    0,    6]), 3.2064976044485807)]

### BULLET

In [22]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_bullet_train, y_bullet_train)
classifier_accuracy = rf_classifier.score(X_bullet_test, y_bullet_test)
y_bullet_pred = rf_classifier.predict(X_bullet_test)
print(rf_classifier.score(X_bullet_test, y_bullet_test)*100)
print(classification_report(y_bullet_test, y_bullet_pred,digits = 5))

list(zip(X_train, rf_classifier.feature_importances_*100))

53.0450003646707
              precision    recall  f1-score   support

           1    0.54909   0.52627   0.53743      5197
           2    0.56112   0.54924   0.55512      5240
           3    0.46098   0.50703   0.48291      3274

    accuracy                        0.53045     13711
   macro avg    0.52373   0.52751   0.52515     13711
weighted avg    0.53265   0.53045   0.53117     13711



[(array([1691, 1449,    0,  304,    1,    8]), 34.05250164213364),
 (array([1998, 1927,    1,  100,    0,    3]), 34.57357538113346),
 (array([1501, 1461,    2,  101,    0,    8]), 0.0),
 (array([1545, 1767,    1,    0,    0,    1]), 19.755810354107407),
 (array([1895, 1903,    1,  302,    0,    3]), 8.986500958530446),
 (array([1811, 1987,    0,   43,    0,    6]), 2.631611664095047)]

### CLASSICAL

In [23]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_classical_train, y_classical_train)
classifier_accuracy = rf_classifier.score(X_classical_test, y_classical_test)
y_classical_pred = rf_classifier.predict(X_classical_test)
print(rf_classifier.score(X_classical_test, y_classical_test)*100)
print(classification_report(y_classical_test, y_classical_pred,digits = 5))
list(zip(X_train, rf_classifier.feature_importances_*100))

44.63582677165354
              precision    recall  f1-score   support

           1    0.42317   0.37590   0.39813      4879
           2    0.43390   0.36836   0.39845      5025
           3    0.46643   0.56219   0.50985      6352

    accuracy                        0.44636     16256
   macro avg    0.44116   0.43548   0.43548     16256
weighted avg    0.44339   0.44636   0.44188     16256



[(array([1691, 1449,    0,  304,    1,    8]), 33.34626861473402),
 (array([1998, 1927,    1,  100,    0,    3]), 33.2558475983208),
 (array([1501, 1461,    2,  101,    0,    8]), 0.0),
 (array([1545, 1767,    1,    0,    0,    1]), 20.31164225574507),
 (array([1895, 1903,    1,  302,    0,    3]), 3.881566791555534),
 (array([1811, 1987,    0,   43,    0,    6]), 9.204674739644565)]

### CORRESPONDENCE

In [24]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_correspondence_train, y_correspondence_train)
classifier_accuracy = rf_classifier.score(X_correspondence_test, y_correspondence_test)
y_correspondence_pred = rf_classifier.predict(X_correspondence_test)
print(rf_classifier.score(X_correspondence_test, y_correspondence_test)*100)
print(classification_report(y_correspondence_test, y_correspondence_pred,digits = 5))
list(zip(X_train, rf_classifier.feature_importances_*100))

53.125
              precision    recall  f1-score   support

           1    0.44186   0.33929   0.38384        56
           2    0.37209   0.24242   0.29358        66
           3    0.59412   0.75373   0.66447       134

    accuracy                        0.53125       256
   macro avg    0.46936   0.44515   0.44730       256
weighted avg    0.50357   0.53125   0.50746       256



[(array([1691, 1449,    0,  304,    1,    8]), 33.041390493433894),
 (array([1998, 1927,    1,  100,    0,    3]), 33.49468434546479),
 (array([1501, 1461,    2,  101,    0,    8]), 0.0),
 (array([1545, 1767,    1,    0,    0,    1]), 25.465879395441366),
 (array([1895, 1903,    1,  302,    0,    3]), 7.998045765659945),
 (array([1811, 1987,    0,   43,    0,    6]), 0.0)]

### RF - STROJONY

In [25]:
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=6, min_samples_leaf=8, min_samples_split=15, n_jobs=-1, bootstrap=True)
rf_classifier.fit(X_train, y_train)
classifier_accuracy = rf_classifier.score(X_test, y_test)

y_pred = rf_classifier.predict(X_test)
print(rf_classifier.score(X_test, y_test)*100)
print(classification_report(y_test, y_pred,digits = 5))

list(zip(X_train, rf_classifier.feature_importances_*100))

50.30543731856311
              precision    recall  f1-score   support

           1    0.62239   0.28692   0.39277     18716
           2    0.60866   0.33231   0.42991     18651
           3    0.44668   0.88798   0.59437     18782

    accuracy                        0.50305     56149
   macro avg    0.55924   0.50240   0.47235     56149
weighted avg    0.55905   0.50305   0.47254     56149



[(array([1691, 1449,    0,  304,    1,    8]), 15.821889231558023),
 (array([1998, 1927,    1,  100,    0,    3]), 15.548435685993992),
 (array([1501, 1461,    2,  101,    0,    8]), 1.1512704439348909),
 (array([1545, 1767,    1,    0,    0,    1]), 0.6031817678148994),
 (array([1895, 1903,    1,  302,    0,    3]), 63.55182303040644),
 (array([1811, 1987,    0,   43,    0,    6]), 3.323399840291752)]

# D2

In [26]:
df2 = pd.read_csv('../csv/d2_final.csv')
df2.head()

Unnamed: 0.1,Unnamed: 0,Result,WhiteElo,BlackElo,EloDiff,Event,ECO_enc,Termination,TimeControl_enc
0,0,1,2460,2218,242,0,200,1,6
1,1,1,2428,2424,4,0,483,0,6
2,2,1,2441,2300,141,4,106,0,13
3,3,2,2280,2667,-387,0,464,0,6
4,4,1,2557,2682,-125,0,41,0,6


In [27]:
df2.drop(['Unnamed: 0','EloDiff'], axis = 1, inplace = True)
df2.rename(columns={'Event': 'Event_enc', 'Termination': 'Termination_enc'}, inplace = True)
df2.head()

Unnamed: 0,Result,WhiteElo,BlackElo,Event_enc,ECO_enc,Termination_enc,TimeControl_enc
0,1,2460,2218,0,200,1,6
1,1,2428,2424,0,483,0,6
2,1,2441,2300,4,106,0,13
3,2,2280,2667,0,464,0,6
4,1,2557,2682,0,41,0,6


In [28]:
print(df2.shape)

(98756, 7)


In [29]:
feature_names = ['WhiteElo', 'BlackElo', 'Event_enc', 'ECO_enc', 'Termination_enc', 'TimeControl_enc']
X2 = df2[feature_names].values
y2 = df2['Result'].values
print(X2.shape)
print(y2.shape)

(98756, 6)
(98756,)


In [30]:
decision_tree_classifier = DecisionTreeClassifier()

decision_tree_classifier.fit(X_train, y_train)

y_pred = decision_tree_classifier.predict(X2)
# print(decision_tree_classifier.score(y2, y_test))
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, decision_tree_classifier.feature_importances_*100))

32.10842885495565
              precision    recall  f1-score   support

           1    0.52749   0.29184   0.37578     47170
           2    0.48411   0.29507   0.36666     42549
           3    0.11531   0.59622   0.19325      9037

    accuracy                        0.32108     98756
   macro avg    0.37564   0.39438   0.31190     98756
weighted avg    0.47109   0.32108   0.35515     98756



[(array([2460, 2218,    0,  200,    1,    6]), 29.403567089118106),
 (array([2428, 2424,    0,  483,    0,    6]), 28.004143169874684),
 (array([2441, 2300,    4,  106,    0,   13]), 2.6770184803960086),
 (array([2280, 2667,    0,  464,    0,    6]), 23.14432290612834),
 (array([2557, 2682,    0,   41,    0,    6]), 6.977803801469508),
 (array([2554, 2552,    0,  429,    0,    6]), 9.793144553013342)]

In [31]:
decision_tree_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=7, max_features=6)

decision_tree_classifier.fit(X_train, y_train)

y_pred = decision_tree_classifier.predict(X2)
# print(decision_tree_classifier.score(y2, y_test))
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, decision_tree_classifier.feature_importances_*100))

19.80436631698327
              precision    recall  f1-score   support

           1    0.50794   0.22304   0.30997     47170
           2    0.00000   0.00000   0.00000     42549
           3    0.11580   1.00000   0.20756      9037

    accuracy                        0.19804     98756
   macro avg    0.20791   0.40768   0.17251     98756
weighted avg    0.25321   0.19804   0.16705     98756



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[(array([2460, 2218,    0,  200,    1,    6]), 23.341172856858034),
 (array([2428, 2424,    0,  483,    0,    6]), 13.010215755725627),
 (array([2441, 2300,    4,  106,    0,   13]), 0.0),
 (array([2280, 2667,    0,  464,    0,    6]), 0.008994919646675197),
 (array([2557, 2682,    0,   41,    0,    6]), 63.513841951063874),
 (array([2554, 2552,    0,  429,    0,    6]), 0.12577451670577747)]

In [32]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X2)
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, rf_classifier.feature_importances_*100))

25.993357365628416
              precision    recall  f1-score   support

           1    0.57498   0.19875   0.29539     47170
           2    0.55314   0.19925   0.29297     42549
           3    0.11646   0.86500   0.20528      9037

    accuracy                        0.25993     98756
   macro avg    0.41486   0.42100   0.26455     98756
weighted avg    0.52361   0.25993   0.28610     98756



[(array([2460, 2218,    0,  200,    1,    6]), 32.39129648331703),
 (array([2428, 2424,    0,  483,    0,    6]), 32.47789762619998),
 (array([2441, 2300,    4,  106,    0,   13]), 0.8502036740757881),
 (array([2280, 2667,    0,  464,    0,    6]), 21.43271486430302),
 (array([2557, 2682,    0,   41,    0,    6]), 6.697479224573912),
 (array([2554, 2552,    0,  429,    0,    6]), 6.15040812753028)]

In [33]:
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=6, min_samples_leaf=8, min_samples_split=15, n_jobs=-1, bootstrap=True)
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X2)
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, rf_classifier.feature_importances_*100))

19.718295597229535
              precision    recall  f1-score   support

           1    0.66223   0.01056   0.02078     47170
           2    0.49787   0.23357   0.31797     42549
           3    0.11580   1.00000   0.20756      9037

    accuracy                        0.19718     98756
   macro avg    0.42530   0.41471   0.18210     98756
weighted avg    0.54141   0.19718   0.16592     98756



[(array([2460, 2218,    0,  200,    1,    6]), 17.268407940627366),
 (array([2428, 2424,    0,  483,    0,    6]), 16.221504798034353),
 (array([2441, 2300,    4,  106,    0,   13]), 1.203699753467859),
 (array([2280, 2667,    0,  464,    0,    6]), 0.49547000570469324),
 (array([2557, 2682,    0,   41,    0,    6]), 61.89607717137922),
 (array([2554, 2552,    0,  429,    0,    6]), 2.9148403307865167)]

# DS2

In [34]:
df2_sample = pd.read_csv('../csv/df2_sample.csv')
df2_sample.head()

Unnamed: 0,Result,WhiteElo,BlackElo,EloDiff,Event,ECO_enc,Termination,TimeControl_enc
0,1,2669,2625,44,0,101,0,6
1,1,2516,2337,179,0,120,1,6
2,1,2468,2435,33,0,106,0,6
3,1,2404,2337,67,0,10,0,6
4,1,2295,2438,-143,0,492,0,6


In [35]:
df2_sample.drop(['EloDiff'], axis = 1, inplace = True)
df2_sample.rename(columns={'Event': 'Event_enc', 'Termination': 'Termination_enc'}, inplace = True)
df2_sample.head()

Unnamed: 0,Result,WhiteElo,BlackElo,Event_enc,ECO_enc,Termination_enc,TimeControl_enc
0,1,2669,2625,0,101,0,6
1,1,2516,2337,0,120,1,6
2,1,2468,2435,0,106,0,6
3,1,2404,2337,0,10,0,6
4,1,2295,2438,0,492,0,6


In [36]:
print(df2_sample.shape)

(27111, 7)


In [37]:
feature_names = ['WhiteElo', 'BlackElo', 'Event_enc', 'ECO_enc', 'Termination_enc', 'TimeControl_enc']
X2 = df2_sample[feature_names].values
y2 = df2_sample['Result'].values
print(X2.shape)
print(y2.shape)

(27111, 6)
(27111,)


In [38]:
decision_tree_classifier = DecisionTreeClassifier()

decision_tree_classifier.fit(X_train, y_train)

y_pred = decision_tree_classifier.predict(X2)
# print(decision_tree_classifier.score(y2, y_test))
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, decision_tree_classifier.feature_importances_*100))

39.2903249603482
              precision    recall  f1-score   support

           1    0.39521   0.29401   0.33718      9037
           2    0.38723   0.28726   0.32984      9037
           3    0.39455   0.59743   0.47524      9037

    accuracy                        0.39290     27111
   macro avg    0.39233   0.39290   0.38076     27111
weighted avg    0.39233   0.39290   0.38076     27111



[(array([2669, 2625,    0,  101,    0,    6]), 29.41681998634676),
 (array([2516, 2337,    0,  120,    1,    6]), 28.01125934010122),
 (array([2468, 2435,    0,  106,    0,    6]), 2.6599781766708013),
 (array([2404, 2337,    0,   10,    0,    6]), 23.169269683916884),
 (array([2295, 2438,    0,  492,    0,    6]), 6.977803801469505),
 (array([2638, 2445,    0,  346,    0,    6]), 9.764869011494847)]

In [39]:
decision_tree_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=7, max_features=6)
decision_tree_classifier.fit(X_train, y_train)
y_pred = decision_tree_classifier.predict(X2)
# print(decision_tree_classifier.score(y2, y_test))
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, decision_tree_classifier.feature_importances_*100))

40.869019954999814
              precision    recall  f1-score   support

           1    0.49063   0.22607   0.30952      9037
           2    0.00000   0.00000   0.00000      9037
           3    0.39382   1.00000   0.56510      9037

    accuracy                        0.40869     27111
   macro avg    0.29482   0.40869   0.29154     27111
weighted avg    0.29482   0.40869   0.29154     27111



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[(array([2669, 2625,    0,  101,    0,    6]), 23.341172856858034),
 (array([2516, 2337,    0,  120,    1,    6]), 13.010215755725627),
 (array([2468, 2435,    0,  106,    0,    6]), 0.0),
 (array([2404, 2337,    0,   10,    0,    6]), 0.008994919646675197),
 (array([2295, 2438,    0,  492,    0,    6]), 63.513841951063874),
 (array([2638, 2445,    0,  346,    0,    6]), 0.12577451670577747)]

In [40]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X2)
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, rf_classifier.feature_importances_*100))

41.591973737597286
              precision    recall  f1-score   support

           1    0.47105   0.20073   0.28150      9037
           2    0.47279   0.18269   0.26355      9037
           3    0.39513   0.86434   0.54234      9037

    accuracy                        0.41592     27111
   macro avg    0.44632   0.41592   0.36246     27111
weighted avg    0.44632   0.41592   0.36246     27111



[(array([2669, 2625,    0,  101,    0,    6]), 32.53247127312263),
 (array([2516, 2337,    0,  120,    1,    6]), 32.21470637784733),
 (array([2468, 2435,    0,  106,    0,    6]), 0.8750601272281441),
 (array([2404, 2337,    0,   10,    0,    6]), 21.42817839591236),
 (array([2295, 2438,    0,  492,    0,    6]), 6.715308530756984),
 (array([2638, 2445,    0,  346,    0,    6]), 6.234275295132531)]

In [41]:
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=6, min_samples_leaf=8, min_samples_split=15, n_jobs=-1, bootstrap=True)
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X2)
print(accuracy_score(y2, y_pred)*100)
print(classification_report(y2, y_pred,digits = 5))

list(zip(X2, rf_classifier.feature_importances_*100))

41.65467891261849
              precision    recall  f1-score   support

           1    0.56703   0.06318   0.11370      9037
           2    0.53373   0.18646   0.27637      9037
           3    0.39382   1.00000   0.56510      9037

    accuracy                        0.41655     27111
   macro avg    0.49820   0.41655   0.31839     27111
weighted avg    0.49820   0.41655   0.31839     27111



[(array([2669, 2625,    0,  101,    0,    6]), 15.97871824979807),
 (array([2516, 2337,    0,  120,    1,    6]), 16.164529858480677),
 (array([2468, 2435,    0,  106,    0,    6]), 0.9010550324354404),
 (array([2404, 2337,    0,   10,    0,    6]), 0.5682486046960724),
 (array([2295, 2438,    0,  492,    0,    6]), 62.513081053769795),
 (array([2638, 2445,    0,  346,    0,    6]), 3.874367200819947)]