In [30]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt

In [31]:
X_train_df = pd.read_csv('train_features.csv', delimiter=',')
X_test_df = pd.read_csv('test_features.csv', delimiter=',')
y_train_df = pd.read_csv('train_targets.csv', delimiter=',')


In [32]:
X_train_df = X_train_df.drop(columns=['match_id_hash',
                      'game_mode',
                      'lobby_type',
                      'objectives_len',
                      'chat_len',
                      'r1_hero_id','r2_hero_id','r3_hero_id','r4_hero_id','r5_hero_id',
                      'd1_hero_id','d2_hero_id','d3_hero_id','d4_hero_id','d5_hero_id',
                      'r1_xp','r2_xp','r3_xp','r4_xp','r5_xp',
                      'd1_xp','d2_xp','d3_xp','d4_xp','d5_xp',
                      'r1_health','r2_health','r3_health','r4_health','r5_health',
                      'd1_health','d2_health','d3_health','d4_health','d5_health',
                      'r1_max_health','r2_max_health','r3_max_health','r4_max_health','r5_max_health',
                      'd1_max_health','d2_max_health','d3_max_health','d4_max_health','d5_max_health',
                      'r1_max_mana','r2_max_mana','r3_max_mana','r4_max_mana','r5_max_mana',
                      'd1_max_mana','d2_max_mana','d3_max_mana','d4_max_mana','d5_max_mana',
                      'r1_x','r2_x','r3_x','r4_x','r5_x',
                      'd1_x','d2_x','d3_x','d4_x','d5_x',
                      'r1_y','r2_y','r3_y','r4_y','r5_y',
                      'd1_y','d2_y','d3_y','d4_y','d5_y',
                      'r1_stuns','r2_stuns','r3_stuns','r4_stuns','r5_stuns',
                      'd1_stuns','d2_stuns','d3_stuns','d4_stuns','d5_stuns',
                      'r1_creeps_stacked','r2_creeps_stacked','r3_creeps_stacked','r4_creeps_stacked','r5_creeps_stacked',
                      'd1_creeps_stacked','d2_creeps_stacked','d3_creeps_stacked','d4_creeps_stacked','d5_creeps_stacked',
                      'r1_camps_stacked','r2_camps_stacked','r3_camps_stacked','r4_camps_stacked','r5_camps_stacked',
                      'd1_camps_stacked','d2_camps_stacked','d3_camps_stacked','d4_camps_stacked','d5_camps_stacked',
                      'r1_rune_pickups','r2_rune_pickups','r3_rune_pickups','r4_rune_pickups','r5_rune_pickups',
                      'd1_rune_pickups','d2_rune_pickups','d3_rune_pickups','d4_rune_pickups','d5_rune_pickups',
                      'r1_firstblood_claimed','r2_firstblood_claimed','r3_firstblood_claimed','r4_firstblood_claimed','r5_firstblood_claimed',
                      'd1_firstblood_claimed','d2_firstblood_claimed','d3_firstblood_claimed','d4_firstblood_claimed','d5_firstblood_claimed',
                      'r1_teamfight_participation','r2_teamfight_participation','r3_teamfight_participation','r4_teamfight_participation','r5_teamfight_participation',
                      'd1_teamfight_participation','d2_teamfight_participation','d3_teamfight_participation','d4_teamfight_participation','d5_teamfight_participation',
                      'r1_towers_killed','r2_towers_killed','r3_towers_killed','r4_towers_killed','r5_towers_killed',
                      'd1_towers_killed','d2_towers_killed','d3_towers_killed','d4_towers_killed','d5_towers_killed',
                      'r1_roshans_killed','r2_roshans_killed','r3_roshans_killed','r4_roshans_killed','r5_roshans_killed',
                      'd1_roshans_killed','d2_roshans_killed','d3_roshans_killed','d4_roshans_killed','d5_roshans_killed',
                      'r1_obs_placed','r2_obs_placed','r3_obs_placed','r4_obs_placed','r5_obs_placed',
                      'd1_obs_placed','d2_obs_placed','d3_obs_placed','d4_obs_placed','d5_obs_placed',
                      'r1_sen_placed','r2_sen_placed','r3_sen_placed','r4_sen_placed','r5_sen_placed',
                      'd1_sen_placed','d2_sen_placed','d3_sen_placed','d4_sen_placed','d5_sen_placed',
                           ])

In [33]:
X_test_df = X_test_df.drop(columns=['match_id_hash',
                      'game_mode',
                      'lobby_type',
                      'objectives_len',
                      'chat_len',
                      'r1_hero_id','r2_hero_id','r3_hero_id','r4_hero_id','r5_hero_id',
                      'd1_hero_id','d2_hero_id','d3_hero_id','d4_hero_id','d5_hero_id',
                      'r1_xp','r2_xp','r3_xp','r4_xp','r5_xp',
                      'd1_xp','d2_xp','d3_xp','d4_xp','d5_xp',
                      'r1_health','r2_health','r3_health','r4_health','r5_health',
                      'd1_health','d2_health','d3_health','d4_health','d5_health',
                      'r1_max_health','r2_max_health','r3_max_health','r4_max_health','r5_max_health',
                      'd1_max_health','d2_max_health','d3_max_health','d4_max_health','d5_max_health',
                      'r1_max_mana','r2_max_mana','r3_max_mana','r4_max_mana','r5_max_mana',
                      'd1_max_mana','d2_max_mana','d3_max_mana','d4_max_mana','d5_max_mana',
                      'r1_x','r2_x','r3_x','r4_x','r5_x',
                      'd1_x','d2_x','d3_x','d4_x','d5_x',
                      'r1_y','r2_y','r3_y','r4_y','r5_y',
                      'd1_y','d2_y','d3_y','d4_y','d5_y',
                      'r1_stuns','r2_stuns','r3_stuns','r4_stuns','r5_stuns',
                      'd1_stuns','d2_stuns','d3_stuns','d4_stuns','d5_stuns',
                      'r1_creeps_stacked','r2_creeps_stacked','r3_creeps_stacked','r4_creeps_stacked','r5_creeps_stacked',
                      'd1_creeps_stacked','d2_creeps_stacked','d3_creeps_stacked','d4_creeps_stacked','d5_creeps_stacked',
                      'r1_camps_stacked','r2_camps_stacked','r3_camps_stacked','r4_camps_stacked','r5_camps_stacked',
                      'd1_camps_stacked','d2_camps_stacked','d3_camps_stacked','d4_camps_stacked','d5_camps_stacked',
                      'r1_rune_pickups','r2_rune_pickups','r3_rune_pickups','r4_rune_pickups','r5_rune_pickups',
                      'd1_rune_pickups','d2_rune_pickups','d3_rune_pickups','d4_rune_pickups','d5_rune_pickups',
                      'r1_firstblood_claimed','r2_firstblood_claimed','r3_firstblood_claimed','r4_firstblood_claimed','r5_firstblood_claimed',
                      'd1_firstblood_claimed','d2_firstblood_claimed','d3_firstblood_claimed','d4_firstblood_claimed','d5_firstblood_claimed',
                      'r1_teamfight_participation','r2_teamfight_participation','r3_teamfight_participation','r4_teamfight_participation','r5_teamfight_participation',
                      'd1_teamfight_participation','d2_teamfight_participation','d3_teamfight_participation','d4_teamfight_participation','d5_teamfight_participation',
                      'r1_towers_killed','r2_towers_killed','r3_towers_killed','r4_towers_killed','r5_towers_killed',
                      'd1_towers_killed','d2_towers_killed','d3_towers_killed','d4_towers_killed','d5_towers_killed',
                      'r1_roshans_killed','r2_roshans_killed','r3_roshans_killed','r4_roshans_killed','r5_roshans_killed',
                      'd1_roshans_killed','d2_roshans_killed','d3_roshans_killed','d4_roshans_killed','d5_roshans_killed',
                      'r1_obs_placed','r2_obs_placed','r3_obs_placed','r4_obs_placed','r5_obs_placed',
                      'd1_obs_placed','d2_obs_placed','d3_obs_placed','d4_obs_placed','d5_obs_placed',
                      'r1_sen_placed','r2_sen_placed','r3_sen_placed','r4_sen_placed','r5_sen_placed',
                      'd1_sen_placed','d2_sen_placed','d3_sen_placed','d4_sen_placed','d5_sen_placed',
                           ])

In [34]:
X_train = X_train_df.iloc[:, :].values
X_train

array([[  155,     0,     0, ...,   851,    11,     3],
       [  658,     7,     2, ...,  1423,     8,     4],
       [   21,     0,     0, ...,    96,     0,     1],
       ...,
       [  643,     1,     4, ...,  5431,    51,    10],
       [ 2405,     3,     8, ..., 18231,   181,    24],
       [ 1775,     3,     4, ..., 14096,   225,    19]], dtype=int64)

In [35]:
X_test = X_test_df.iloc[:, :].values
X_test

array([[  23,    0,    0, ...,  115,    0,    1],
       [1044,    3,    5, ..., 4746,    5,   10],
       [1091,    3,    1, ..., 6456,   73,   11],
       ...,
       [ 391,    0,    1, ..., 1164,    4,    4],
       [1254,    0,    4, ..., 8015,  103,   14],
       [1553,    5,    7, ..., 6564,   23,   17]], dtype=int64)

In [36]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train_df['radiant_win'] = label_encoder.fit_transform(y_train_df['radiant_win'])
y_train = y_train_df.iloc[:, 2]

In [37]:
y_train

0        0
1        1
2        1
3        1
4        0
        ..
39670    0
39671    1
39672    0
39673    1
39674    1
Name: radiant_win, Length: 39675, dtype: int64

In [38]:
results = []

In [39]:
criterion = 'gini'

gini_classifier = DecisionTreeClassifier(criterion=criterion, random_state=1)
gini_classifier.fit(X_train, y_train)
y_pred = gini_classifier.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print(acc)
results.append({
    'classifier': 'CART',
    'criterion': criterion,
    'accuracy_score': acc
    })

NameError: name 'y_test' is not defined

In [None]:
criterion = 'entropy'

entropy_classifier = DecisionTreeClassifier(criterion=criterion, random_state=1)
entropy_classifier.fit(X_train, y_train)
y_pred = entropy_classifier.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print(acc)
results.append({
    'classifier': 'C4.5',
    'criterion': criterion,
    'accuracy_score': acc
    })

In [None]:
etc_results = []

et_classifier = ExtraTreesClassifier(criterion='entropy', n_estimators=150, random_state=1)
et_classifier.fit(X_train, y_train)
y_pred = et_classifier.predict(X_test)
etc_results.append({
    'criterion': 'entropy',
    'n_estimators': 150, 
    'accuracy_score': accuracy_score(y_test, y_pred)})

result = max(etc_results, key=lambda x: x['accuracy_score'])
print(result['accuracy_score'])
result['classifier'] = 'Extra Trees Classifier'
results.append(result)

In [None]:
gb_results = []

gb_classifier = GradientBoostingClassifier(loss='log_loss', n_estimators=50, learning_rate=1, criterion='friedman_mse', max_depth=4, random_state=1)
gb_classifier.fit(X_train, y_train)
y_pred = gb_classifier.predict(X_test)
gb_results.append({
    'loss': 'log_loss',
    'learning_rate': 1,
    'n_estimators': 50,
    'criterion': 'friedman_mse',
    'max_depth': 4,
    'accuracy_score': accuracy_score(y_test, y_pred)
})

result = max(gb_results, key=lambda x: x['accuracy_score'])
print(result['accuracy_score'])
result['classifier'] = 'Gradient Boosting'
results.append(result)

In [None]:
hgb_results = []

hgb_classifier = HistGradientBoostingClassifier(learning_rate=0.2, max_iter=100, random_state=1)
hgb_classifier.fit(X_train, y_train)
y_pred = hgb_classifier.predict(X_test)
hgb_results.append({
    'learning_rate': 0.2,
    'max_iter': 100,
    'accuracy_score': accuracy_score(y_test, y_pred)
})

result = max(hgb_results, key=lambda x: x['accuracy_score'])
print(result['accuracy_score'])
result['classifier'] = 'Hist Gradient Boosting'
results.append(result)

In [None]:
rf_results = []

rf_classifier = RandomForestClassifier(n_estimators=50, criterion='gini', random_state=1)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)
rf_results.append({
    'n_estimators': 50,
    'criterion': 'gini',
    'accuracy_score': accuracy_score(y_test, y_pred)
})

result = max(rf_results, key=lambda x: x['accuracy_score'])
print(result['accuracy_score'])
result['classifier'] = 'Random Forest'
results.append(result)

In [None]:
ada_results = []

ab_classifier = AdaBoostClassifier(n_estimators=50, learning_rate=0.1, algorithm='SAMME.R', random_state=0)
ab_classifier.fit(X_train, y_train)
y_pred = ab_classifier.predict(X_test)
ada_results.append({
    'n_estimators': 50,
    'learning_rate': 0.1,
    'algorithm': 'SAMME.R',
    'accuracy_score': accuracy_score(y_test, y_pred)
})

result = max(ada_results, key=lambda x: x['accuracy_score'])
print(result['accuracy_score'])
result['classifier'] = 'Adaboost'
results.append(result)

In [None]:
plt.rcdefaults()
fig, ax = plt.subplots()

df = pd.DataFrame(results)

df_sorted = df.sort_values('accuracy_score')

ax.barh('classifier', 'accuracy_score', data=df_sorted)
for i in range(len(results)):
    ax.text(results[i]['accuracy_score'] * 0.9, results[i]['classifier'], round(results[i]['accuracy_score'], 4), ha='center', color='black')