In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [3]:
rawData = pd.read_csv('dota2GamePrediction_trainingdata.txt', header=None)
rawData.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,Sven,Lone Druid,Venomancer,Clockwerk,Shadow Shaman,Invoker,Gyrocopter,Anti-Mage,Alchemist,Slark,2
1,Riki,Tinker,Puck,Leshrac,Nyx Assassin,Slardar,Sand King,Spectre,Necrolyte,Warlock,1
2,Invoker,Mirana,Pudge,Magnus,Keeper of the Light,Rubick,Tidehunter,Queen of Pain,Faceless Void,Sniper,2
3,Riki,Centaur Warrunner,Treant Protector,Queen of Pain,Broodmother,Rubick,Weaver,Troll Warlord,Alchemist,Drow Ranger,1
4,Razor,Kunkka,Drow Ranger,Leshrac,Zeus,Riki,Bane,Visage,Invoker,Timbersaw,1


In [12]:
header = ['radiantHero_1', 'radiantHero_2', 'radiantHero_3', 'radiantHero_4', 'radiantHero_5', 'direHero_1', 'direHero_2', 'direHero_3', 'direHero_4', 'direHero_5', 'winningTeam']
rawData.columns = header
rawData.head()

Unnamed: 0,radiantHero_1,radiantHero_2,radiantHero_3,radiantHero_4,radiantHero_5,direHero_1,direHero_2,direHero_3,direHero_4,direHero_5,winningTeam
0,Sven,Lone Druid,Venomancer,Clockwerk,Shadow Shaman,Invoker,Gyrocopter,Anti-Mage,Alchemist,Slark,2
1,Riki,Tinker,Puck,Leshrac,Nyx Assassin,Slardar,Sand King,Spectre,Necrolyte,Warlock,1
2,Invoker,Mirana,Pudge,Magnus,Keeper of the Light,Rubick,Tidehunter,Queen of Pain,Faceless Void,Sniper,2
3,Riki,Centaur Warrunner,Treant Protector,Queen of Pain,Broodmother,Rubick,Weaver,Troll Warlord,Alchemist,Drow Ranger,1
4,Razor,Kunkka,Drow Ranger,Leshrac,Zeus,Riki,Bane,Visage,Invoker,Timbersaw,1


In [21]:
# winningTeam is the target variable, defined as y
y = rawData["winningTeam"]

# the rest of the data is the features, defined as X
X = rawData.drop(["winningTeam"], axis=1)

# train/test split using 0.2 as the test size
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [24]:
# one hot encoding the categorical variables
categoricalColumns = ['radiantHero_1', 'radiantHero_2', 'radiantHero_3', 'radiantHero_4', 'radiantHero_5', 'direHero_1', 'direHero_2', 'direHero_3', 'direHero_4', 'direHero_5']
pipeline = ColumnTransformer([('cat', OneHotEncoder(handle_unknown='ignore'), categoricalColumns)], remainder="passthrough")
encoder = pipeline.fit(X_train)
X_train = encoder.transform(X_train)
X_test = encoder.transform(X_test)

In [36]:
# instantiate the XGBClassifier, enable categorical
model = XGBClassifier(
    objective="multi:softmax",
    num_class=5,
    n_estimators=300,
    learning_rate=0.1,
    max_depth=3,
    min_child_weight=1,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=0,
    reg_alpha=0,
    reg_lambda=1,
    nthread=4,
    seed=42
)
# training the model
model.fit(X_train, y_train)

# make predictions on the test set
predictions = model.predict(X_test)

# measuring the accuracy
print(accuracy_score(y_test, predictions))



0.5766666666666667
