# Import base libraries

In [175]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplot
import torch
import torch.nn as nn
import torch.nn.functional as F


# Read Dota 2 match data from a CSV file

In [176]:
# Load the raw match records from the CSV next to the notebook.
match_csv_path = "./data.csv"
doto_match_df = pd.read_csv(match_csv_path)

# Preview the first five rows to check that the columns parsed as expected.
doto_match_df.head(n=5)

Unnamed: 0,camps_stacked,denies,last_hits,assists,deaths,kills,match_id,start_time,win,hero_id,account_id,leaguename
0,33,4,56,21,5,2,5465684523,1591890200,True,90,326177197,GamaGames - Season 4 Professional - Part 2
1,32,0,61,11,8,0,5341713652,1586337968,False,90,319405932,Masters Tournament
2,32,0,25,19,2,0,3276678806,1498544603,True,30,86772934,The International 2017
3,31,2,101,19,4,0,5250431775,1582116657,False,90,1058332010,Asia Challenger league
4,29,0,69,17,7,2,4777319992,1558620545,False,75,186575040,TNG League 1


# Reorder data & delete useless columns  
`start_time` is not related to `win`, and `leaguename` and `account_id` are of limited use and result in worse performance, so all three are dropped.

In [177]:
# Keep the match identifier, the label and the per-player stats, in this order.
# start_time, account_id and leaguename are intentionally left out.
kept_columns = [
    'match_id', 'win',
    'kills', 'deaths', 'assists',
    'last_hits', 'denies', 'camps_stacked',
    'hero_id',
]
doto_match_df = doto_match_df[kept_columns]
doto_match_df.head(n=5)

Unnamed: 0,match_id,win,kills,deaths,assists,last_hits,denies,camps_stacked,hero_id
0,5465684523,True,2,5,21,56,4,33,90
1,5341713652,False,0,8,11,61,0,32,90
2,3276678806,True,0,2,19,25,0,32,30
3,5250431775,False,0,4,19,101,2,31,90
4,4777319992,False,2,7,17,69,0,29,75


In [178]:
# Show the smallest and largest hero_id to see the range the ids span.
hero_ids = doto_match_df['hero_id']
print(hero_ids.min())
print(hero_ids.max())

1
129


# One-hot encode hero_id  
Since `hero_id` is an index (a category label), not a numerical value, it has to be one-hot encoded.

In [179]:
# One-hot encode hero_id: every distinct hero becomes its own indicator column
# named hero_id_<id>, and the original hero_id column is removed.
# The dtype is pinned to uint8 so the indicators are numeric 0/1 regardless of
# the installed pandas version (pandas >= 2.0 defaults to bool columns otherwise).
doto_match_df = pd.get_dummies(doto_match_df, columns=['hero_id'], dtype=np.uint8)
print(doto_match_df.shape)

(200000, 127)


# Split data using match_id  
Since `match_id` is independent of `win`, use its last digit to split the data into train (digits 0-6) and test (digits 7-9) sets, then drop it.

In [180]:
# Deterministic split keyed on match_id modulo 10:
# remainders 0-6 go to the training set, remainders 7-9 to the test set.
match_id_mod_10 = doto_match_df['match_id'] % 10
train_rows = match_id_mod_10 < 7
test_rows = match_id_mod_10 >= 7

# match_id is only needed for the split, so it is removed from both subsets.
doto_match_train = doto_match_df[train_rows].drop(columns='match_id')
doto_match_test = doto_match_df[test_rows].drop(columns='match_id')
print(doto_match_train.shape, doto_match_test.shape)

(139242, 126) (60758, 126)


In [181]:
# Preview the first five rows of the training subset.
doto_match_train.head(n=5)

Unnamed: 0,win,kills,deaths,assists,last_hits,denies,camps_stacked,hero_id_1,hero_id_2,hero_id_3,...,hero_id_111,hero_id_112,hero_id_113,hero_id_114,hero_id_119,hero_id_120,hero_id_121,hero_id_126,hero_id_128,hero_id_129
0,True,2,5,21,56,4,33,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,False,0,8,11,61,0,32,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,True,0,2,19,25,0,32,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,False,0,4,19,101,2,31,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,False,2,7,17,69,0,29,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [182]:
# Preview the first five rows of the test subset.
doto_match_test.head(n=5)

Unnamed: 0,win,kills,deaths,assists,last_hits,denies,camps_stacked,hero_id_1,hero_id_2,hero_id_3,...,hero_id_111,hero_id_112,hero_id_113,hero_id_114,hero_id_119,hero_id_120,hero_id_121,hero_id_126,hero_id_128,hero_id_129
10,True,1,13,11,43,1,24,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12,False,1,5,5,154,2,23,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19,False,1,6,9,45,1,22,0,0,0,...,0,0,0,0,0,0,0,0,0,0
22,True,2,9,16,30,0,22,0,0,0,...,0,0,0,0,0,0,0,0,0,0
24,True,1,5,13,45,3,21,0,0,0,...,0,0,0,0,0,0,0,0,0,0
