# Import base libraries

In [425]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplot
import torch
import torch.nn as nn
import torch.nn.functional as F


# Read Dota2 match data from csv file

In [426]:
doto_match_df = pd.read_csv("./data.csv")
doto_match_df.head()

Unnamed: 0,camps_stacked,denies,last_hits,assists,deaths,kills,match_id,start_time,win,hero_id,account_id,leaguename
0,33,4,56,21,5,2,5465684523,1591890200,True,90,326177197,GamaGames - Season 4 Professional - Part 2
1,32,0,61,11,8,0,5341713652,1586337968,False,90,319405932,Masters Tournament
2,32,0,25,19,2,0,3276678806,1498544603,True,30,86772934,The International 2017
3,31,2,101,19,4,0,5250431775,1582116657,False,90,1058332010,Asia Challenger league
4,29,0,69,17,7,2,4777319992,1558620545,False,75,186575040,TNG League 1


# Check the win rate  
You can see data is not biased to win or lose

In [427]:
doto_match_df.groupby('win').size()

win
False     93642
True     106358
dtype: int64

# Reorder data & delete useless column  
start_time is not related to win,league name and account_id is limited, result in worse performance

In [428]:
doto_match_df = doto_match_df[['match_id', 'win', 'kills', 'deaths', 'assists', 'last_hits', 'denies', 'camps_stacked', 'hero_id']]
doto_match_df.head()

Unnamed: 0,match_id,win,kills,deaths,assists,last_hits,denies,camps_stacked,hero_id
0,5465684523,True,2,5,21,56,4,33,90
1,5341713652,False,0,8,11,61,0,32,90
2,3276678806,True,0,2,19,25,0,32,30
3,5250431775,False,0,4,19,101,2,31,90
4,4777319992,False,2,7,17,69,0,29,75


In [429]:
print(doto_match_df.hero_id.min())
print(doto_match_df.hero_id.max())

1
129


# one-hot encode hero_id  
As hero_id is index, not numerical value, it has to be encoded

In [430]:
doto_match_df = pd.get_dummies(doto_match_df, columns=['hero_id'])
print(doto_match_df.shape)

(200000, 127)


# Split data using match_id  
since match_id is independent to win, use it to split and drop it

In [431]:
doto_match_train = doto_match_df[doto_match_df.match_id % 10 < 7]
doto_match_test = doto_match_df[doto_match_df.match_id % 10 >= 7]
doto_match_train = doto_match_train.drop('match_id', axis=1)
doto_match_test = doto_match_test.drop('match_id', axis=1)
print(doto_match_train.shape, doto_match_test.shape)

(139242, 126) (60758, 126)


In [432]:
doto_match_train.head()

Unnamed: 0,win,kills,deaths,assists,last_hits,denies,camps_stacked,hero_id_1,hero_id_2,hero_id_3,...,hero_id_111,hero_id_112,hero_id_113,hero_id_114,hero_id_119,hero_id_120,hero_id_121,hero_id_126,hero_id_128,hero_id_129
0,True,2,5,21,56,4,33,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,False,0,8,11,61,0,32,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,True,0,2,19,25,0,32,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,False,0,4,19,101,2,31,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,False,2,7,17,69,0,29,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [433]:
doto_match_test.head()

Unnamed: 0,win,kills,deaths,assists,last_hits,denies,camps_stacked,hero_id_1,hero_id_2,hero_id_3,...,hero_id_111,hero_id_112,hero_id_113,hero_id_114,hero_id_119,hero_id_120,hero_id_121,hero_id_126,hero_id_128,hero_id_129
10,True,1,13,11,43,1,24,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12,False,1,5,5,154,2,23,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19,False,1,6,9,45,1,22,0,0,0,...,0,0,0,0,0,0,0,0,0,0
22,True,2,9,16,30,0,22,0,0,0,...,0,0,0,0,0,0,0,0,0,0
24,True,1,5,13,45,3,21,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [434]:
class DotoDataset(torch.utils.data.Dataset):
    def __init__(self, df):
        self.len = df.shape[0]
        self.y_data = torch.from_numpy(df['win'].values).float()
        self.y_data = torch.reshape(self.y_data, (-1, 1))
        self.x_data = torch.from_numpy(df.drop('win', axis=1).values).float()
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len

In [435]:
trainset = DotoDataset(doto_match_train)
testset = DotoDataset(doto_match_test)

In [436]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(testset, batch_size=16, shuffle=True, num_workers=0)


# Make neural network class  
Make neural network class using pytorch. input size, width of each layer, output size can be varied.

In [437]:
class DotoNet(torch.nn.Module):
    def __init__(self, input_size, width_1, width_2, output_size):
        super().__init__()
        self.linear1 = torch.nn.Linear(input_size, width_1)
        self.linear2 = torch.nn.Linear(width_1, width_2)
        self.linear3 = torch.nn.Linear(width_2, output_size)
    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.sigmoid(self.linear3(x))
        return x


# Check GPU works

In [438]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


# Construct model, loss, optimizer

In [439]:
# Construct model for doto
model = DotoNet(125, 256, 64, 1)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5, momentum=0.9)
    

# Start training

In [440]:
for epoch in range(33):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')

[1,  2000] loss: 0.696
[1,  4000] loss: 0.693
[1,  6000] loss: 0.692
[1,  8000] loss: 0.691
[2,  2000] loss: 0.690
[2,  4000] loss: 0.689
[2,  6000] loss: 0.689
[2,  8000] loss: 0.685
[3,  2000] loss: 0.685
[3,  4000] loss: 0.681
[3,  6000] loss: 0.679
[3,  8000] loss: 0.677
[4,  2000] loss: 0.674
[4,  4000] loss: 0.671
[4,  6000] loss: 0.667
[4,  8000] loss: 0.665
[5,  2000] loss: 0.660
[5,  4000] loss: 0.654
[5,  6000] loss: 0.654
[5,  8000] loss: 0.649
[6,  2000] loss: 0.643
[6,  4000] loss: 0.640
[6,  6000] loss: 0.637
[6,  8000] loss: 0.633
[7,  2000] loss: 0.628
[7,  4000] loss: 0.628
[7,  6000] loss: 0.620
[7,  8000] loss: 0.618
[8,  2000] loss: 0.614
[8,  4000] loss: 0.614
[8,  6000] loss: 0.609
[8,  8000] loss: 0.606
[9,  2000] loss: 0.604
[9,  4000] loss: 0.602
[9,  6000] loss: 0.600
[9,  8000] loss: 0.598
[10,  2000] loss: 0.595
[10,  4000] loss: 0.596
[10,  6000] loss: 0.592
[10,  8000] loss: 0.592
[11,  2000] loss: 0.589
[11,  4000] loss: 0.589
[11,  6000] loss: 0.589
[11,

In [448]:
correct = 0
total = 0
with torch.no_grad():
    for data in trainloader:
        inputs, labels = data
        outputs = model(inputs)
        predicted = outputs.data>=0.5
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the trainset: %d %%' % (
    100 * correct / total))

Accuracy of the network on the trainset: 83 %


In [451]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        outputs = model(inputs)
        predicted = outputs.data>=0.5
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the testset: %d %%' % (
    100 * correct / total))

Accuracy of the network on the testset: 83 %
