Import the required Libraries for our problem.

In [3]:
!pwd

/home/khashayar/Desktop/projects/dotamodel


In [4]:
import pandas as pd
import torch
from torch import nn
import numpy as np

from scipy.sparse import lil_matrix
from scipy.sparse import hstack
from scipy.sparse import coo_matrix

from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
import torch.optim as optim

Use GPU for our computations

In [5]:
print(torch.cuda.device_count())
if torch.cuda.is_available():  
  dev = "cuda:0" 
else:  
  dev = "cpu"  
device = torch.device(dev)  
print(device)

0
cpu


Import the Data inside

In [6]:
df = pd.read_csv('dota_games.csv')
df

Unnamed: 0,GameID,TimeStamp,GameSEQ,Leavers,RadiantWin,Pick1Rad,Pick2Rad,Pick3Rad,Pick4Rad,Pick5Rad,Pick1Dir,Pick2Dir,Pick3Dir,Pick4Dir,Pick5Dir,skill_level
0,5702035339,1605433074,4795818304,0,1,75,35,68,14,44,101,51,38,67,74,
1,5702036092,1605433111,4795818235,0,0,22,41,6,9,26,39,36,93,31,85,
2,5702039715,1605433280,4795818283,1,0,21,26,84,102,8,14,71,129,47,18,
3,5702041041,1605433346,4795848331,0,0,26,67,79,42,35,21,104,1,119,10,
4,5702041315,1605433377,4795818256,1,1,12,22,2,128,79,25,93,98,75,41,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16913394,5782875527,1610421230,4869157490,1,0,88,75,54,78,23,85,76,32,70,114,
16913395,5782876220,1610421300,4869155380,1,1,20,40,8,63,77,9,5,14,1,60,
16913396,5782878555,1610421525,4869149137,1,0,31,26,98,54,126,27,103,13,8,32,3.0
16913397,5782879828,1610421645,4869156196,1,0,129,30,41,11,68,104,101,32,31,106,2.0


Filter out games that have no leavers. Games with Leavers=0

In [7]:
df_no_leavers = df.query('Leavers==0')
df_no_leavers

Unnamed: 0,GameID,TimeStamp,GameSEQ,Leavers,RadiantWin,Pick1Rad,Pick2Rad,Pick3Rad,Pick4Rad,Pick5Rad,Pick1Dir,Pick2Dir,Pick3Dir,Pick4Dir,Pick5Dir,skill_level
0,5702035339,1605433074,4795818304,0,1,75,35,68,14,44,101,51,38,67,74,
1,5702036092,1605433111,4795818235,0,0,22,41,6,9,26,39,36,93,31,85,
3,5702041041,1605433346,4795848331,0,0,26,67,79,42,35,21,104,1,119,10,
6,5702044125,1605433531,4795863372,0,1,54,26,105,85,113,86,5,70,60,75,
7,5702044805,1605433556,4795853616,0,1,79,31,67,97,21,64,107,104,6,126,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16913388,5782874424,1610421130,4869157247,0,1,64,21,28,54,46,25,7,77,1,59,
16913389,5782874452,1610421125,4869156928,0,1,3,100,63,54,120,123,64,29,1,11,3.0
16913391,5782874747,1610421160,4869156248,0,1,21,27,2,44,61,87,107,67,71,48,3.0
16913393,5782875467,1610421194,4869156483,0,1,86,87,77,63,59,30,123,6,56,23,


Separate X and y.

In [8]:
X = df_no_leavers.loc[:,'Pick1Rad':'Pick5Dir']
y = df_no_leavers['RadiantWin']

df_size = df_no_leavers.shape[0]
train_data, vali_data ,  test_data = df_no_leavers.iloc[0:int(df_size*0.85),:] , df_no_leavers.iloc[int(df_size*0.85):int(df_size*0.95),:] , df_no_leavers.iloc[int(df_size*0.95):,:]

In [9]:
del df
del df_no_leavers

Build a Dummy classifier for a baseline.

Data Loader

In [10]:
from torch.utils.data import DataLoader,Dataset
import random

class game_datasets(Dataset):

  def __init__(self,rawdata):
    X = rawdata.loc[:,'Pick1Rad':'Pick5Dir']
    y = rawdata['RadiantWin']
    self.x = torch.tensor(X.values)
    self.y = torch.tensor(y.values)

  def __getitem__(self,index):
    return self.x[index],self.y[index]
    
  def __len__(self):
    return len(self.y)

Transform every batch of information and also add games for earlier picks

In [11]:
def game_datasets_transform_X(data_X,mode=None,device="cpu"):
  # If mode is none only the 10 picks are added.
  # If mode is equal to 10 all possible combinations are added aswell.
  # If mode is either 1,2,3,4,5 the picks with those scenarios are only added.

  if mode is not None:
    picks = data_X.t()
    picks = picks.to(device)
    #1st picks
    picks_rad = torch.zeros(data_X.shape[0],150,device=device)
    picks_rad[range(picks_rad.shape[0]), picks[0]] = -1
    picks_dire = torch.zeros(data_X.shape[0],150,device=device)
    picks_dire[range(picks_dire.shape[0]), picks[5]] = 1
    if mode==10:
      res = torch.cat([picks_rad,picks_dire],dim=0) 
    if mode==1:
      return torch.cat([picks_rad,picks_dire],dim=0) 

    #2nd picks
    picks_rad[range(picks_rad.shape[0]), picks[1]] = -1
    picks_dire[range(picks_dire.shape[0]), picks[6]] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==2:
      return torch.cat([picks_rad,picks_dire],dim=0) 

    #3rd picks
    picks_rad[range(picks_rad.shape[0]), picks[5:7]] = 1
    picks_dire[range(picks_dire.shape[0]), picks[0:2]] = -1

    picks_rad[range(picks_rad.shape[0]), picks[2]] = -1
    picks_dire[range(picks_dire.shape[0]), picks[7]] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==3:
      return torch.cat([picks_rad,picks_dire],dim=0)  

    #4th picks
    picks_rad[range(picks_rad.shape[0]), picks[3]] = -1
    picks_dire[range(picks_dire.shape[0]), picks[8]] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==4:
      return torch.cat([picks_rad,picks_dire],dim=0)

    #5th picks
    picks_rad[range(picks_rad.shape[0]), picks[7:9]] = 1
    picks_dire[range(picks_dire.shape[0]), picks[2:4]] = -1

    picks_rad[range(picks_rad.shape[0]), picks[4]] = -1
    picks_dire[range(picks_dire.shape[0]), picks[9]] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==5:
      return torch.cat([picks_rad,picks_dire],dim=0)

    #All picks (Only for mode 10)
    picks_rad[range(picks_rad.shape[0]), picks[9]] = 1
    res = torch.cat([res,picks_rad],dim=0) 
    return res
  
  else:
    picks = data_X.t()
    picks = picks.to(device)
    picks_all = torch.zeros(data_X.shape[0],150,device=device)
    picks_all[range(picks_all.shape[0]), picks[0:5]]= -1
    picks_all[range(picks_all.shape[0]), picks[5:10]]= 1
    return picks_all

Also Y must be transformed to match X transformation if mode is anythig either that None

In [12]:
def game_datasets_transform_Y(data_Y,mode=None):
  #y_trans = []
  if mode==None:
    return data_Y
  
  y = data_Y.numpy()
  #for i, y in enumerate(data_Y.numpy()):
  if mode <10:
    # y_trans.append(y)
    # y_trans.append(y)
    res = np.tile(y,2)
  else:
    res = np.tile(y,11)
    # res = np.concatenate([y,y])
    # for _ in range(10):
    # #  y_trans.append(y)
    #   res = np.concatenate([res,y])
        
  return torch.tensor(res)

Define Model:

Let's try out Linear regression and see how it does. maybe a more simple model could fit faster than a neural network. We would try out 4 settings with different epochs and see what the results would be.

In [13]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import PolynomialFeatures

def showaccs_linear(sgd_classes,test_data):
  picks = []
  acc = []
  for i in range(1,6):
    test_data_set = game_datasets(test_data)
    test_data_loader = DataLoader(test_data_set,batch_size=5000)
    test_data_iter = iter(test_data_loader)
    del test_data_set
    del test_data_loader
    correct = 0
    total = 0
    poly = PolynomialFeatures(degree=2)
    for data in test_data_iter:
      x,y = data
      x = game_datasets_transform_X(x,i)
      y = game_datasets_transform_Y(y,i)
      x = x.view(-1,150).float()
      y = y.view(-1,1).float()
      x = x.numpy()
      x = poly.fit_transform(x)
      y = y.numpy().ravel()
      output = sgd_classes[i].predict(x)
      res = output
      res[res<=0.5] = 0
      res[res>0.5] = 1
      correct += np.sum(res==y)
      total += len(output)
    picks.append(i)
    acc.append(correct/total * 100)
  chart_data = pd.DataFrame({'pick_numbers':picks,'accuracy':acc})
  final_acc = (acc[0] + 1.5*acc[1] + 3*acc[2] + 4*acc[3] + 7*acc[4]) / 16.5 
  return final_acc,chart_data

In [18]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import PolynomialFeatures

losses = ["hinge","log","modified_huber","squared_hinge"]
etas = [0.0001,0.001,0.01,0.1,1]

for losss in losses:
    for eta in etas:
        print(f"loss {losss} and eta {eta}")
        sgd_classes = [SGDClassifier(learning_rate="constant",eta0=eta,random_state=10,loss=losss,n_jobs=-1) for i in range(6)]
        train_data_set = game_datasets(train_data)
        train_data_loader = DataLoader(train_data_set,batch_size=5000)
        train_data_iter = iter(train_data_loader)
        poly = PolynomialFeatures(degree=2)
        del train_data_set
        for data in train_data_iter:
            for class_number in range(1,6):
                x,y = data
                x = game_datasets_transform_X(x,class_number)
                y = game_datasets_transform_Y(y,class_number)
                x = x.view(-1,150).float()
                y = y.view(-1,1).float()
                x = x.numpy()
                x = poly.fit_transform(x)
                y = y.numpy().ravel()
                sgd_classes[class_number].partial_fit(x,y,[0,1])
        print('Done Training')
        print('Train Acc:  ')
        print(showaccs_linear(sgd_classes,train_data))
        print('Valid Acc:  ')
        print(showaccs_linear(sgd_classes,test_data))

loss hinge and eta 0.0001
Done Training
Train Acc:  
(54.73855450812698,    pick_numbers   accuracy
0             1  53.577589
1             2  53.698997
2             3  54.359991
3             4  54.659720
4             5  55.334459)
Valid Acc:  
(55.20350910806769,    pick_numbers   accuracy
0             1  53.603855
1             2  53.716915
2             3  54.644510
3             4  55.050091
4             5  56.077826)
loss hinge and eta 0.001
Done Training
Train Acc:  
(54.58690926200391,    pick_numbers   accuracy
0             1  53.622568
1             2  53.739264
2             3  54.431902
3             4  54.630183
4             5  54.948014)
Valid Acc:  
(55.24348638771235,    pick_numbers   accuracy
0             1  53.704496
1             2  53.816982
2             3  54.891304
3             4  55.249866
4             5  55.916311)
loss hinge and eta 0.01
Done Training
Train Acc:  
(52.174335659991016,    pick_numbers   accuracy
0             1  53.587495
1          

KeyboardInterrupt: 

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import PolynomialFeatures

losses = ["modified_huber","squared_hinge"]
etas = [0.0001,0.001,0.01]

for losss in losses:
    for eta in etas:
        print(f"loss {losss} and eta {eta}")
        sgd_classes = [SGDClassifier(learning_rate="constant",eta0=eta,random_state=10,loss=losss,n_jobs=-1) for i in range(6)]
        train_data_set = game_datasets(train_data)
        train_data_loader = DataLoader(train_data_set,batch_size=5000)
        train_data_iter = iter(train_data_loader)
        poly = PolynomialFeatures(degree=2)
        del train_data_set
        for data in train_data_iter:
            for class_number in range(1,6):
                x,y = data
                x = game_datasets_transform_X(x,class_number)
                y = game_datasets_transform_Y(y,class_number)
                x = x.view(-1,150).float()
                y = y.view(-1,1).float()
                x = x.numpy()
                x = poly.fit_transform(x)
                y = y.numpy().ravel()
                sgd_classes[class_number].partial_fit(x,y,[0,1])
        print('Done Training')
        print('Train Acc:  ')
        print(showaccs_linear(sgd_classes,train_data))
        print('Valid Acc:  ')
        print(showaccs_linear(sgd_classes,test_data))

loss modified_huber and eta 0.0001
Done Training
Train Acc:  
(54.35601698847482,    pick_numbers   accuracy
0             1  53.540536
1             2  53.725676
2             3  54.127321
3             4  54.314526
4             5  54.729309)
Valid Acc:  
(55.09915226762407,    pick_numbers   accuracy
0             1  53.721078
1             2  53.987254
2             3  54.719740
3             4  55.090147
4             5  55.702035)
loss modified_huber and eta 0.001
Done Training
Train Acc:  
(52.225116062701005,    pick_numbers   accuracy
0             1  52.493292
1             2  52.601817
2             3  52.226824
3             4  52.355998
4             5  52.030562)
Valid Acc:  
(52.634791605650086,    pick_numbers   accuracy
0             1  52.793339
1             2  52.849618
2             3  52.347200
3             4  52.759098
4             5  52.618329)
loss modified_huber and eta 0.01
Done Training
Train Acc:  


In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import PolynomialFeatures

losses = ["squared_hinge"]
etas = [0.0001,0.001,0.01]

for losss in losses:
    for eta in etas:
        print(f"loss {losss} and eta {eta}")
        sgd_classes = [SGDClassifier(learning_rate="constant",eta0=eta,random_state=10,loss=losss,n_jobs=-1) for i in range(6)]
        train_data_set = game_datasets(train_data)
        train_data_loader = DataLoader(train_data_set,batch_size=5000)
        train_data_iter = iter(train_data_loader)
        poly = PolynomialFeatures(degree=2)
        del train_data_set
        for data in train_data_iter:
            for class_number in range(1,6):
                x,y = data
                x = game_datasets_transform_X(x,class_number)
                y = game_datasets_transform_Y(y,class_number)
                x = x.view(-1,150).float()
                y = y.view(-1,1).float()
                x = x.numpy()
                x = poly.fit_transform(x)
                y = y.numpy().ravel()
                sgd_classes[class_number].partial_fit(x,y,[0,1])
        print('Done Training')
        print('Train Acc:  ')
        print(showaccs_linear(sgd_classes,train_data))
        print('Valid Acc:  ')
        print(showaccs_linear(sgd_classes,test_data))

loss squared_hinge and eta 0.0001
Done Training
Train Acc:  
(54.356020571290806,    pick_numbers   accuracy
0             1  53.540536
1             2  53.725676
2             3  54.127321
3             4  54.314526
4             5  54.729317)
Valid Acc:  
(55.09915226762407,    pick_numbers   accuracy
0             1  53.721078
1             2  53.987254
2             3  54.719740
3             4  55.090147
4             5  55.702035)
loss squared_hinge and eta 0.001
Done Training
Train Acc:  
(52.224996806111484,    pick_numbers   accuracy
0             1  52.493292
1             2  52.601817
2             3  52.226799
3             4  52.355968
4             5  52.030309)
Valid Acc:  
(52.6342477858704,    pick_numbers   accuracy
0             1  52.793339
1             2  52.849618
2             3  52.347128
3             4  52.759170
4             5  52.617037)
loss squared_hinge and eta 0.01


Lets try out log loss with new data transform

In [15]:
def game_datasets_transform_X(data_X,mode=None,device="cpu"):
  # If mode is none only the 10 picks are added.
  # If mode is equal to 10 all possible combinations are added aswell.
  # If mode is either 1,2,3,4,5 the picks with those scenarios are only added.

  if mode is not None:
    picks = data_X.t()
    picks = picks.to(device)
    #picks = data_X
    #1st picks
    picks_rad = torch.zeros(data_X.shape[0],300,device=device)
    picks_rad[range(picks_rad.shape[0]), picks[0]] = 1
    picks_dire = torch.zeros(data_X.shape[0],300,device=device)
    picks_dire[range(picks_dire.shape[0]), picks[5] + 150] = 1
    if mode==10:
      res = torch.cat([picks_rad,picks_dire],dim=0) 
    if mode==1:
      return torch.cat([picks_rad,picks_dire],dim=0) 

    #2nd picks
    picks_rad[range(picks_rad.shape[0]), picks[1]] = 1
    picks_dire[range(picks_dire.shape[0]), picks[6] + 150] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==2:
      return torch.cat([picks_rad,picks_dire],dim=0) 

    #3rd picks
    picks_rad[range(picks_rad.shape[0]), picks[5:7] + 150] = 1
    picks_dire[range(picks_dire.shape[0]), picks[0:2]] = 1

    picks_rad[range(picks_rad.shape[0]), picks[2]] = 1
    picks_dire[range(picks_dire.shape[0]), picks[7] + 150] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==3:
      return torch.cat([picks_rad,picks_dire],dim=0)  

    #4th picks
    picks_rad[range(picks_rad.shape[0]), picks[3]] = 1
    picks_dire[range(picks_dire.shape[0]), picks[8] + 150] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==4:
      return torch.cat([picks_rad,picks_dire],dim=0)

    #5th picks
    picks_rad[range(picks_rad.shape[0]), picks[7:9] + 150] = 1
    picks_dire[range(picks_dire.shape[0]), picks[2:4]] = 1

    picks_rad[range(picks_rad.shape[0]), picks[4]] = 1
    picks_dire[range(picks_dire.shape[0]), picks[9] + 150] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==5:
      return torch.cat([picks_rad,picks_dire],dim=0)

    #All picks (Only for mode 10)
    picks_rad[range(picks_rad.shape[0]), picks[9]] = 1
    res = torch.cat([res,picks_rad],dim=0) 
    return res
  
  else:
    picks = data_X.t()
    picks = picks.to(device)
    picks_all = torch.zeros(data_X.shape[0],150,device=device)
    picks_all[range(picks_all.shape[0]), picks[0:5]]= -1
    picks_all[range(picks_all.shape[0]), picks[5:10]]= 1
    return picks_all

In [14]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import PolynomialFeatures

def showaccs_linear(sgd_classes,test_data):
  picks = []
  acc = []
  for i in range(1,6):
    test_data_set = game_datasets(test_data)
    test_data_loader = DataLoader(test_data_set,batch_size=2500)
    test_data_iter = iter(test_data_loader)
    del test_data_set
    del test_data_loader
    correct = 0
    total = 0
    poly = PolynomialFeatures(degree=2)
    for data in test_data_iter:
      x,y = data
      x = game_datasets_transform_X(x,i)
      y = game_datasets_transform_Y(y,i)
      x = x.view(-1,300).float()
      y = y.view(-1,1).float()
      x = x.numpy()
      x = poly.fit_transform(x)
      y = y.numpy().ravel()
      output = sgd_classes[i].predict(x)
      res = output
      res[res<=0.5] = 0
      res[res>0.5] = 1
      correct += np.sum(res==y)
      total += len(output)
    picks.append(i)
    acc.append(correct/total * 100)
  chart_data = pd.DataFrame({'pick_numbers':picks,'accuracy':acc})
  final_acc = (acc[0] + 1.5*acc[1] + 3*acc[2] + 4*acc[3] + 7*acc[4]) / 16.5 
  return final_acc,chart_data

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import PolynomialFeatures

losses = ["log"]
etas = [0.0001,0.0005,0.001]

for losss in losses:
    for eta in etas:
        print(f"loss {losss} and eta {eta}")
        sgd_classes = [SGDClassifier(learning_rate="constant",eta0=eta,random_state=10,loss=losss,n_jobs=-1) for i in range(6)]
        train_data_set = game_datasets(train_data)
        train_data_loader = DataLoader(train_data_set,batch_size=2500)
        train_data_iter = iter(train_data_loader)
        poly = PolynomialFeatures(degree=2)
        del train_data_set
        for data in train_data_iter:
            for class_number in range(1,6):
                x,y = data
                x = game_datasets_transform_X(x,class_number)
                y = game_datasets_transform_Y(y,class_number)
                x = x.view(-1,300).float()
                y = y.view(-1,1).float()
                x = x.numpy()
                x = poly.fit_transform(x)
                y = y.numpy().ravel()
                sgd_classes[class_number].partial_fit(x,y,[0,1])
                
        print('Done Training')
        print('Train Acc:  ')
        print(showaccs_linear(sgd_classes,train_data))
        print('Valid Acc:  ')
        print(showaccs_linear(sgd_classes,test_data))

loss log and eta 0.0001
Done Training
Train Acc:  
(55.67747072251367,    pick_numbers   accuracy
0             1  53.622568
1             2  53.875354
2             3  54.772451
3             4  55.179835
4             5  57.029425)
Valid Acc:  
(56.33448240277503,    pick_numbers   accuracy
0             1  53.704496
1             2  54.071026
2             3  55.308586
3             4  55.872810
4             5  57.898704)
loss log and eta 0.0005
