Import the required Libraries for our problem.

In [1]:
!pwd

/content


In [2]:
import pandas as pd
import torch
from torch import nn
import numpy as np

from scipy.sparse import lil_matrix
from scipy.sparse import hstack
from scipy.sparse import coo_matrix

from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
import torch.optim as optim

Use GPU for our computations

In [4]:
print(torch.cuda.device_count())
if torch.cuda.is_available():  
  dev = "cuda:0" 
else:  
  dev = "cpu"  
device = torch.device(dev)  
print(device)

1
cuda:0


Import the Data inside

In [5]:
df = pd.read_csv('/content/drive/MyDrive/dota_games.csv')
df

Unnamed: 0,GameID,TimeStamp,GameSEQ,Leavers,RadiantWin,Pick1Rad,Pick2Rad,Pick3Rad,Pick4Rad,Pick5Rad,Pick1Dir,Pick2Dir,Pick3Dir,Pick4Dir,Pick5Dir,skill_level
0,5702035339,1605433074,4795818304,0,1,75,35,68,14,44,101,51,38,67,74,
1,5702036092,1605433111,4795818235,0,0,22,41,6,9,26,39,36,93,31,85,
2,5702039715,1605433280,4795818283,1,0,21,26,84,102,8,14,71,129,47,18,
3,5702041041,1605433346,4795848331,0,0,26,67,79,42,35,21,104,1,119,10,
4,5702041315,1605433377,4795818256,1,1,12,22,2,128,79,25,93,98,75,41,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16913394,5782875527,1610421230,4869157490,1,0,88,75,54,78,23,85,76,32,70,114,
16913395,5782876220,1610421300,4869155380,1,1,20,40,8,63,77,9,5,14,1,60,
16913396,5782878555,1610421525,4869149137,1,0,31,26,98,54,126,27,103,13,8,32,3.0
16913397,5782879828,1610421645,4869156196,1,0,129,30,41,11,68,104,101,32,31,106,2.0


Filter out games that have no leavers. Games with Leavers=0

In [6]:
df_no_leavers = df.query('Leavers==0')
df_no_leavers

Unnamed: 0,GameID,TimeStamp,GameSEQ,Leavers,RadiantWin,Pick1Rad,Pick2Rad,Pick3Rad,Pick4Rad,Pick5Rad,Pick1Dir,Pick2Dir,Pick3Dir,Pick4Dir,Pick5Dir,skill_level
0,5702035339,1605433074,4795818304,0,1,75,35,68,14,44,101,51,38,67,74,
1,5702036092,1605433111,4795818235,0,0,22,41,6,9,26,39,36,93,31,85,
3,5702041041,1605433346,4795848331,0,0,26,67,79,42,35,21,104,1,119,10,
6,5702044125,1605433531,4795863372,0,1,54,26,105,85,113,86,5,70,60,75,
7,5702044805,1605433556,4795853616,0,1,79,31,67,97,21,64,107,104,6,126,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16913388,5782874424,1610421130,4869157247,0,1,64,21,28,54,46,25,7,77,1,59,
16913389,5782874452,1610421125,4869156928,0,1,3,100,63,54,120,123,64,29,1,11,3.0
16913391,5782874747,1610421160,4869156248,0,1,21,27,2,44,61,87,107,67,71,48,3.0
16913393,5782875467,1610421194,4869156483,0,1,86,87,77,63,59,30,123,6,56,23,


Separate X and y.

In [7]:
X = df_no_leavers.loc[:,'Pick1Rad':'Pick5Dir']
y = df_no_leavers['RadiantWin']

df_size = df_no_leavers.shape[0]
train_data, vali_data ,  test_data = df_no_leavers.iloc[0:int(df_size*0.85),:] , df_no_leavers.iloc[int(df_size*0.85):int(df_size*0.95),:] , df_no_leavers.iloc[int(df_size*0.95):,:]

In [8]:
del df
del df_no_leavers

Build a Dummy classifier for a baseline.

In [9]:
dum_model = DummyClassifier(strategy='most_frequent')
dum_model.fit(X,y)
print(f'Training accuracy is {dum_model.score(None,train_data["RadiantWin"])}')
print(f'Testing accuracy is {dum_model.score(None,test_data["RadiantWin"])}')

Training accuracy is 0.5357758904987429
Testing accuracy is 0.5360385452498952


Data Loader

In [10]:
from torch.utils.data import DataLoader,Dataset
import random

class game_datasets(Dataset):

  def __init__(self,rawdata):
    X = rawdata.loc[:,'Pick1Rad':'Pick5Dir']
    y = rawdata['RadiantWin']
    self.x = torch.tensor(X.values)
    self.y = torch.tensor(y.values)

  def __getitem__(self,index):
    return self.x[index],self.y[index]
    
  def __len__(self):
    return len(self.y)

Transform every batch of information and also add games for earlier picks

In [11]:
def game_datasets_transform_X(data_X,mode=None,device="cpu"):
  # If mode is none only the 10 picks are added.
  # If mode is equal to 10 all possible combinations are added aswell.
  # If mode is either 1,2,3,4,5 the picks with those scenarios are only added.

  if mode is not None:
    picks = data_X.t()
    picks = picks.to(device)
    #1st picks
    picks_rad = torch.zeros(data_X.shape[0],150,device=device)
    picks_rad[range(picks_rad.shape[0]), picks[0]] = -1
    picks_dire = torch.zeros(data_X.shape[0],150,device=device)
    picks_dire[range(picks_dire.shape[0]), picks[5]] = 1
    if mode==10:
      res = torch.cat([picks_rad,picks_dire],dim=0) 
    if mode==1:
      return torch.cat([picks_rad,picks_dire],dim=0) 

    #2nd picks
    picks_rad[range(picks_rad.shape[0]), picks[1]] = -1
    picks_dire[range(picks_dire.shape[0]), picks[6]] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==2:
      return torch.cat([picks_rad,picks_dire],dim=0) 

    #3rd picks
    picks_rad[range(picks_rad.shape[0]), picks[5:7]] = 1
    picks_dire[range(picks_dire.shape[0]), picks[0:2]] = -1

    picks_rad[range(picks_rad.shape[0]), picks[2]] = -1
    picks_dire[range(picks_dire.shape[0]), picks[7]] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==3:
      return torch.cat([picks_rad,picks_dire],dim=0)  

    #4th picks
    picks_rad[range(picks_rad.shape[0]), picks[3]] = -1
    picks_dire[range(picks_dire.shape[0]), picks[8]] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==4:
      return torch.cat([picks_rad,picks_dire],dim=0)

    #5th picks
    picks_rad[range(picks_rad.shape[0]), picks[7:9]] = 1
    picks_dire[range(picks_dire.shape[0]), picks[2:4]] = -1

    picks_rad[range(picks_rad.shape[0]), picks[4]] = -1
    picks_dire[range(picks_dire.shape[0]), picks[9]] = 1
    if mode==10:
      res = torch.cat([res,picks_rad,picks_dire],dim=0)
    if mode==5:
      return torch.cat([picks_rad,picks_dire],dim=0)

    #All picks (Only for mode 10)
    picks_rad[range(picks_rad.shape[0]), picks[9]] = 1
    res = torch.cat([res,picks_rad],dim=0) 
    return res
  
  else:
    picks = data_X.t()
    picks = picks.to(device)
    picks_all = torch.zeros(data_X.shape[0],150,device=device)
    picks_all[range(picks_all.shape[0]), picks[0:5]]= -1
    picks_all[range(picks_all.shape[0]), picks[5:10]]= 1
    return picks_all

Also Y must be transformed to match X transformation if mode is anythig either that None

In [12]:
def game_datasets_transform_Y(data_Y,mode=None):
  #y_trans = []
  if mode==None:
    return data_Y
  
  y = data_Y.numpy()
  #for i, y in enumerate(data_Y.numpy()):
  if mode <10:
    # y_trans.append(y)
    # y_trans.append(y)
    res = np.tile(y,2)
  else:
    res = np.tile(y,11)
    # res = np.concatenate([y,y])
    # for _ in range(10):
    # #  y_trans.append(y)
    #   res = np.concatenate([res,y])
        
  return torch.tensor(res)

Define Model:

In [None]:
import torch.nn.functional as F
from torch import nn 

class GamePredictor(nn.Module):
    def __init__(self,n1,n2):
        super().__init__()
        self.l1 = nn.Linear(150, n1)
        self.l2 = nn.Linear(n1, n2)
        self.l3 = nn.Linear(n2,1)
    
    def forward(self, x):
        # Pass the input tensor through each of our operations
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = self.l3(x)
        return torch.sigmoid(x)

Train Model (No Games Generated):

In [None]:
import torch.optim as optim

net = GamePredictor(100,100).to(device)
net.train()

optimizer = optim.Adam(net.parameters(), lr=0.001)

Epochs = 5

for epoch in range(0,Epochs):
  train_data_set = game_datasets(train_data)
  train_data_loader = DataLoader(train_data_set,batch_size=10000)
  train_data_iter = iter(train_data_loader)
  for data in train_data_iter:
    x,y = data
    net.zero_grad()
    x = game_datasets_transform_X(x)
    x = x.view(-1,150).float()
    y = y.view(-1,1).float()
    x = x.to(device)
    y = y.to(device)
    output = net(x)
    loss_func = nn.MSELoss()
    loss = loss_func(output,y)
    loss.backward()
    optimizer.step()
  print('Done Training')

Done Training
Done Training
Done Training
Done Training
Done Training


Accuracy on Training

In [None]:
net.eval()

train_data_set = game_datasets(train_data)
train_data_loader = DataLoader(train_data_set,batch_size=10000)
train_data_iter = iter(train_data_loader)

correct = 0
total = 0
with torch.no_grad():
  for data in train_data_iter:
    x,y = data
    x = game_datasets_transform_X(x)  
    x = x.view(-1,150).float()
    y = y.view(-1,1).float()
    x = x.to(device)
    y = y.to(device)
    output = net(x)

    # Code below this is faster because
    # It's using numpy syntax

    # for idx,pred in enumerate(output):
    #   if pred<=0.5:
    #     res = 0
    #   else:
    #     res = 1
    #   if res==y[idx]:
    #     correct +=1
    #   total +=1

    res = output.clone().detach()
    res[res<=0.5] = 0
    res[res>0.5] = 1
    correct += torch.sum(res==y)
    total += len(output)
    
print(correct)
print(total)
print(correct/total * 100)

tensor(6774663, device='cuda:0')
11841033
tensor(57.2134, device='cuda:0')


Accuracy on Validation

In [None]:
net.eval()

test_data_set = game_datasets(vali_data)
test_data_loader = DataLoader(test_data_set,batch_size=10000)
test_data_iter = iter(test_data_loader)

correct = 0
total = 0
with torch.no_grad():
  for data in test_data_iter:
    x,y = data
    x = game_datasets_transform_X(x)
    x = x.view(-1,150).float()
    y = y.view(-1,1).float()
    x = x.to(device)
    y = y.to(device)
    output = net(x)
    res = output.clone().detach()
    res[res<=0.5] = 0
    res[res>0.5] = 1
    correct += torch.sum(res==y)
    total += len(output)
print(correct)
print(total)
print(correct/total * 100)

tensor(811244, device='cuda:0')
1393063
tensor(58.2346, device='cuda:0')


Now Let's Try adding All possible games and see if it effects the training score of all ten picks

In [None]:
import torch.optim as optim

torch.manual_seed(1000)
net = GamePredictor(50,50).to(device)
net.train()

optimizer = optim.Adam(net.parameters(), lr=0.001)

Epochs = 1 

for epoch in range(0,Epochs):
  train_data_set = game_datasets(train_data)
  train_data_loader = DataLoader(train_data_set,batch_size=10000)
  train_data_iter = iter(train_data_loader)
  del train_data_set
  del train_data_loader
  for data in train_data_iter:
    x,y = data
    net.zero_grad()
    x = game_datasets_transform_X(x,10,device)
    y = game_datasets_transform_Y(y,10)
    x = x.view(-1,150).float()
    y = y.view(-1,1).float()
    x = x.to(device)
    y = y.to(device)
    output = net(x)
    loss_func = nn.MSELoss()
    loss = loss_func(output,y)
    loss.backward()
    optimizer.step()
  print('Done Training')
  del train_data_iter

 

Done Training


In [None]:
torch.manual_seed(1000)
net.eval()

train_data_set = game_datasets(train_data)
train_data_loader = DataLoader(train_data_set,batch_size=10000)
train_data_iter = iter(train_data_loader)

correct = 0
total = 0
with torch.no_grad():
  for data in train_data_iter:
    x,y = data
    x = game_datasets_transform_X(x,None,device)
    x = x.view(-1,150).float()
    y = y.view(-1,1).float()
    x = x.to(device)
    y = y.to(device)
    output = net(x)
    res = output.clone().detach()
    res[res<=0.5] = 0
    res[res>0.5] = 1
    correct += torch.sum(res==y)
    total += len(output)
    
print(correct)
print(total)
print(correct/total * 100)

tensor(6587373, device='cuda:0')
11841033
tensor(55.6317, device='cuda:0')


Now For The Big part Let's See if we could increase the score by tweaking things. First to do this we need to have an altair chart of the accuracies within a function

In [None]:
def showaccs(net,test_data,device='cpu'):
  picks = []
  acc = []
  for i in range(1,6):
    net.eval()
    test_data_set = game_datasets(test_data)
    test_data_loader = DataLoader(test_data_set,batch_size=100000)
    test_data_iter = iter(test_data_loader)
    del test_data_set
    del test_data_loader
    correct = 0
    total = 0
    with torch.no_grad():
      for data in test_data_iter:
        x,y = data
        x = game_datasets_transform_X(x,i,device)
        y = game_datasets_transform_Y(y,i)   
        x = x.view(-1,150).float()
        y = y.view(-1,1).float()
        x = x.to(device)
        y = y.to(device)
        output = net(x)
        res = output.clone().detach()
        res[res<=0.5] = 0
        res[res>0.5] = 1
        correct += torch.sum(res==y)
        total += len(output)
    picks.append(i)
    acc.append(correct/total * 100)
  chart_data = pd.DataFrame({'pick_numbers':picks,'accuracy':acc})
  final_acc = (acc[0] + 1.5*acc[1] + 3*acc[2] + 4*acc[3] + 7*acc[4]) / 16.5 
  return final_acc,chart_data

In [None]:
res = (showaccs(net,vali_data,device))

In [None]:
print(res)

(tensor(55.7744, device='cuda:0'),    pick_numbers                          accuracy
0             1  tensor(53.7368, device='cuda:0')
1             2  tensor(54.0942, device='cuda:0')
2             3  tensor(55.2084, device='cuda:0')
3             4  tensor(55.6363, device='cuda:0')
4             5  tensor(56.7470, device='cuda:0'))


Now let's try to increase the accuracy. Challenge: Increase Score by 1%! 

First things first let's do a fast hyperparameter optimization with 3 epochs and see how much the score could be improved.

In [None]:
neuron = [20,30,40,50,60,70,80,90,100]

for n in neuron:
  torch.manual_seed(1000)
  net = GamePredictor(n,n).to(device)
  net.train()

  optimizer = optim.Adam(net.parameters(), lr=0.001)

  Epochs = 3

  for epoch in range(0,Epochs):
    train_data_set = game_datasets(train_data)
    train_data_loader = DataLoader(train_data_set,batch_size=100000)
    train_data_iter = iter(train_data_loader)
    del train_data_set
    for data in train_data_iter:
      x,y = data
      net.zero_grad()
      x = game_datasets_transform_X(x,10,device)
      y = game_datasets_transform_Y(y,10)
      x = x.view(-1,150).float()
      y = y.view(-1,1).float()
      x = x.to(device)
      y = y.to(device)
      output = net(x)
      loss_func = nn.BCELoss()
      loss = loss_func(output,y)
      loss.backward()
      optimizer.step()
    print('Done Training')
    res = (showaccs(net,vali_data,device))
    print(f"For number of neurons of {n} and epoch number {epoch} the score is {res[0]}")
 

Done Training
For number of neurons of 20 and epoch number 0 the score is 55.65016174316406
Done Training
For number of neurons of 20 and epoch number 1 the score is 55.596221923828125
Done Training
For number of neurons of 20 and epoch number 2 the score is 55.56934356689453
Done Training
For number of neurons of 30 and epoch number 0 the score is 55.6831169128418
Done Training
For number of neurons of 30 and epoch number 1 the score is 55.63380813598633
Done Training
For number of neurons of 30 and epoch number 2 the score is 55.604759216308594
Done Training
For number of neurons of 40 and epoch number 0 the score is 55.64600372314453
Done Training
For number of neurons of 40 and epoch number 1 the score is 55.61870574951172
Done Training
For number of neurons of 40 and epoch number 2 the score is 55.614036560058594
Done Training
For number of neurons of 50 and epoch number 0 the score is 55.70673751831055
Done Training
For number of neurons of 50 and epoch number 1 the score is 55.6

In [None]:
torch.manual_seed(1000)
net = GamePredictor(50,50).to(device)
net.train()

optimizer = optim.Adam(net.parameters(), lr=0.001)

Epochs = 100

for epoch in range(0,Epochs):
  tot_loss = 0
  train_data_set = game_datasets(train_data)
  train_data_loader = DataLoader(train_data_set,batch_size=100000)
  train_data_iter = iter(train_data_loader)
  del train_data_set
  for data in train_data_iter:
    x,y = data
    net.zero_grad()
    x = game_datasets_transform_X(x,10,device)
    y = game_datasets_transform_Y(y,10)
    x = x.view(-1,150).float()
    y = y.view(-1,1).float()
    x = x.to(device)
    y = y.to(device)
    output = net(x)
    loss_func = nn.BCELoss()
    loss = loss_func(output,y)
    tot_loss += loss
    loss.backward()
    optimizer.step()
  print('Done Training')
  print(f'Loss is {tot_loss}')
  res = (showaccs(net,train_data,device))
  print(f"epoch number {epoch} the validation accuracy is {res[0]}")
  res = (showaccs(net,vali_data,device))
  print(f"epoch number {epoch} the validation accuracy is {res[0]}")

Done Training
Loss is 81.74137878417969
epoch number 0 the validation accuracy is 54.79117202758789
epoch number 0 the validation accuracy is 55.70673751831055
Done Training
Loss is 81.63037872314453
epoch number 1 the validation accuracy is 55.010536193847656
epoch number 1 the validation accuracy is 55.674034118652344
Done Training
Loss is 81.62556457519531
epoch number 2 the validation accuracy is 55.09693908691406
epoch number 2 the validation accuracy is 55.62784957885742
Done Training
Loss is 81.62619018554688
epoch number 3 the validation accuracy is 55.15245819091797
epoch number 3 the validation accuracy is 55.591094970703125
Done Training
Loss is 81.62263488769531
epoch number 4 the validation accuracy is 55.19149398803711
epoch number 4 the validation accuracy is 55.56679153442383
Done Training
Loss is 81.61612701416016
epoch number 5 the validation accuracy is 55.21190643310547
epoch number 5 the validation accuracy is 55.5654182434082
Done Training
Loss is 81.6102600097656

It looks like a bigger model has a higher accuracy but not by much. We Let's explore a model with only one layer see if the accuracy is almost the same or not

In [None]:
import torch.nn.functional as F
from torch import nn 

class GamePredictor_one_layer(nn.Module):
    def __init__(self,n1):
        super().__init__()
        self.l1 = nn.Linear(150, n1)
  
        self.l3 = nn.Linear(n1,1)
    
    def forward(self, x):
        # Pass the input tensor through each of our operations
        x = F.relu(self.l1(x))
        #x = F.relu(self.l2(x))
        x = self.l3(x)
        return torch.sigmoid(x)


neuron = [10,20,30,40,50,60,70,80,90,100]

for n in neuron:
  torch.manual_seed(1000)
  net = GamePredictor_one_layer(n).to(device)
  net.train()

  optimizer = optim.Adam(net.parameters(), lr=0.001)

  Epochs = 3

  for epoch in range(0,Epochs):
    train_data_set = game_datasets(train_data)
    train_data_loader = DataLoader(train_data_set,batch_size=100000)
    train_data_iter = iter(train_data_loader)
    for data in train_data_iter:
      x,y = data
      net.zero_grad()
      x = game_datasets_transform_X(x,10,device)
      y = game_datasets_transform_Y(y,10)
      x = x.view(-1,150).float()
      y = y.view(-1,1).float()
      x = x.to(device)
      y = y.to(device)
      output = net(x)
      loss_func = nn.BCELoss()
      loss = loss_func(output,y)
      loss.backward()
      optimizer.step()
    print('Done Training')
    res = (showaccs(net,vali_data,device))
    print(f"For number of neurons of {n} and epoch number {epoch} the score is {res[0]}")
 

Done Training
For number of neurons of 10 and epoch number 0 the score is 55.56721496582031
Done Training
For number of neurons of 10 and epoch number 1 the score is 55.56437301635742
Done Training
For number of neurons of 10 and epoch number 2 the score is 55.544464111328125
Done Training
For number of neurons of 20 and epoch number 0 the score is 55.50821304321289
Done Training
For number of neurons of 20 and epoch number 1 the score is 55.54490280151367
Done Training
For number of neurons of 20 and epoch number 2 the score is 55.53725814819336
Done Training
For number of neurons of 30 and epoch number 0 the score is 55.515380859375
Done Training
For number of neurons of 30 and epoch number 1 the score is 55.553897857666016
Done Training
For number of neurons of 30 and epoch number 2 the score is 55.550140380859375
Done Training
For number of neurons of 40 and epoch number 0 the score is 55.673866271972656
Done Training
For number of neurons of 40 and epoch number 1 the score is 55.6

Ok Now we found a good set of hyperparameters. I will later return to this notebook if I see an interest in my model and users willing to use it. The Final model would be 100 neurons, with relu activations and lr=0.001

Final model

In [None]:
class GamePredictor_final(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(150, 100)
        self.l3 = nn.Linear(100,1)
    
    def forward(self, x):
        # Pass the input tensor through each of our operations
        x = F.relu(self.l1(x))
        #x = F.relu(self.l2(x))
        x = self.l3(x)
        return torch.sigmoid(x)

In [None]:
import torch.optim as optim

torch.manual_seed(1000)
net = GamePredictor_final().to(device)
net.train()

optimizer = optim.Adam(net.parameters(), lr=0.001)

Epochs = 3

for epoch in range(0,Epochs):
  train_data_set = game_datasets(train_data)
  train_data_loader = DataLoader(train_data_set,batch_size=100000)
  train_data_iter = iter(train_data_loader)
  for data in train_data_iter:
    x,y = data
    net.zero_grad()
    x = game_datasets_transform_X(x,10,device)
    y = game_datasets_transform_Y(y,10)
    x = x.view(-1,150).float()
    y = y.view(-1,1).float()
    x = x.to(device)
    y = y.to(device)
    output = net(x)
    loss_func = nn.MSELoss()
    loss = loss_func(output,y)
    loss.backward()
    optimizer.step()
  print('Done Training')

Done Training
Done Training
Done Training


Test Score

In [None]:
res = (showaccs(net,test_data,device))
print(res)

(tensor(55.5896, device='cuda:0'),    pick_numbers                          accuracy
0             1  tensor(53.7029, device='cuda:0')
1             2  tensor(54.0057, device='cuda:0')
2             3  tensor(55.0588, device='cuda:0')
3             4  tensor(55.4642, device='cuda:0')
4             5  tensor(56.4977, device='cuda:0'))


And Finally Saving the model

In [None]:
!pwd

/content


In [None]:
torch.save(net.state_dict(), "./model")

I didn't test the fact that we would train a model for only that number of pick Let's do that and see

In [None]:
import torch.optim as optim

for stage in range(1,6):
  torch.manual_seed(1000)
  net = GamePredictor_final().to(device)
  net.train()

  optimizer = optim.Adam(net.parameters(), lr=0.001)

  Epochs = 3

  for epoch in range(0,Epochs):
    train_data_set = game_datasets(train_data)
    train_data_loader = DataLoader(train_data_set,batch_size=100000)
    train_data_iter = iter(train_data_loader)
    for data in train_data_iter:
      x,y = data
      net.zero_grad()
      x = game_datasets_transform_X(x,stage,device)
      y = game_datasets_transform_Y(y,stage)
      x = x.view(-1,150).float()
      y = y.view(-1,1).float()
      x = x.to(device)
      y = y.to(device)
      output = net(x)
      loss_func = nn.MSELoss()
      loss = loss_func(output,y)
      loss.backward()
      optimizer.step()
    print('Done Training')
  res = (showaccs(net,test_data,device))
  print(res)

Done Training
Done Training
Done Training
(tensor(54.6454, device='cuda:0'),    pick_numbers                          accuracy
0             1  tensor(53.7161, device='cuda:0')
1             2  tensor(53.9585, device='cuda:0')
2             3  tensor(54.5378, device='cuda:0')
3             4  tensor(54.6570, device='cuda:0')
4             5  tensor(54.9647, device='cuda:0'))
Done Training
Done Training
Done Training
(tensor(55.2463, device='cuda:0'),    pick_numbers                          accuracy
0             1  tensor(53.5998, device='cuda:0')
1             2  tensor(54.0538, device='cuda:0')
2             3  tensor(54.9835, device='cuda:0')
3             4  tensor(55.2310, device='cuda:0')
4             5  tensor(55.8585, device='cuda:0'))
Done Training
Done Training
Done Training
(tensor(55.4791, device='cuda:0'),    pick_numbers                          accuracy
0             1  tensor(53.4545, device='cuda:0')
1             2  tensor(53.9639, device='cuda:0')
2             3  

In [None]:
res = (showaccs(net,test_data,device))
print(res)

(tensor(54.6454, device='cuda:0'),    pick_numbers                          accuracy
0             1  tensor(53.7161, device='cuda:0')
1             2  tensor(53.9585, device='cuda:0')
2             3  tensor(54.5378, device='cuda:0')
3             4  tensor(54.6570, device='cuda:0')
4             5  tensor(54.9647, device='cuda:0'))


Let's try out Linear regression and see how it does. maybe a more simple model could fit faster than a neural network

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import PolynomialFeatures

Epochs = 1
sgd_classes = [SGDClassifier() for i in range(6)]

for epoch in range(0,Epochs):
  train_data_set = game_datasets(train_data)
  train_data_loader = DataLoader(train_data_set,batch_size=20000)
  train_data_iter = iter(train_data_loader)
  poly = PolynomialFeatures(degree=2)
  del train_data_set
  for data in train_data_iter:
    for class_number in range(1,6):
      x,y = data
      x = game_datasets_transform_X(x,class_number)
      y = game_datasets_transform_Y(y,class_number)
      x = x.view(-1,150).float()
      y = y.view(-1,1).float()
      x = x.numpy()
      x = poly.fit_transform(x)
      y = y.numpy().ravel()
      sgd_classes[class_number].partial_fit(x,y,[0,1])
  print('Done Training')

KeyboardInterrupt: ignored

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import PolynomialFeatures
import pickle

for i in range(1,6):
  pkl_filename = "/content/drive/MyDrive/models/" + f"pickle_model{str(i)}.pkl"
  with open(pkl_filename, 'wb') as file:
    pickle.dump(sgd_classes[i], file)

In [13]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import PolynomialFeatures
import pickle

sgd_classes = [SGDClassifier() for i in range(6)]

for i in range(1,6):
  pkl_filename = "/content/drive/MyDrive/models/" + f"pickle_model{str(i)}.pkl"
  print(pkl_filename)
  with open(pkl_filename, 'rb') as file:
    sgd_classes[i] = pickle.load(file)
    

/content/drive/MyDrive/models/pickle_model1.pkl
/content/drive/MyDrive/models/pickle_model2.pkl
/content/drive/MyDrive/models/pickle_model3.pkl
/content/drive/MyDrive/models/pickle_model4.pkl
/content/drive/MyDrive/models/pickle_model5.pkl


In [None]:
def showaccs_linear(sgd_classes,test_data):
  picks = []
  acc = []
  for i in range(1,6):
    test_data_set = game_datasets(test_data)
    test_data_loader = DataLoader(test_data_set,batch_size=100000)
    test_data_iter = iter(test_data_loader)
    del test_data_set
    del test_data_loader
    correct = 0
    total = 0
    poly = PolynomialFeatures(degree=2)
    for data in test_data_iter:
      x,y = data
      x = game_datasets_transform_X(x,i)
      y = game_datasets_transform_Y(y,i)
      x = x.view(-1,150).float()
      y = y.view(-1,1).float()
      x = x.numpy()
      print(x)
      x = poly.fit_transform(x)
      y = y.numpy().ravel()
      output = sgd_classes[i].predict(x)
      res = output
      res[res<=0.5] = 0
      res[res>0.5] = 1
      correct += np.sum(res==y)
      total += len(output)
    picks.append(i)
    acc.append(correct/total * 100)
  chart_data = pd.DataFrame({'pick_numbers':picks,'accuracy':acc})
  final_acc = (acc[0] + 1.5*acc[1] + 3*acc[2] + 4*acc[3] + 7*acc[4]) / 16.5 
  return final_acc,chart_data

In [None]:
res = (showaccs_linear(sgd_classes,test_data))
print(res)

In [14]:
picks = []
acc = []
for i in range(1,6):
  test_data_set = game_datasets(test_data)
  test_data_loader = DataLoader(test_data_set,batch_size=20000)
  test_data_iter = iter(test_data_loader)
  del test_data_set
  del test_data_loader
  correct = 0
  total = 0
  poly = PolynomialFeatures(degree=2)
  for data in test_data_iter:
    x,y = data
    x = game_datasets_transform_X(x,i)
    y = game_datasets_transform_Y(y,i)
    x = x.view(-1,150).float()
    y = y.view(-1,1).float()
    x = x.numpy()
    x = poly.fit_transform(x)
    y = y.numpy().ravel()
    output = sgd_classes[i].predict(x)
    res = output
    res[res<=0.5] = 0
    res[res>0.5] = 1
    correct += np.sum(res==y)
    total += len(output)
  picks.append(i)
  acc.append(correct/total * 100)
chart_data = pd.DataFrame({'pick_numbers':picks,'accuracy':acc})
final_acc = (acc[0] + 1.5*acc[1] + 3*acc[2] + 4*acc[3] + 7*acc[4]) / 16.5 
print(final_acc,chart_data)

55.300615743208475    pick_numbers   accuracy
0             1  53.692149
1             2  53.810880
2             3  54.791955
3             4  55.156762
4             5  56.149825


In [None]:
sgd_classes[2]

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='hinge',
              max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False)