# Pytorch Model 0.2
daniel.brooks@alumni.caltech.edu <br>
March 10, 2019 <br>  


In [1]:
#Preprocessing imports.
import numpy as np
from sklearn import preprocessing
from tqdm import tqdm

import draftsimtools as ds


In [2]:
#Torch imports.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
# GPU or CPU mode.
# device = torch.device("cpu")
device = torch.device("cuda:0")

In [4]:
#Load 1000 M19 drafts.
m19_set = ds.create_set("data/m19_rating.tsv", "data/m19_land_rating.tsv")
raw_drafts = ds.load_drafts("data/m19_1000drafts.csv")

In [5]:
#Fix commas in card names.
m19_set, raw_drafts = ds.fix_commas(m19_set, raw_drafts)

In [6]:
#Store rating information in a dictionary.
rating_dict = ds.create_rating_dict(m19_set)

In [7]:
#Process the draft data.
drafts = ds.process_drafts(raw_drafts)

Processing draft: 0.


In [30]:
# Load train, val and test.
raw_train = ds.load_drafts("data/subset1000/train1000.csv")
raw_val = ds.load_drafts("data/subset1000/val1000.csv")
raw_test = ds.load_drafts("data/subset1000/test1000.csv")
m19_set, train = ds.fix_commas(m19_set, raw_train)
m19_set, val = ds.fix_commas(m19_set, raw_val)
m19_set, test = ds.fix_commas(m19_set, raw_test)

In [8]:
#Create an M19 player. 
#b = ds.SGDBot(rating_dict)

In [9]:
#%%time
#Optimize rating parameters using stochastic gradient descent. (1 minute / 1000 drafts)
#for x in range(10):
#    b.sgd_optimization(drafts[0:25], 0.05)
#print("Done.")

In [10]:
#Write new ratings to file.
#b.write_rating_dict("sgd_05_linear.tsv")
#b.write_error("error.csv")

In [21]:
len(drafts)

1000

# PyTorch Preprocessing

In [11]:
#Save draft data as a tensor.
def create_le(cardnames):
    """Create label encoder for cardnames."""
    le = preprocessing.LabelEncoder()
    le.fit(cardnames)
    return le

def draft_to_matrix(cur_draft, le, pack_size=15):
    """Transform draft from cardname list to one hot encoding."""
    pick_list = [np.append(le.transform(cur_draft[i]), (pack_size-len(x))*[0]) \
                 for i, x in enumerate(cur_draft)]
    pick_matrix = np.int16(pick_list, device=device)
    return pick_matrix

def drafts_to_tensor(drafts, le, pack_size=15):
    """Create tensor of shape (num_drafts, 45, 15)."""
    pick_tensor_list = [draft_to_matrix(d, le) for d in drafts]
    pick_tensor = np.int16(pick_tensor_list, device=device)
    return pick_tensor

#Create drafts tensor.
le = create_le(m19_set["Name"].values)
drafts_tensor = drafts_to_tensor(drafts, le)

In [22]:
drafts_tensor.shape

(1000, 45, 15)

In [12]:
#Create torch dataset class.
from torch.utils.data.dataset import Dataset

#Drafts dataset class.
class DraftDataset(Dataset):
    """Defines a draft dataset in PyTorch."""
    
    def __init__(self, drafts_tensor, le):
        """Initialization.
        """
        self.drafts_tensor = drafts_tensor
        self.le = le
        self.cards_in_set = len(self.le.classes_)
        self.pack_size = int(self.drafts_tensor.shape[1]/3)
        
    def __getitem__(self, index):
        """Return a training example.
        """
        #Compute number of picks in a draft.
        draft_size = self.pack_size*3
        
        #Grab information on current draft.
        pick_num = index % draft_size #0-self.pack_size*3-1
        draft_num = int((index - pick_num)/draft_size)
        
        #Generate.
        x = self.create_new_x(pick_num, draft_num)
        y = self.create_new_y(pick_num, draft_num)
        return x, y
    
    def create_new_x(self, pick_num, draft_num):
        """Generate x, input, as a row vector.
        0:n     : collection vector
                  x[i]=n -> collection has n copies of card i
        n:2n    : pack vector
                  0 -> card not in pack
                  1 -> card in pack
        Efficiency optimization possible. Iterative adds to numpy array.
        """
        #Initialize collection / cards in pack vector.
        x = np.zeros([self.cards_in_set * 2], dtype = "int16")
        
        #Fill in collection vector excluding current pick (first half).
        for n in self.drafts_tensor[draft_num, :pick_num, 0]:
            x[n] += 1
            
        #Fill in pack vector.
        cards_in_pack =  self.pack_size - pick_num%self.pack_size #Cards in current pack.
        for n in self.drafts_tensor[draft_num, pick_num, :cards_in_pack]:
            x[n + self.cards_in_set] = 1
            
        #Convert to Torch tensor.
        x = torch.Tensor(x)
        return x
    
    def create_new_y(self, pick_num, draft_num, not_in_pack=0.5):
        """Generate y, a target pick row vector.
        Picked card is assigned a value of 1.
        Other cards are assigned a value of 0.
        """
        #Initialize target vector.
        #y = np.array([0] * self.cards_in_set)
        y = np.zeros([self.cards_in_set], dtype = "int16")
            
        #Add picked card.
        y[self.drafts_tensor[draft_num, pick_num, 0]] = 1
        y = torch.Tensor(y)
        return y
    
    def __len__(self):
        return len(self.drafts_tensor)
    
#Create a draft dataset.
d = DraftDataset(drafts_tensor, le)

In [13]:
#Implement NN.
class TestNet(nn.Module):
    
    def __init__(self, ss):
        """Placeholder NN. Currently does nothing.
        
        param ss: number of cards in set
        """
        super(TestNet, self).__init__()
        
        self.ss = ss

        size_in = self.ss
        size1 = self.ss
        size2 = self.ss
        size3 = self.ss
        size4 = self.ss
        
        #Placeholder. 
        #x -> (4, 285, 2)
        #y -> (4, 285, 1)
        self.linear1 = torch.nn.Linear(size_in, size1)
        self.relu1 = torch.nn.ReLU()
        self.dropout1 = nn.Dropout(0.1)
        
        self.linear2 = torch.nn.Linear(size1, size2)
        self.relu2 = torch.nn.ReLU()
        #self.dropout2 = nn.Dropout(0.5)
        
        self.linear3 = torch.nn.Linear(size2, size3)
        self.relu3 = torch.nn.ReLU()
        #self.dropout3 = nn.Dropout(0.5)
        
        self.linear4 = torch.nn.Linear(size3, size4)
        self.relu4 = torch.nn.ReLU()
        
        #self.sm = torch.nn.Softmax()
                
    def forward(self, x):
        
        collection = x[:, :self.ss]
        pack = x[:, self.ss:]
        
        y = self.linear1(collection)
        y = self.relu1(y)
        y = self.dropout1(y)
        
        y = self.linear2(y)
        y = self.relu2(y)
        
        y = self.linear3(y)
        y = self.relu3(y)

        y = self.linear4(y)
        y = self.relu4(y)
        
        y = y * pack # Enforce cards in pack only.
       
        #y = self.sm(y, dim=1)
        
        #y = F.softmax(y, dim=1)
        
        return y
    
# class MultiplyVec(nn.Module):
#     def __init__(self, pack):
#         super(MultiplyVec, self).__init__()
#         self.pack = pack
#         
#     def forward(self, x):
#         return x * pack # Element-wise multiplication.

#Create NN.
net = TestNet(len(m19_set)).cuda()
print(net)


TestNet(
  (linear1): Linear(in_features=285, out_features=285, bias=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.1)
  (linear2): Linear(in_features=285, out_features=285, bias=True)
  (relu2): ReLU()
  (linear3): Linear(in_features=285, out_features=285, bias=True)
  (relu3): ReLU()
  (linear4): Linear(in_features=285, out_features=285, bias=True)
  (relu4): ReLU()
)


In [14]:
###############################
# Define training parameters. #
###############################

#Define training set. Batchsize must be 1.
trainset = d
trainloader = torch.utils.data.DataLoader(trainset, batch_size=10, shuffle=True)

#Loss function.
criterion = nn.MSELoss()
# criterion = nn.CrossEntropyLoss()

#Define optimizer. 
#optimizer = optim.SGD(net.parameters(), lr=0.5, momentum=0.9)
optimizer = optim.SGD(net.parameters(), lr=0.2, momentum=0.2)


In [15]:
#def visualize_pick()

In [16]:
net

TestNet(
  (linear1): Linear(in_features=285, out_features=285, bias=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.1)
  (linear2): Linear(in_features=285, out_features=285, bias=True)
  (relu2): ReLU()
  (linear3): Linear(in_features=285, out_features=285, bias=True)
  (relu3): ReLU()
  (linear4): Linear(in_features=285, out_features=285, bias=True)
  (relu4): ReLU()
)

In [None]:
# Print results of training.
# output[0].cpu().detach().numpy()

In [18]:
for i, data in enumerate(trainloader):
    if i == 1:
        break
    
    inputs, labels = data
    inputs = inputs.cuda()
    labels = labels.cuda()
    
    output = net(inputs)
    
    print("==============")
    for zi in range(len(output[0])):
        print(output[zi])
    print("==============")


tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0748, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0142, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0032, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0147, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 

IndexError: index 10 is out of bounds for dimension 0 with size 10

In [None]:
len(trainset[0][0])

In [19]:
# Train network.

#Set up pulled from pytorch tutorial.
#https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-download-beginner-blitz-cifar10-tutorial-py

NUM_EPOCH = 100
num_draft = len(trainloader)

for epoch in range(NUM_EPOCH):
    
    #Loop over x,y for each dataset.
    running_loss = 0
    for i, data in enumerate(trainloader):
        
        #Get the inputs. Keeps batch size.
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()
        #print(inputs.shape, labels.shape)
        #inputs = data[0][0,:,:]
        #labels = data[1][0,:,:]
        #print(inputs.shape, labels.shape)
                
        #Zero parameter gradients between batches.
        optimizer.zero_grad()
        
        #Perform training.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        #Print loss data.
        running_loss += loss.item()
        step = 1
        if i % num_draft == num_draft-1 and (epoch + 1) % step == 0:
            print('Epoch %d, Average Loss: %.6f' % (epoch+1, running_loss/num_draft))
            running_loss = 0.0
            
print("Finished Training")


Epoch 1, Average Loss: 0.003421
Epoch 2, Average Loss: 0.003417
Epoch 3, Average Loss: 0.003414
Epoch 4, Average Loss: 0.003410
Epoch 5, Average Loss: 0.003405
Epoch 6, Average Loss: 0.003401
Epoch 7, Average Loss: 0.003396
Epoch 8, Average Loss: 0.003392
Epoch 9, Average Loss: 0.003389
Epoch 10, Average Loss: 0.003384
Epoch 11, Average Loss: 0.003380
Epoch 12, Average Loss: 0.003377
Epoch 13, Average Loss: 0.003372
Epoch 14, Average Loss: 0.003368
Epoch 15, Average Loss: 0.003364
Epoch 16, Average Loss: 0.003361
Epoch 17, Average Loss: 0.003358
Epoch 18, Average Loss: 0.003353
Epoch 19, Average Loss: 0.003349
Epoch 20, Average Loss: 0.003346
Epoch 21, Average Loss: 0.003341
Epoch 22, Average Loss: 0.003337
Epoch 23, Average Loss: 0.003333
Epoch 24, Average Loss: 0.003330
Epoch 25, Average Loss: 0.003327
Epoch 26, Average Loss: 0.003322
Epoch 27, Average Loss: 0.003319
Epoch 28, Average Loss: 0.003314
Epoch 29, Average Loss: 0.003312
Epoch 30, Average Loss: 0.003308
Epoch 31, Average L

In [None]:
# Next step: develop intuition for results. 

# For each example -> print input/target rating

In [None]:
# Next step: polish and hyperparameter tuning.
