# DraftNet Development
daniel.brooks@alumni.caltech.edu <br>
July 1, 2019 <br>  


In [1]:
#Preprocessing imports.
import numpy as np
from sklearn import preprocessing
from tqdm import tqdm

import draftsimtools as ds


In [2]:
#Torch imports.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.dataset import Dataset

# Load the Dataset

In [3]:
# Toggle GPU/CPU mode.
# device = torch.device("cpu")
device = torch.device("cuda:0")

In [4]:
def create_le(cardnames):
    """Create label encoder for cardnames."""
    le = preprocessing.LabelEncoder()
    le.fit(cardnames)
    return le

def draft_to_matrix(cur_draft, le, pack_size=15):
    """Transform draft from cardname list to one hot encoding."""
    pick_list = [np.append(le.transform(cur_draft[i]), (pack_size-len(x))*[0]) \
                 for i, x in enumerate(cur_draft)]
    pick_matrix = np.int16(pick_list, device=device)
    return pick_matrix

def drafts_to_tensor(drafts, le, pack_size=15):
    """Create tensor of shape (num_drafts, 45, 15)."""
    pick_tensor_list = [draft_to_matrix(d, le) for d in drafts]
    pick_tensor = np.int16(pick_tensor_list, device=device)
    return pick_tensor

#Drafts dataset class.
class DraftDataset(Dataset):
    """Defines a draft dataset in PyTorch."""
    
    def __init__(self, drafts_tensor, le):
        """Initialization.
        """
        self.drafts_tensor = drafts_tensor
        self.le = le
        self.cards_in_set = len(self.le.classes_)
        self.pack_size = int(self.drafts_tensor.shape[1]/3)
        
    def __getitem__(self, index):
        """Return a training example.
        """
        #Compute number of picks in a draft.
        draft_size = self.pack_size*3
        
        #Grab information on current draft.
        pick_num = index % draft_size #0-self.pack_size*3-1
        draft_num = int((index - pick_num)/draft_size)
        
        #Generate.
        x = self.create_new_x(pick_num, draft_num)
        y = self.create_new_y(pick_num, draft_num)
        return x, y
    
    def create_new_x(self, pick_num, draft_num):
        """Generate x, input, as a row vector.
        0:n     : collection vector
                  x[i]=n -> collection has n copies of card i
        n:2n    : pack vector
                  0 -> card not in pack
                  1 -> card in pack
        Efficiency optimization possible. Iterative adds to numpy array.
        """
        #Initialize collection / cards in pack vector.
        x = np.zeros([self.cards_in_set * 2], dtype = "int16")
        
        #Fill in collection vector excluding current pick (first half).
        for n in self.drafts_tensor[draft_num, :pick_num, 0]:
            x[n] += 1
            
        #Fill in pack vector.
        cards_in_pack =  self.pack_size - pick_num%self.pack_size #Cards in current pack.
        for n in self.drafts_tensor[draft_num, pick_num, :cards_in_pack]:
            x[n + self.cards_in_set] = 1
            
        #Convert to Torch tensor.
        x = torch.Tensor(x)
        return x
    
    def create_new_y(self, pick_num, draft_num, not_in_pack=0.5):
        """Generate y, a target pick row vector.
        Picked card is assigned a value of 1.
        Other cards are assigned a value of 0.
        """
        #Initialize target vector.
        #y = np.array([0] * self.cards_in_set)
        y = np.zeros([self.cards_in_set], dtype = "int16")
            
        #Add picked card.
        y[self.drafts_tensor[draft_num, pick_num, 0]] = 1
        y = torch.Tensor(y)
        return y
    
    def __len__(self):
        return len(self.drafts_tensor)

def load_dataset(rating_path1, rating_path2, drafts_path):
    """Create drafts tensor from drafts and set files."""
    # Load the set. inputs
    cur_set = ds.create_set(rating_path1, rating_path2)
    raw_drafts = ds.load_drafts(drafts_path)
    
    # Fix commas. 
    cur_set, raw_drafts = ds.fix_commas(cur_set, raw_drafts)
    
    # Process drafts. 
    drafts = ds.process_drafts(raw_drafts)
    
    # Drop empty elements at end, if present. 
    while len(drafts[-1]) == 0:
        drafts = drafts[:-1]
    
    # Create a label encoder.
    le = create_le(cur_set["Name"].values)
    
    # Create drafts tensor. 
    drafts_tensor = drafts_to_tensor(drafts, le)
    
    # Create a dataset.
    cur_dataset = DraftDataset(drafts_tensor, le)
    
    # Get the tensor
    return cur_dataset, drafts_tensor, cur_set, le

In [5]:
# Define rating file paths. 
rating_path1 = "data/m19_rating.tsv"
rating_path2 = "data/m19_land_rating.tsv"

# Load data. 
train_data, train_tensor, m19_set, le = load_dataset(rating_path1, rating_path2, "data/subset20000/train_small.csv")
val_data, val_tensor, m19_set, le = load_dataset(rating_path1, rating_path2, "data/subset20000/val_small.csv")
#test_data, test_tensor, m19_set, le = load_dataset(rating_path1, rating_path2, "data/subset20000/test.csv")

Processing draft: 0.
Processing draft: 0.


# Define the NN

In [6]:
#Implement NN.
class DraftNet(nn.Module):
    
    def __init__(self, ss):
        """Placeholder NN. Currently does nothing.
        
        param ss: number of cards in set
        """
        super(DraftNet, self).__init__()
        
        self.ss = ss

        size_in = self.ss
        size1 = self.ss
        size2 = self.ss
        size3 = self.ss
        size4 = self.ss
        
        #Placeholder. 
        #x -> (4, 285, 2)
        #y -> (4, 285, 1)inputs
        self.linear1 = torch.nn.Linear(size_in, size1)
        self.relu1 = torch.nn.ReLU()
        self.dropout1 = nn.Dropout(0.1)
        
        self.linear2 = torch.nn.Linear(size1, size2)
        self.relu2 = torch.nn.ReLU()
        #self.dropout2 = nn.Dropout(0.5)
        
        self.linear3 = torch.nn.Linear(size2, size3)
        self.relu3 = torch.nn.ReLU()
        #self.dropout3 = nn.Dropout(0.5)
        
        self.linear4 = torch.nn.Linear(size3, size4)
        self.relu4 = torch.nn.ReLU()
        
        #self.sm = torch.nn.Softmax()
                
    def forward(self, x):
        
        collection = x[:, :self.ss]
        pack = x[:, self.ss:]
        
        y = self.linear1(collection)
        y = self.relu1(y)
        y = self.dropout1(y)
        
        y = self.linear2(y)
        y = self.relu2(y)
        
        y = self.linear3(y)
        y = self.relu3(y)

        y = self.linear4(y)
        y = self.relu4(y)
        
        y = y * pack # Enforce cards in pack only.
       
        #y = self.sm(y, dim=1)
        
        #y = F.log_softmax(y, dim=1)
        
        return y

#Create NN.
net = DraftNet(len(m19_set)).cuda()
print(net)

DraftNet(
  (linear1): Linear(in_features=285, out_features=285, bias=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.1)
  (linear2): Linear(in_features=285, out_features=285, bias=True)
  (relu2): ReLU()
  (linear3): Linear(in_features=285, out_features=285, bias=True)
  (relu3): ReLU()
  (linear4): Linear(in_features=285, out_features=285, bias=True)
  (relu4): ReLU()
)


# Network training

In [7]:
def train_net(net, dataloader, num_epoch, criterion, optimizer):
    """Train the network."""
    
    net.train()
    
    for epoch in range(num_epoch):
        
        #Loop over x,y for each dataset.
        running_loss = 0
        for i, data in enumerate(dataloader):
        
            #Get the inputs. Keeps batch size.
            x, y = data
            
            # cuda() is needed for GPU mode. Not sure why.
            x = x.cuda()
            y = y.cuda()
            
            # Zero parameter gradients between batches.
            optimizer.zero_grad()
        
            #Perform training.
            y_pred = net(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
        
            #Print loss data.
            running_loss += loss.item()
            step = 1
            if i % len(dataloader) == len(dataloader)-1 and (epoch + 1) % step == 0:
                print('Epoch %d, Average Loss: %.6f' % (epoch+1, running_loss/len(dataloader)))
                running_loss = 0.0

In [8]:
###############################
# Define training parameters. #
###############################
net = net
trainloader = torch.utils.data.DataLoader(train_data, batch_size=10, shuffle=True)
num_epoch = 20
train_criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.2, momentum=0.2)

#################
# Train network #
#################
train_net(net, trainloader, num_epoch, train_criterion, optimizer)

Epoch 1, Average Loss: 0.003426
Epoch 2, Average Loss: 0.003423
Epoch 3, Average Loss: 0.003421
Epoch 4, Average Loss: 0.003418
Epoch 5, Average Loss: 0.003417
Epoch 6, Average Loss: 0.003415
Epoch 7, Average Loss: 0.003412
Epoch 8, Average Loss: 0.003410
Epoch 9, Average Loss: 0.003409
Epoch 10, Average Loss: 0.003405
Epoch 11, Average Loss: 0.003403
Epoch 12, Average Loss: 0.003402
Epoch 13, Average Loss: 0.003400
Epoch 14, Average Loss: 0.003398
Epoch 15, Average Loss: 0.003395
Epoch 16, Average Loss: 0.003392
Epoch 17, Average Loss: 0.003387
Epoch 18, Average Loss: 0.003384
Epoch 19, Average Loss: 0.003384
Epoch 20, Average Loss: 0.003380


# Network validation

In [9]:
def val_net(net, dataloader, criterion):
    net.eval()
    val_loss = 0
    correct = 0
    
    with torch.no_grad():
        for i, data in enumerate(dataloader):
        
            #Get the inputs. Keeps batch size.
            x, y = data
            
            # cuda() is needed for GPU mode. Not sure why.
            x = x.cuda()
            y = y.cuda()
            
            # Compute val loss.
            y_pred = net(x)
            #val_loss += criterion(y_pred, y, reduction='sum').item() # sum up batch loss
            val_loss += val_criterion(y_pred, y)
            
            # Get prediction. 
            pred = y_pred.argmax(dim=1, keepdim=True)
            correct += pred.eq(y.view_as(pred)).sum().item()
            
    test_loss /= len(dataloader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(dataloader.dataset),
        100. * correct / len(dataloader.dataset)))

In [10]:
#################################
# Define validation parameters. #
#################################
net = net
valloader = torch.utils.data.DataLoader(val_data, batch_size=10, shuffle=True)
val_criterion = nn.MSELoss()

#################
# Train network #
#################
val_net(net, valloader, val_criterion)

RuntimeError: shape '[10, 1]' is invalid for input of size 2850

In [None]:
# To Do:
# 1. Debug loss function. 
# 2. Run validation.