# DraftNet Development
daniel.brooks@alumni.caltech.edu <br>
July 1, 2019 <br>  


In [1]:
#Preprocessing imports.
import numpy as np
from sklearn import preprocessing
from tqdm import tqdm

import draftsimtools as ds


In [2]:
#Torch imports.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.dataset import Dataset

# Load the Dataset

In [3]:
# Toggle GPU/CPU mode.
# device = torch.device("cpu")
device = torch.device("cuda:0")

In [4]:
def create_le(cardnames):
    """Create label encoder for cardnames."""
    le = preprocessing.LabelEncoder()
    le.fit(cardnames)
    return le

def draft_to_matrix(cur_draft, le, pack_size=15):
    """Transform draft from cardname list to one hot encoding."""
    pick_list = [np.append(le.transform(cur_draft[i]), (pack_size-len(x))*[0]) \
                 for i, x in enumerate(cur_draft)]
    pick_matrix = np.int16(pick_list, device=device)
    return pick_matrix

def drafts_to_tensor(drafts, le, pack_size=15):
    """Create tensor of shape (num_drafts, 45, 15)."""
    pick_tensor_list = [draft_to_matrix(d, le) for d in drafts]
    pick_tensor = np.int16(pick_tensor_list, device=device)
    return pick_tensor

#Drafts dataset class.
class DraftDataset(Dataset):
    """Defines a draft dataset in PyTorch."""
    
    def __init__(self, drafts_tensor, le):
        """Initialization.
        """
        self.drafts_tensor = drafts_tensor
        self.le = le
        self.cards_in_set = len(self.le.classes_)
        self.pack_size = int(self.drafts_tensor.shape[1]/3)
        self.draft_size = self.pack_size*3
        
    def __getitem__(self, index):
        """Return a training example.
        """
        #Grab information on current draft.
        pick_num = index % self.draft_size #0-self.pack_size*3-1
        draft_num = int((index - pick_num)/self.draft_size)
        
        #Generate.
        x = self.create_new_x(pick_num, draft_num)
        y = self.create_new_y(pick_num, draft_num)
        return x, y
    
    def create_new_x(self, pick_num, draft_num):
        """Generate x, input, as a row vector.
        0:n     : collection vector
                  x[i]=n -> collection has n copies of card i
        n:2n    : pack vector
                  0 -> card not in pack
                  1 -> card in pack
        Efficiency optimization possible. Iterative adds to numpy array.
        """
        #Initialize collection / cards in pack vector.
        x = np.zeros([self.cards_in_set * 2], dtype = "int16")
        
        #Fill in collection vector excluding current pick (first half).
        for n in self.drafts_tensor[draft_num, :pick_num, 0]:
            x[n] += 1
            
        #Fill in pack vector.
        cards_in_pack =  self.pack_size - pick_num%self.pack_size #Cards in current pack.
        for n in self.drafts_tensor[draft_num, pick_num, :cards_in_pack]:
            x[n + self.cards_in_set] = 1
            
        #Convert to Torch tensor.
        x = torch.Tensor(x)
        return x
    
    def create_new_y(self, pick_num, draft_num, not_in_pack=0.5):
        """Generate y, a target pick row vector.
        Picked card is assigned a value of 1.
        Other cards are assigned a value of 0.
        """
        #Initialize target vector.
        #y = np.array([0] * self.cards_in_set)
        y = np.zeros([self.cards_in_set], dtype = "int16")
            
        #Add picked card.
        y[self.drafts_tensor[draft_num, pick_num, 0]] = 1
        #y = torch.Tensor(y, dtype=torch.int64) # Needed as target.
        y = torch.tensor(y, dtype=torch.int64, device=device) # Needed as target.
        return y
    
    def __len__(self):
        return len(self.drafts_tensor) * self.draft_size

def load_dataset(rating_path1, rating_path2, drafts_path):
    """Create drafts tensor from drafts and set files."""
    # Load the set. inputs
    cur_set = ds.create_set(rating_path1, rating_path2)
    raw_drafts = ds.load_drafts(drafts_path)
    
    # Fix commas. 
    cur_set, raw_drafts = ds.fix_commas(cur_set, raw_drafts)
    
    # Process drafts. 
    drafts = ds.process_drafts(raw_drafts)
    
    # Drop empty elements at end, if present. 
    while len(drafts[-1]) == 0:
        drafts = drafts[:-1]
    
    # Create a label encoder.
    le = create_le(cur_set["Name"].values)
    
    # Create drafts tensor. 
    drafts_tensor = drafts_to_tensor(drafts, le)
    
    # Create a dataset.
    cur_dataset = DraftDataset(drafts_tensor, le)
    
    # Get the tensor
    return cur_dataset, drafts_tensor, cur_set, le

In [5]:
# Define rating file paths. 
rating_path1 = "data/m19_rating.tsv"
rating_path2 = "data/m19_land_rating.tsv"

# Load data. 
train_data, train_tensor, m19_set, le = load_dataset(rating_path1, rating_path2, "data/subset20000/train.csv")
val_data, val_tensor, m19_set, le = load_dataset(rating_path1, rating_path2, "data/subset20000/val.csv")
#test_data, test_tensor, m19_set, le = load_dataset(rating_path1, rating_path2, "data/subset20000/test.csv")

Processing draft: 0.
Processing draft: 10000.
Processing draft: 0.


In [6]:
def create_set_vector(casting_cost, card_type, rarity, color_vector):
    """
    Returns a one hot encoded card property vector. 
    
    There are 21 binary features:
    
    0. cmc=0
    1. cmc=1
    2. cmc=2
    3. cmc=3
    4. cmc=4
    5. cmc=5
    6. cmc=6
    7. cmc>=7
    8. creature?
    9. common?
    10. uncommon?
    11. rare?
    12. mythic?
    13. colorless?
    14. monocolored?
    15. multicolored?
    16. color1?
    17. color2?
    18. color3?
    19. color4?
    20. color5?
    
    :param casting_cost: integer casting cost of card
    :param card_type: "Creature" or other
    :param rarity": "C", "U", "R", or "M"
    "param color_vector": vector corresponding to colors of card, example: [1,0,0,0,1]
    
    """
    # Initialize set vector.
    v = [0] * 21
    
    # Encode cmc. 
    if casting_cost == 0:
        v[0] = 1
    elif casting_cost == 1:
        v[1] = 1
    elif casting_cost == 2:
        v[2] = 1
    elif casting_cost == 3:
        v[3] = 1
    elif casting_cost == 4:
        v[4] = 1
    elif casting_cost == 5:
        v[5] = 1
    elif casting_cost == 6:
        v[6] = 1
    elif casting_cost >= 7:
        v[7] = 1
    else:
        print("WARNING: Undefined casting cost.")
    
    # Encode type.
    if card_type == "Creature":
        v[8] = 1
        
    # Encode rarity.
    if rarity == "C":
        v[9] = 1
    elif rarity == "U":
        v[10] = 1
    elif rarity == "R":
        v[11] = 1
    elif rarity == "M":
        v[12] = 1
    
    # Process number of colors.
    num_colors = len([c for c in color_vector if c > 0])
    if num_colors == 0:
        v[13] = 1
    elif num_colors == 1:
        v[14] = 1
    elif num_colors >= 2:
        v[15] = 1
    
    # Process card color. 
    if color_vector[0] > 0:
        v[16] = 1
    if color_vector[1] > 0:
        v[17] = 1
    if color_vector[2] > 0:
        v[18] = 1
    if color_vector[3] > 0:
        v[19] = 1
    if color_vector[4] > 0:
        v[20] = 1
    return v

In [7]:
def cmc_from_string(cmc_string):
    """
    Return an integer converted mana cost from cmc_string. 
    
    Each character adds 1 to cmc. 
    
    :param cmc_string: String or integer representation of cmc. Example: "1UBR".
    :returns: Integer cmc. Example: 4.
    """
    # If int, we are done. 
    if type(cmc_string) is int:
        return cmc_string
    
    # Convert string to integer cmc.
    cmc = 0
    digit_string = ""
    letters = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
    digits = set("1234567890")
        
    for c in cmc_string:        
        if c in letters:
            cmc += 1
        else:
            digit_string += c
    if len(digit_string) > 0:
        cmc += int(digit_string)
    return cmc

In [8]:
def create_set_tensor(magic_set):
    """
    Returns a set tensor which represents the properties of cards in the set.
    
    There are M features and N cards in the set and the tensor is of size M x N.
    
    The features are documented in the create_set_vector() function. 
    """
    set_list = []
    
    # Requires these names to be present in the set file.
    reduced_set = magic_set[["Name", "Casting Cost 1", "Card Type", "Rarity", "Color Vector"]]
    for index, row in reduced_set.iterrows():
        card_vector = create_set_vector(cmc_from_string(row[1]), row[2], row[3], row[4])
        set_list.append(card_vector)
        
    # set_list is currently N x M list of lists. 
    set_flipped = torch.Tensor(set_list)
    set_tensor = torch.transpose(set_flipped, 0, 1)
    return set_tensor

In [208]:
# Set tensor.
st = create_set_tensor(m19_set).cuda()
print(st.shape)

torch.Size([21, 285])


# Define the NN

In [224]:
#Implement NN.
class DraftNet(nn.Module):
    
    def __init__(self, set_tensor):
        """Placeholder NN. Currently does nothing.
        
        param ss: number of cards in set
        param set_tensor: Mxss set tensor describing the set
        """
        super(DraftNet, self).__init__()
        
        # Load set tensor.
        self.set_tensor = set_tensor
        self.set_tensor_tranpose = torch.transpose(set_tensor, 0, 1)
        self.M, self.ss = self.set_tensor.shape
        self.half_ss = self.ss / 2
        
        # Specify layer sizes. 
        size_in = self.ss + self.M
        #size_in = self.ss
        size1 = self.ss
        size2 = self.ss
        size3 = self.ss
        size4 = self.ss
        size5 = self.ss
        size6 = self.ss
        size7 = self.ss
        size8 = self.ss
        
        self.ns = 0.01
        
        self.bn = nn.BatchNorm1d(self.ss + self.M)
        
        self.linear1 = torch.nn.Linear(size_in, size1)
        self.bn1 = nn.BatchNorm1d(size1)
        self.relu1 = torch.nn.LeakyReLU(negative_slope = self.ns)
        self.dropout1 = nn.Dropout(0.5)
        
        self.linear2 = torch.nn.Linear(size1, size2)
        self.bn2 = nn.BatchNorm1d(size2)
        self.relu2 = torch.nn.LeakyReLU(negative_slope = self.ns)
        self.dropout2 = nn.Dropout(0.5)
        
        self.linear3 = torch.nn.Linear(size2, size3)
        self.bn3 = nn.BatchNorm1d(size3)
        self.relu3 = torch.nn.LeakyReLU(negative_slope = self.ns)
        self.dropout3 = nn.Dropout(0.5)
        
        self.linear4 = torch.nn.Linear(size3, size4)
        self.relu4 = torch.nn.LeakyReLU(negative_slope = self.ns)
        self.dropout4 = nn.Dropout(0.5)
        
        self.linear5 = torch.nn.Linear(size3, size5)
        self.relu5 = torch.nn.LeakyReLU(negative_slope = self.ns)
        self.dropout5 = nn.Dropout(0.5)
        
        self.linear6 = torch.nn.Linear(size3, size6)
        self.relu6 = torch.nn.LeakyReLU(negative_slope = self.ns)
        self.dropout6 = nn.Dropout(0.5)
        
        self.linear7 = torch.nn.Linear(size3, size7)
        self.relu7 = torch.nn.LeakyReLU(negative_slope = self.ns)
        self.dropout7 = nn.Dropout(0.5)
        
        self.linear8 = torch.nn.Linear(size3, size8)
        self.relu8 = torch.nn.LeakyReLU(negative_slope = self.ns)
        
        
        #self.sm = torch.nn.Softmax()
                
    def forward(self, x):
        
        collection = x[:, :self.ss]
        
        #collection = self.bn(collection)
        
        pack = x[:, self.ss:]
        
        # Get features from set tensor. 
        features = torch.mm(collection, self.set_tensor_tranpose)
        collection_and_features = torch.cat((collection, features), 1)
        
        collection_and_features = self.bn(collection_and_features)
        
        #y = self.linear1(collection_and_features)
        y = self.linear1(collection_and_features)
        y = self.bn1(y)
        y = self.relu1(y)
        y = self.dropout1(y)
        
        y = self.linear2(y)
        y = self.bn2(y)
        y = self.relu2(y)
        y = self.dropout2(y)
        
        y = self.linear3(y)
        y = self.bn3(y)
        y = self.relu3(y)
        y = self.dropout3(y)

        y = self.linear4(y)
        #y = self.relu4(y)
        #y = self.dropout4(y)
        
        #y = self.linear5(y)
        #y = self.relu5(y)
        #y = self.dropout5(y)
        
        #y = self.linear6(y)
        #y = self.relu6(y)
        #y = self.dropout6(y)
        
        #y = self.linear7(y)
        #y = self.relu7(y)
        #y = self.dropout7(y)
        
        #y = self.linear8(y)
        #y = self.relu8(y)
        
        y = y * pack # Enforce cards in pack only.
        
        return y

#Create NN.
net = DraftNet(st).cuda()
#print(net)

# Network training

In [225]:
def train_net(net, dataloader, num_epoch, optimizer):
    """Train the network."""
    net.train()    
    my_count = 0
    for epoch in range(num_epoch):
        
        #Loop over x,y for each dataset.
        running_loss = 0
        for i, data in enumerate(dataloader):
        
            my_count+=1
            if my_count % 10000 == 0:
                print(my_count)
        
            #Get the inputs. Keeps batch size.
            x, y = data
            
            # cuda() is needed for GPU mode. Not sure why.
            x = x.cuda()
            y = y.cuda() # One-hot encoded. 
            
            # Zero parameter gradients between batches.
            optimizer.zero_grad()
        
            #Perform training.
            y_pred = net(x)
            y_integer = torch.argmax(y, 1) # Class indices.
            
            # Use cross entropy loss. 
            loss = torch.nn.CrossEntropyLoss()
            output = loss(y_pred, y_integer)
            output.backward()
            optimizer.step()
                        
            #Print loss data.
            running_loss += output.item()
            step = 1
            if i % len(dataloader) == len(dataloader)-1 and (epoch + 1) % step == 0:
                print('Train Cross-Entropy Loss: %.6f' % (running_loss/len(dataloader)))
                running_loss = 0.0

In [226]:
# Define dataloaders. 
trainloader = torch.utils.data.DataLoader(train_data, batch_size=100, shuffle=True)
valloader = torch.utils.data.DataLoader(val_data, batch_size=100, shuffle=False)

# Network validation

In [227]:
def val_net(net, dataloader):
    """Compute accuracy on validation set."""
    net.eval()
    correct = 0.0
    total = 0.0
    
    with torch.no_grad():
        for i, data in enumerate(dataloader):
        
            #Get the inputs. Keeps batch size.
            x, y = data
            
            # cuda() is needed for GPU mode. Not sure why.
            x = x.cuda()
            y = y.cuda()
            y_integer = torch.argmax(y, 1) # Class indices.
            
            # Compute val loss.
            y_pred = net(x)
            y_pred_integer = torch.argmax(y_pred, 1)
            
            # Compute accuracy. 
            correct += int(sum(y_pred_integer == y_integer))
            total += len(y_integer)
            
    accuracy = correct / total

    print("Validation accuracy:", accuracy, " Total picks:", int(total))

In [228]:
# Train the network over several epochs - no momentu, from scratch.
run = 0

# Now leave optimizer out here. 
#optimizer = optim.Adam(net.parameters(), lr=0.0001, betas=(0.9, 0.999))
#optimizer = optim.ASGD(net.parameters())
#optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)

optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.0)
# scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.33) # Used for exp 47.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.98)

ep = 0

for run in range(999999):
    
    ep += 1
    print("Epoch:", ep)
    
    # Train1.
    train_net(net, trainloader, 1, optimizer)
    scheduler.step()
    
    # Validation. 
    val_net(net, valloader)

Epoch: 1
Train Cross-Entropy Loss: 1.569925
Validation accuracy: 0.6077377777777778  Total picks: 225000
Epoch: 2
Train Cross-Entropy Loss: 1.134533
Validation accuracy: 0.6163333333333333  Total picks: 225000
Epoch: 3
Train Cross-Entropy Loss: 1.101022
Validation accuracy: 0.6208177777777778  Total picks: 225000
Epoch: 4
Train Cross-Entropy Loss: 1.080183
Validation accuracy: 0.6237333333333334  Total picks: 225000
Epoch: 5
Train Cross-Entropy Loss: 1.065954
Validation accuracy: 0.6260488888888889  Total picks: 225000
Epoch: 6
Train Cross-Entropy Loss: 1.057866
Validation accuracy: 0.6267244444444444  Total picks: 225000
Epoch: 7
Train Cross-Entropy Loss: 1.049609
Validation accuracy: 0.6265644444444445  Total picks: 225000
Epoch: 8
Train Cross-Entropy Loss: 1.042938
Validation accuracy: 0.6283688888888889  Total picks: 225000
Epoch: 9
Train Cross-Entropy Loss: 1.037118
Validation accuracy: 0.6306488888888889  Total picks: 225000
Epoch: 10
Train Cross-Entropy Loss: 1.030989
Validation

Train Cross-Entropy Loss: 0.950264
Validation accuracy: 0.6386444444444445  Total picks: 225000
Epoch: 81
Train Cross-Entropy Loss: 0.948675
Validation accuracy: 0.6387244444444444  Total picks: 225000
Epoch: 82
Train Cross-Entropy Loss: 0.949747
Validation accuracy: 0.6383066666666667  Total picks: 225000
Epoch: 83
Train Cross-Entropy Loss: 0.949027
Validation accuracy: 0.6379866666666667  Total picks: 225000
Epoch: 84
Train Cross-Entropy Loss: 0.948605
Validation accuracy: 0.6385866666666666  Total picks: 225000
Epoch: 85
Train Cross-Entropy Loss: 0.948492
Validation accuracy: 0.63852  Total picks: 225000
Epoch: 86
Train Cross-Entropy Loss: 0.949200
Validation accuracy: 0.6385377777777778  Total picks: 225000
Epoch: 87
Train Cross-Entropy Loss: 0.947401
Validation accuracy: 0.6384044444444444  Total picks: 225000
Epoch: 88
Train Cross-Entropy Loss: 0.947165
Validation accuracy: 0.6386177777777777  Total picks: 225000
Epoch: 89
Train Cross-Entropy Loss: 0.947270
Validation accuracy: 0

Train Cross-Entropy Loss: 0.936350
Validation accuracy: 0.6385377777777778  Total picks: 225000
Epoch: 159
Train Cross-Entropy Loss: 0.936672
Validation accuracy: 0.6391422222222222  Total picks: 225000
Epoch: 160
Train Cross-Entropy Loss: 0.937264
Validation accuracy: 0.6386088888888889  Total picks: 225000
Epoch: 161
Train Cross-Entropy Loss: 0.937133
Validation accuracy: 0.6384266666666667  Total picks: 225000
Epoch: 162
Train Cross-Entropy Loss: 0.936246
Validation accuracy: 0.6387377777777777  Total picks: 225000
Epoch: 163
Train Cross-Entropy Loss: 0.935892
Validation accuracy: 0.6387777777777778  Total picks: 225000
Epoch: 164
Train Cross-Entropy Loss: 0.937334
Validation accuracy: 0.6388933333333333  Total picks: 225000
Epoch: 165
Train Cross-Entropy Loss: 0.935945
Validation accuracy: 0.6386711111111111  Total picks: 225000
Epoch: 166
Train Cross-Entropy Loss: 0.937259
Validation accuracy: 0.6389733333333333  Total picks: 225000
Epoch: 167
Train Cross-Entropy Loss: 0.937292
Va

Validation accuracy: 0.6385288888888889  Total picks: 225000
Epoch: 236
Train Cross-Entropy Loss: 0.933268
Validation accuracy: 0.63888  Total picks: 225000
Epoch: 237
Train Cross-Entropy Loss: 0.934179
Validation accuracy: 0.6387555555555555  Total picks: 225000
Epoch: 238
Train Cross-Entropy Loss: 0.933000
Validation accuracy: 0.6388044444444444  Total picks: 225000
Epoch: 239
Train Cross-Entropy Loss: 0.933879
Validation accuracy: 0.6388577777777777  Total picks: 225000
Epoch: 240
Train Cross-Entropy Loss: 0.932884
Validation accuracy: 0.6386355555555555  Total picks: 225000
Epoch: 241
Train Cross-Entropy Loss: 0.933250
Validation accuracy: 0.6388888888888888  Total picks: 225000
Epoch: 242
Train Cross-Entropy Loss: 0.935560
Validation accuracy: 0.6390222222222223  Total picks: 225000
Epoch: 243
Train Cross-Entropy Loss: 0.934322
Validation accuracy: 0.6388666666666667  Total picks: 225000
Epoch: 244
Train Cross-Entropy Loss: 0.934089
Validation accuracy: 0.6388355555555556  Total p

Train Cross-Entropy Loss: 0.934146
Validation accuracy: 0.6388311111111111  Total picks: 225000
Epoch: 314
Train Cross-Entropy Loss: 0.933637
Validation accuracy: 0.6387644444444445  Total picks: 225000
Epoch: 315
Train Cross-Entropy Loss: 0.933436
Validation accuracy: 0.6386844444444445  Total picks: 225000
Epoch: 316
Train Cross-Entropy Loss: 0.932819
Validation accuracy: 0.63852  Total picks: 225000
Epoch: 317
Train Cross-Entropy Loss: 0.932281
Validation accuracy: 0.6394044444444444  Total picks: 225000
Epoch: 318
Train Cross-Entropy Loss: 0.933200
Validation accuracy: 0.6388088888888889  Total picks: 225000
Epoch: 319
Train Cross-Entropy Loss: 0.932210
Validation accuracy: 0.6389288888888889  Total picks: 225000
Epoch: 320
Train Cross-Entropy Loss: 0.933728
Validation accuracy: 0.6392622222222222  Total picks: 225000
Epoch: 321
Train Cross-Entropy Loss: 0.932492
Validation accuracy: 0.6389822222222222  Total picks: 225000
Epoch: 322
Train Cross-Entropy Loss: 0.933487
Validation ac

KeyboardInterrupt: 

In [229]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.00001, momentum=0.0)
# scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.33) # Used for exp 47.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.8)

ep = 0

for run in range(999999):
    
    ep += 1
    print("Epoch:", ep)
    
    # Train1.
    train_net(net, trainloader, 1, optimizer)
    scheduler.step()
    
    # Validation. 
    val_net(net, valloader)

Epoch: 1
Train Cross-Entropy Loss: 0.933023
Validation accuracy: 0.6387644444444445  Total picks: 225000
Epoch: 2
Train Cross-Entropy Loss: 0.932756
Validation accuracy: 0.6389955555555555  Total picks: 225000
Epoch: 3
Train Cross-Entropy Loss: 0.933296
Validation accuracy: 0.63884  Total picks: 225000
Epoch: 4
Train Cross-Entropy Loss: 0.932249
Validation accuracy: 0.6388444444444444  Total picks: 225000
Epoch: 5
Train Cross-Entropy Loss: 0.933449
Validation accuracy: 0.6392  Total picks: 225000
Epoch: 6
Train Cross-Entropy Loss: 0.933664
Validation accuracy: 0.6388933333333333  Total picks: 225000
Epoch: 7
Train Cross-Entropy Loss: 0.933211
Validation accuracy: 0.6389555555555556  Total picks: 225000
Epoch: 8
Train Cross-Entropy Loss: 0.932748
Validation accuracy: 0.6391955555555555  Total picks: 225000
Epoch: 9
Train Cross-Entropy Loss: 0.932864
Validation accuracy: 0.6386533333333333  Total picks: 225000
Epoch: 10
Train Cross-Entropy Loss: 0.932511
Validation accuracy: 0.6387244444

KeyboardInterrupt: 