In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import sklearn.dummy
import sklearn.model_selection
import sklearn.preprocessing
import torch

In [2]:
# Read both the per-match table and the per-frame table
match_df = pd.read_csv('../output/csv/lol-data-matches-fixed-duration.csv')
frame_df = pd.read_csv('../output/csv/lol-data-match-frames.csv')

# Columns that hold a team id (100 = blue side, 200 = red side, 0 = nobody)
team_columns = [
    'winning_team',
    'first_champion',
    'first_tower',
    'first_inhibitor',
    'first_baron',
    'first_dragon',
]

# Build the cleaned table as one non-mutating chain: no `inplace=True`, so no
# step modifies a filtered slice of an earlier frame.
match_df = (
    match_df
    # Index the matches by their id
    .set_index('match_id')
    # Drop the columns we do not need
    .drop(columns=['count', 'division', 'patch', 'region', 'first_rift_herald'])
    # Fill all values that are not filled out
    .fillna(0)
    # Remove any matches with no winning team
    .loc[lambda matches: matches['winning_team'] != 0]
    # Change the ids of blue side and red side to 1 and -1, respectively
    .replace({column: {100: 1, 200: -1} for column in team_columns})
    # Reinterpret all team columns as int32
    .astype({column: 'int32' for column in team_columns})
)
match_df

Unnamed: 0_level_0,tier,game_duration,winning_team,first_champion,first_tower,first_inhibitor,first_baron,first_dragon
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
EUW1_5479661889,BRONZE,1797,1,1,1,1,1,1
EUW1_5479575964,BRONZE,1719,1,1,-1,1,0,1
EUW1_5479499524,BRONZE,1352,-1,-1,-1,-1,0,1
EUW1_5479492935,BRONZE,1647,-1,-1,1,-1,0,-1
EUW1_5479357161,BRONZE,1509,-1,-1,-1,-1,0,-1
...,...,...,...,...,...,...,...,...
EUW1_5544360421,GRANDMASTERS,1928,-1,1,-1,-1,-1,1
EUW1_5544335270,GRANDMASTERS,1101,-1,-1,-1,0,0,-1
EUW1_5544282724,GRANDMASTERS,1788,1,-1,1,1,1,-1
EUW1_5544046900,GRANDMASTERS,2133,1,-1,1,1,1,1


In [3]:
# Join every frame row with the summary row of the match it belongs to,
# then index the combined table by match id again
df = pd.merge(match_df, frame_df, left_on='match_id', right_on='match_id')
df = df.set_index('match_id')
df


Unnamed: 0_level_0,tier,game_duration,winning_team,first_champion,first_tower,first_inhibitor,first_baron,first_dragon,count,frame,...,red_total_kills,red_total_gold,red_total_cs,red_total_damage,red_towers,red_plates,red_inhibitors,red_barons,red_dragons,red_rift_heralds
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
EUW1_5479661889,BRONZE,1797,1,1,1,1,1,1,80697,0,...,0,2500,0,0,0,0,0,0,0,0
EUW1_5479661889,BRONZE,1797,1,1,1,1,1,1,80698,1,...,0,2500,0,0,0,0,0,0,0,0
EUW1_5479661889,BRONZE,1797,1,1,1,1,1,1,80699,2,...,0,2889,13,323,0,0,0,0,0,0
EUW1_5479661889,BRONZE,1797,1,1,1,1,1,1,80700,3,...,0,4423,53,1785,0,0,0,0,0,0
EUW1_5479661889,BRONZE,1797,1,1,1,1,1,1,80701,4,...,0,5701,79,2828,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
EUW1_5543889504,GRANDMASTERS,1461,-1,-1,1,-1,-1,-1,824350,21,...,27,48140,724,52148,2,11,0,0,3,1
EUW1_5543889504,GRANDMASTERS,1461,-1,-1,1,-1,-1,-1,824351,22,...,27,52429,758,61645,2,11,0,1,3,1
EUW1_5543889504,GRANDMASTERS,1461,-1,-1,1,-1,-1,-1,824352,23,...,32,54304,773,69206,2,11,0,1,3,1
EUW1_5543889504,GRANDMASTERS,1461,-1,-1,1,-1,-1,-1,824353,24,...,32,57442,793,76620,2,11,0,1,3,1


In [4]:
## Here we train a dummy classifier to compare performance

def train_dummy_classifier(X, y):
    """Fit a uniform-random baseline and report its accuracy on held-out data.

    The data is split 67% / 33% with a fixed seed, min-max scaled (scaler
    fitted on the training part only) and fed to a `DummyClassifier` using
    the 'uniform' strategy.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,)
        Class labels.

    Returns
    -------
    float
        Score of the dummy classifier on the test split. The value is also
        printed, as before, so existing notebook output is unchanged.
    """
    X_trn, X_tst, y_trn, y_tst = sklearn.model_selection.train_test_split(
        X, y, test_size=0.33, random_state=0
    )

    # Scale the data with MinMax to avoid negative values
    scaler = sklearn.preprocessing.MinMaxScaler()
    X_trn = scaler.fit_transform(X_trn)
    X_tst = scaler.transform(X_tst)

    dummy_clf = sklearn.dummy.DummyClassifier(strategy='uniform', random_state=0)
    dummy_clf.fit(X_trn, y_trn)

    score = dummy_clf.score(X_tst, y_tst)
    print(score)
    # Returned so callers can collect the baseline instead of reading stdout
    return score

In [33]:
# Pick all data points that belong to one frame of the data set and build
# the feature matrix X and the label vector y from them.
# NOTE: only the single frame below is processed; there is no loop over
# frames in this cell. Change `current_frame` (or wrap this logic in a loop)
# to train on other frames.
current_frame = 25

print("TRAINING ON FRAME #%i" % current_frame)

# Per-team statistics used as features: the 10 blue columns first, then the
# 10 red columns in the same order
feature_columns = [
    'blue_total_kills',
    'blue_total_gold',
    'blue_total_cs',
    'blue_total_damage',
    'blue_towers',
    'blue_plates',
    'blue_inhibitors',
    'blue_barons',
    'blue_dragons',
    'blue_rift_heralds',
    'red_total_kills',
    'red_total_gold',
    'red_total_cs',
    'red_total_damage',
    'red_towers',
    'red_plates',
    'red_inhibitors',
    'red_barons',
    'red_dragons',
    'red_rift_heralds',
]

# Filter the table once and reuse the result for both X and y
frame_rows = df[df.frame == current_frame]
X = frame_rows[feature_columns].values
y = frame_rows['winning_team'].values

# Convert -1 and +1 to 0 and 1 for the tensors to work
# (winning_team only holds -1 or +1 at this point, so "== 1" marks blue wins)
y = (y == 1).astype(np.int32)

# L1-normalise every statistic across the two teams, so that each
# (blue, red) pair of values becomes a pair of shares:
#    e.g. if Blue side has 9 kills and Red side has 11, they are
#         represented as 0.45 and 0.55, respectively.
# The result keeps all 20 columns (10 blue shares, then 10 red shares);
# nothing is subtracted or collapsed here.
# TODO: blue_share + red_share == 1 for every non-zero pair, so the red half
#       is redundant. A 10-feature encoding such as blue_share - 0.5 would
#       avoid doubling up, but the network's input width would have to
#       change with it.
def combine(x):
    """Turn blue/red raw statistics into per-statistic shares.

    Parameters
    ----------
    x : array-like of shape (20,) or (n, 20)
        One row (or a batch of rows) laid out as 10 blue values followed by
        the 10 matching red values.

    Returns
    -------
    numpy.ndarray
        float64 array of the same shape as `x`, where every value has been
        divided by |blue| + |red| of its statistic. Pairs that are both zero
        stay zero instead of producing a division by zero.

    Raises
    ------
    ValueError
        If the last axis of `x` does not hold exactly 20 values.
    """
    x = np.asarray(x, dtype=np.float64)
    if x.ndim == 0 or x.shape[-1] != 20:
        raise ValueError("expected 20 values per row, got shape %s" % (x.shape,))

    # (rows, side, statistic): side 0 is blue, side 1 is red
    pairs = x.reshape(-1, 2, 10)
    totals = np.abs(pairs).sum(axis=1, keepdims=True)
    # A statistic that is zero for both teams is left untouched
    totals[totals == 0] = 1.0
    return (pairs / totals).reshape(x.shape)
# Run `combine` on every row of X so each blue/red pair becomes a pair of
# shares; X keeps its 20 columns.
X = np.apply_along_axis(combine, axis=1, arr=X)


TRAINING ON FRAME #25
(26565, 20)
(26565, 20)


In [34]:
## This is the neural network trained in Lab 8

def train_neural_network(X, y, num_epoch=10, batch_size=200, hidden_units=10,
                         lr=0.1, momentum=0.9, test_size=0.33, seed=0):
    """Train a one-hidden-layer classifier and report its loss on held-out data.

    The data is split into train/test parts, min-max scaled (scaler fitted on
    the training part only) and used to train
    `Linear(n_features, hidden_units) -> ReLU -> Linear(hidden_units, 2)`
    with mini-batch SGD and a cross-entropy loss. Batches are taken in order;
    the data is not reshuffled between epochs.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,)
        Class indices, 0 or 1.
    num_epoch : int
        Number of passes over the training data.
    batch_size : int
        Number of training samples per gradient step.
    hidden_units : int
        Width of the hidden layer.
    lr, momentum : float
        SGD learning rate and momentum.
    test_size : float
        Fraction of the data held out for testing.
    seed : int
        Seed for both the train/test split and the weight initialisation.

    Returns
    -------
    torch.nn.Sequential
        The trained model. Training and test losses are printed as before.
    """
    # Split the data (2/3 to 1/3 by default)
    X_trn, X_tst, y_trn, y_tst = sklearn.model_selection.train_test_split(
        X, y, test_size=test_size, random_state=seed
    )
    print(X_trn.shape)

    # Scale the data with MinMax to avoid negative values
    scaler = sklearn.preprocessing.MinMaxScaler()
    X_trn = scaler.fit_transform(X_trn)
    X_tst = scaler.transform(X_tst)

    # Tensors setup
    X_trn_torch = torch.tensor(X_trn, dtype=torch.float32)
    y_trn_torch = torch.tensor(y_trn, dtype=torch.int64)
    X_tst_torch = torch.tensor(X_tst, dtype=torch.float32)
    y_tst_torch = torch.tensor(y_tst, dtype=torch.int64)

    torch.manual_seed(seed) # Ensure model weights initialized with same random numbers

    # Take the input width from the data instead of hard-coding it
    n_features = X_trn_torch.shape[1]

    # Create an object that holds a sequence of layers and activation functions
    model = torch.nn.Sequential(
        torch.nn.Linear(n_features, hidden_units),  # Wx+b from n_features down to hidden_units
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_units, 2),           # Wx+b from hidden_units down to 2 class scores
    )

    # Create an object that can compute "negative log likelihood of a softmax"
    loss = torch.nn.CrossEntropyLoss()

    # Use stochastic gradient descent to train the model
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for epoch in range(1, num_epoch + 1):
        # Make an entire pass (an 'epoch') over the training data in batch_size chunks
        for start in range(0, len(X_trn_torch), batch_size):
            X_cur = X_trn_torch[start:start + batch_size]   # Slice out a mini-batch of features
            y_cur = y_trn_torch[start:start + batch_size]   # Slice out a mini-batch of targets

            y_pred = model(X_cur)           # Make predictions (final-layer activations)
            l = loss(y_pred, y_cur)         # Compute loss with respect to predictions

            optimizer.zero_grad()           # Reset all gradient accumulators
            l.backward()                    # Compute gradient of loss wrt all parameters (backprop!)
            optimizer.step()                # Use the gradients to take a step with SGD.

        print("Epoch %2d: loss on final training batch: %.4f" % (epoch, l.item()))

    # Evaluation needs no gradients, so skip building the autograd graph
    with torch.no_grad():
        test_loss = loss(model(X_tst_torch), y_tst_torch).item()
    print("Epoch %2d: loss on test set: %.4f" % (num_epoch, test_loss))

    return model

# Baseline for comparison (disabled):
# train_dummy_classifier(X, y)
train_neural_network(X, y);

(17798, 20)
Epoch  1: loss on final training batch: 0.3424
Epoch  2: loss on final training batch: 0.3449
Epoch  3: loss on final training batch: 0.3406
Epoch  4: loss on final training batch: 0.3372
Epoch  5: loss on final training batch: 0.3357
Epoch  6: loss on final training batch: 0.3339
Epoch  7: loss on final training batch: 0.3328
Epoch  8: loss on final training batch: 0.3320
Epoch  9: loss on final training batch: 0.3312
Epoch 10: loss on final training batch: 0.3306
Epoch 10: loss on test set: 0.3742
