# LoL Prediction S10
> LOL s10 high elo ranked games prediction.

- toc: true 
- badges: true
- comments: true
- author: Jaekang Lee
- image: images/diagram.png
- categories: [fastpages, jupyter]## Introduction

Let's predict who won the match given team composition and how long game played out

## Get dataset

In [205]:
import pandas as pd
df = pd.read_csv("games.csv")

Some Setups

In [185]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

%load_ext tensorboard

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "deep"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [186]:
df.head(5)

Unnamed: 0,game_length,mmr,result,server,team_1,team_2,timestamp
0,25m 38s,,Victory,na,"Riven,Nidalee,Galio,Jhin,Pantheon","Camille,Olaf,Cassiopeia,Ezreal,Alistar",2020-10-13 09:31:42
1,25m 38s,,Defeat,na,"Teemo,Nidalee,Lucian,Caitlyn,Senna","Irelia,Hecarim,Cassiopeia,Jinx,Lulu",2020-10-13 06:00:17
2,25m 38s,,Defeat,na,"Malphite,Olaf,Taliyah,Ezreal,Alistar","Sylas,Lillia,Lucian,Senna,Pantheon",2020-10-13 05:06:45
3,25m 38s,,Defeat,na,"Neeko,Shen,Orianna,Kai'Sa,Nautilus","Riven,Hecarim,Cassiopeia,Samira,Morgana",2020-10-13 04:28:00
4,25m 38s,,Defeat,na,"Fiora,Nunu & Willump,Irelia,Jhin,Karma","Renekton,Elise,Kled,Jinx,Morgana",2020-10-13 04:00:51


In [206]:
temp_df = df[['game_length', 'result', 'team_1', 'team_2']] # Select only interests
blue = temp_df['team_1']
red = temp_df['team_2']
n = len(df)

blue_champs = []
red_champs = []
for i in range(0,n):
    blue_champs += [blue[i].split(',')]
    red_champs += [red[i].split(',')]
    
top = []
jg = []
mid = []
adc = []
sup = []
for i in range(0, n):
    top += [blue_champs[i][0]]
    jg += [blue_champs[i][1]]
    mid += [blue_champs[i][2]]
    adc += [blue_champs[i][3]]
    sup += [blue_champs[i][4]]
    
top_2 = []
jg_2 = []
mid_2 = []
adc_2 = []
sup_2 = []
for i in range(0, n):
    top_2 += [red_champs[i][0]]
    jg_2 += [red_champs[i][1]]
    mid_2 += [red_champs[i][2]]
    adc_2 += [red_champs[i][3]]
    sup_2 += [red_champs[i][4]]

In [207]:
data = temp_df.drop(columns=['team_1','team_2'])
# blue team
data['top1'] = top
data['jg1'] = jg
data['mid1'] = mid
data['adc1'] = adc
data['sup1'] = sup
# red team
data['top2'] = top_2
data['jg2'] = jg_2
data['mid2'] = mid_2
data['adc2'] = adc_2
data['sup2'] = sup_2

In [208]:
data.head(10)


Unnamed: 0,game_length,result,top1,jg1,mid1,adc1,sup1,top2,jg2,mid2,adc2,sup2
0,25m 38s,Victory,Riven,Nidalee,Galio,Jhin,Pantheon,Camille,Olaf,Cassiopeia,Ezreal,Alistar
1,25m 38s,Defeat,Teemo,Nidalee,Lucian,Caitlyn,Senna,Irelia,Hecarim,Cassiopeia,Jinx,Lulu
2,25m 38s,Defeat,Malphite,Olaf,Taliyah,Ezreal,Alistar,Sylas,Lillia,Lucian,Senna,Pantheon
3,25m 38s,Defeat,Neeko,Shen,Orianna,Kai'Sa,Nautilus,Riven,Hecarim,Cassiopeia,Samira,Morgana
4,25m 38s,Defeat,Fiora,Nunu & Willump,Irelia,Jhin,Karma,Renekton,Elise,Kled,Jinx,Morgana
5,25m 38s,Defeat,Irelia,Karthus,Sylas,Samira,Nautilus,Riven,Kayn,Akali,Miss Fortune,Galio
6,25m 38s,Defeat,Galio,Kindred,Syndra,Ezreal,Blitzcrank,Camille,Fiddlesticks,Twisted Fate,Jhin,Morgana
7,25m 38s,Defeat,Poppy,Ekko,Sylas,Samira,Blitzcrank,Lucian,Lillia,Lulu,Caitlyn,Alistar
8,25m 38s,Defeat,Shen,Lillia,Samira,Lucian,Soraka,Taric,Master Yi,Riven,Ezreal,Lulu
9,25m 38s,Defeat,Ornn,Graves,Sylas,Lucian,Alistar,Irelia,Hecarim,Akali,Senna,Leona


In [209]:
from sklearn.preprocessing import OneHotEncoder
#y = pd.get_dummies(data.top1, prefix='top1')
enc = OneHotEncoder()
only_champs = data.drop(columns=['game_length', 'result'])
only_champs.head(5)
only_champs_onehot = enc.fit_transform(only_champs)


In [3]:
# print(only_champs_onehot)

In [2]:
# enc.categories_

In [1]:
# Convert game_length to float and normalize
import re
date_str = data.game_length
m = 2717 #longest games are 45m 17s

for i in range(len(date_str)):
    if type(date_str[i]) == str:
        p = re.compile('\d*')
        min = float(p.findall(date_str[i][:2])[0])
        temp = p.findall(date_str[i][-3:])
        for j in temp:
            if j != '':
                sec = float(j)
                break
        date_str[i] = (60*min+sec)/m
    else: 
        date_str[i] = date_str[i]/m
    print(date_str[i])
print(len(date_str))

In [264]:
# Now we have the X we want
#except_champs = data.drop(columns=['result','top1','jg1','mid1','adc1','sup1','top2','jg2','mid2','adc2','sup2'])
sparse_to_df = pd.DataFrame.sparse.from_spmatrix(only_champs_onehot)
print(sparse_to_df.shape)
print(date_str.shape)

X = date_str.to_frame().join(sparse_to_df).dropna()
X = np.asarray(X).astype('float32')

(4028, 754)
(4028,)


In [265]:
print(type(X))
print(X.shape)
# print(X.isnull())

<class 'numpy.ndarray'>
(4028, 755)


Train test set

In [270]:
y = data['result']
for i in range(len(y)):
    if y[i] == "Victory":
        y[i] = 1
    else:
        y[i] = 0

In [276]:
y = np.asarray(y).astype('float32')

## Datas are one hot encoded and cleaned up

In [277]:
from sklearn.model_selection import train_test_split
import math

X_train_full, X_test, y_train_full, y_test = train_test_split(X,y,test_size=0.2, random_state=42)
#len(X_train) = 3222
l = math.floor(3222*0.8)
X_valid, X_train = X_train_full[:l], X_train_full[l:]
y_valid, y_train = y_train_full[:l], y_train_full[l:]
print(y_valid.shape)
print(X_valid.shape)


(2577,)
(2577, 755)


In [288]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(755,)),
    keras.layers.Dense(30, activation="relu", name="layer_1"),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(16, activation="relu", name="layer_2"),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(16, activation="relu", name="layer_3"),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(1, activation="sigmoid", name="layer_4")
])

In [294]:
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_7 (Flatten)          (None, 755)               0         
_________________________________________________________________
layer_1 (Dense)              (None, 30)                22680     
_________________________________________________________________
dropout_2 (Dropout)          (None, 30)                0         
_________________________________________________________________
layer_2 (Dense)              (None, 16)                496       
_________________________________________________________________
dropout_3 (Dropout)          (None, 16)                0         
_________________________________________________________________
layer_3 (Dense)              (None, 16)                272       
_________________________________________________________________
dropout_4 (Dropout)          (None, 16)              

In [295]:
model.fit(X_train, y_train, epochs=50, batch_size=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1cf07241a30>

In [296]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print('accuracy', test_acc)

accuracy 0.6464020013809204


We got about 0.646 accuracy with just raw neural network with dropouts.

## Don't have a clue how to improve yet I'll come back next time

- Convolution doesn't work although I could try.. positions matter?
- Should try Regression or randome forest