This notebook uses a local file called matchups.csv. This file is local and should be in the same directory as the notebook in order to be properly imported.

This analysis method is recommended as it's easier. Everything can be uploaded to an OCI Data Science environment.

This method is optional and should only work once all prerequisites and installation steps are finished. If you are starting out with the repository, it's recommended to check out hol1_offline_analysis.ipynb instead.

Configuration parameters can be currently be found and populated in config.yaml.

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import json
import os
pd.set_option('float_format', '{:f}'.format)

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# execute if you're missing any of these libraries. This is a way to install them.
'''

import os
os.environ['PATH']='/home/ubuntu/miniconda3/bin:$PATH' # set your own python environment


!python -m pip install -U pip
!python -m pip install -U setuptools wheel
!pip install pandas_profiling
## install packages
!pip install -q scikit-learn
'''

"\n\nimport os\nos.environ['PATH']='/home/ubuntu/miniconda3/bin:$PATH' # set your own python environment\n\n\n!python -m pip install -U pip\n!python -m pip install -U setuptools wheel\n!pip install pandas_profiling\n## install packages\n!pip install -q scikit-learn\n"

In [3]:
# We read the dataset from the local file
df = pd.read_csv('matchups.csv', sep=',')

In [4]:
from pandas_profiling import ProfileReport

In [5]:
#report = ProfileReport(df, title="Matchups Exploration", html={'style': {'full_width': True}})
#report #uncomment to display all.

#report.to_notebook_iframe() # to_file('output.html')

In [6]:
# We read the dataset from the local file, after taking a look at the original dataset.
df = pd.read_csv('1v1.csv', sep=',')

df.head(5)

Unnamed: 0,match_id,champ1,champ2,win
0,0,Ezreal,Ziggs,1
1,1,Pyke,Sett,1
2,2,Diana,Viego,1
3,3,Jayce,Gwen,1
4,4,Pantheon,Zed,1


In [7]:
df['champ1'].unique()

array(['Ezreal', 'Pyke', 'Diana', 'Jayce', 'Pantheon', 'Galio',
       'Mordekaiser', 'Anivia', 'Kalista', 'Brand', 'LeeSin', 'Katarina',
       'Jinx', 'Nami', 'Sylas', 'Irelia', 'Lucian', 'Zyra', 'Kayn', 'Zac',
       'Qiyana', 'Jhin', 'Caitlyn', 'Morgana', 'Viego', 'Tryndamere',
       'Kassadin', 'Camille', 'Leblanc', 'Draven', 'Syndra', 'Ziggs',
       'Bard', 'XinZhao', 'Nocturne', 'Yasuo', 'Riven', 'Corki', 'Karma',
       'Elise', 'MonkeyKing', 'Sivir', 'Blitzcrank', 'Jax', 'Zilean',
       'Ashe', 'Velkoz', 'DrMundo', 'Ekko', 'Thresh', 'Nunu', 'Sett',
       'Khazix', 'Cassiopeia', 'Kaisa', 'Nautilus', 'Gwen', 'Graves',
       'Volibear', 'KogMaw', 'Lulu', 'Ryze', 'Lux', 'Rengar', 'Alistar',
       'Karthus', 'Gragas', 'Shen', 'Neeko', 'TahmKench', 'Akali',
       'Vayne', 'Seraphine', 'Gnar', 'Tristana', 'Leona', 'Yone',
       'Viktor', 'Skarner', 'Garen', 'Zed', 'Soraka', 'Talon', 'Fiora',
       'TwistedFate', 'MissFortune', 'Veigar', 'Hecarim', 'Shyvana',
       'Aatrox',

In [8]:
champ_list = df['champ1'].unique().tolist()

print(type(champ_list), champ_list)

<class 'list'> ['Ezreal', 'Pyke', 'Diana', 'Jayce', 'Pantheon', 'Galio', 'Mordekaiser', 'Anivia', 'Kalista', 'Brand', 'LeeSin', 'Katarina', 'Jinx', 'Nami', 'Sylas', 'Irelia', 'Lucian', 'Zyra', 'Kayn', 'Zac', 'Qiyana', 'Jhin', 'Caitlyn', 'Morgana', 'Viego', 'Tryndamere', 'Kassadin', 'Camille', 'Leblanc', 'Draven', 'Syndra', 'Ziggs', 'Bard', 'XinZhao', 'Nocturne', 'Yasuo', 'Riven', 'Corki', 'Karma', 'Elise', 'MonkeyKing', 'Sivir', 'Blitzcrank', 'Jax', 'Zilean', 'Ashe', 'Velkoz', 'DrMundo', 'Ekko', 'Thresh', 'Nunu', 'Sett', 'Khazix', 'Cassiopeia', 'Kaisa', 'Nautilus', 'Gwen', 'Graves', 'Volibear', 'KogMaw', 'Lulu', 'Ryze', 'Lux', 'Rengar', 'Alistar', 'Karthus', 'Gragas', 'Shen', 'Neeko', 'TahmKench', 'Akali', 'Vayne', 'Seraphine', 'Gnar', 'Tristana', 'Leona', 'Yone', 'Viktor', 'Skarner', 'Garen', 'Zed', 'Soraka', 'Talon', 'Fiora', 'TwistedFate', 'MissFortune', 'Veigar', 'Hecarim', 'Shyvana', 'Aatrox', 'Sion', 'Kayle', 'Poppy', 'Rumble', 'Xerath', 'Orianna', 'Janna', 'Renekton', 'Rakan', '

In [9]:
print(df.columns) # we have 4 columns, 'win' is what we want to predict

Index(['match_id', 'champ1', 'champ2', 'win'], dtype='object')


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1288773 entries, 0 to 1288772
Data columns (total 4 columns):
 #   Column    Non-Null Count    Dtype 
---  ------    --------------    ----- 
 0   match_id  1288773 non-null  int64 
 1   champ1    1288773 non-null  object
 2   champ2    1288773 non-null  object
 3   win       1288773 non-null  int64 
dtypes: int64(2), object(2)
memory usage: 39.3+ MB


In [11]:
# Usually we check for null values. In this case we don't have any null values in the dataset,
# so this operation is redundant in this case

# df = df.dropna() # we drop null values and corresponding rows.

In [12]:
# Drop match_id column as it's a String that provides no value
df.drop(['match_id'], axis=1, inplace=True)

In [13]:
train_dataset = df.sample(frac=0.8, random_state=0) # 80-20 train-test splitting.
test_dataset = df.drop(train_dataset.index) # drop all rows present in the train dataset.

In [14]:
# Splitting features (what we use to predict) from labels (what we want to predict)
# We want to predict the 'win' variable.
# The rest of variables will be inputs.

train_features = train_dataset.copy()
test_features = test_dataset.copy()

train_labels = train_features.pop('win') # returns column 'win'
test_labels = test_features.pop('win') # returns column 'win'

In [15]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

le = LabelEncoder()

le = le.fit(champ_list) # fit the label encoder with the whole champion list.

train_features = train_features.apply(lambda x: le.transform(x))
test_features = test_features.apply(lambda x: le.transform(x))

In [16]:
# Scaling
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
test_features = scaler.transform(test_features)

In [17]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()
logreg.fit(train_features, train_labels)
# score(X, y[, sample_weight]): Return the mean accuracy on the given test data and labels
print(logreg.predict_proba(train_features))
print(logreg.score(train_features, train_labels))

[[0.51541224 0.48458776]
 [0.51096743 0.48903257]
 [0.51515081 0.48484919]
 ...
 [0.51376513 0.48623487]
 [0.51221206 0.48778794]
 [0.51455823 0.48544177]]
0.5132888077608733


In [18]:
new_data = {
    'champ1': ['Ezreal', 'Janna', 'Anivia', 'Gnar', 'Warwick'],
    'champ2': ['Jhin', 'Blitzcrank', 'Viktor', 'Jax', 'Amumu']

}
'''
['Jhin', 'Janna', 'Anivia', 'Gnar', 'Warwick']
['Tristana', 'Sona', 'Ahri', 'Riven', 'Nidalee']
['Sivir', 'Blitzcrank', 'Viktor', 'Jax', 'Amumu']
'''
new_df = pd.DataFrame(new_data)

new_df = new_df.apply(lambda x: le.transform(x)) # we use the previously created label encoder

scaled = scaler.fit_transform(new_df)

new_df.tail(5)

Unnamed: 0,champ1,champ2
0,28,48
1,44,13
2,6,139
3,35,46
4,142,5


In [19]:
scaled # check what happens with scaling (example)

# e.g. 'Tristana' becomes 0.9229278

array([[-0.48727064, -0.04620625],
       [-0.14829976, -0.78130573],
       [-0.9533556 ,  1.8650524 ],
       [-0.33897088, -0.08821194],
       [ 1.92789687, -0.94932847]])

In [20]:
result = logreg.predict(new_df)
print(result)

detailed_result = logreg.predict_proba(new_df)

print(detailed_result)

[0 1 0 0 1]
[[0.57085824 0.42914176]
 [0.496072   0.503928  ]
 [0.73050918 0.26949082]
 [0.56122379 0.43877621]
 [0.3951099  0.6048901 ]]


In [21]:
def find_winner(lst):
    return max(set(lst), key=lst.count)

def find_winner_new(lst):
    count_team_1 = 0; count_team_2 = 0
    count = 0
    extra_probabilities = list()
    for x in lst:
        print(x)
        additional_prob = (x[0] - 0.5) * 100
        extra_probabilities.append(additional_prob)
        if x[0] >= x[1]:
            #print('Champ 1 most likely to win ({}% additional probability)'.format(additional_prob))
            count_team_1 += 1
        else:
            #print('Champ 2 most likely to win ({}% additional probability)'.format((x[1]-x[0])*100))
            count_team_2 += 1
    return count_team_1, count_team_2, extra_probabilities


# we take the mode as a "normalizer"
team_1, team_2, probabilities = find_winner_new(detailed_result)
print(team_1, team_2, probabilities)

#if sum(probabilities) > 0:
if team_1 > team_2:
    print('Team 1 most likely to win with an additional probability of {}'.format(sum(probabilities)))
else:
    print('Team 2 most likely to win with an additional probability of {}'.format(sum(probabilities)))

[0.57085824 0.42914176]
[0.496072 0.503928]
[0.73050918 0.26949082]
[0.56122379 0.43877621]
[0.3951099 0.6048901]
3 2 [7.085823690873272, -0.3928003875371511, 23.05091752259806, 6.122379069905581, -10.489009544500005]
Team 1 most likely to win with an additional probability of 25.37731035133976


In [22]:
# we make the inverse transform to convert to human-readable format again
inverse_prediction = new_df.apply(lambda x: le.inverse_transform(x))

inverse_prediction

Unnamed: 0,champ1,champ2
0,Ezreal,Jhin
1,Janna,Blitzcrank
2,Anivia,Viktor
3,Gnar,Jax
4,Warwick,Amumu


In [23]:
if result[0] == 1:
    print('Predicted winner is {}'.format(str(inverse_prediction['champ1'].to_string())))
else:
    print('Predicted winner is {}'.format(str(inverse_prediction['champ2'].to_string())))


Predicted winner is 0          Jhin
1    Blitzcrank
2        Viktor
3           Jax
4         Amumu
