In [None]:
# Install necessary dependencies to run the notebook
!python -m pip install -U pip pandas setuptools wheel pandas_profiling autogluon

In [None]:
# Imports

from pandas_profiling import ProfileReport
from autogluon.tabular import TabularPredictor, TabularDataset
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import json

In [None]:
# Load the matchups CSV from the working directory using pandas' Python parser engine
df = pd.read_csv(filepath_or_buffer='matchups_livelabs.csv', engine='python')

In [None]:
# Build the HTML profiling report used for exploratory data analysis
report = ProfileReport(
    df,
    title="Matchups (LiveLabs Format) Exploration",
    html={'style': {'full_width': True}},
)

# Render the report inline, inside the notebook
report.to_notebook_iframe()

In [None]:
# Preview the first five rows of the dataset
df.head(n=5)

In [None]:
# Display the column names of the dataset (as the DataFrame's column Index)
df.columns

In [None]:
# Summary statistics from pandas; comparable figures also appear in the
# pandas-profiling HTML report generated above
df.describe()

In [None]:
# Optional (left disabled): dump the first row as a plain dict via a JSON round-trip
#print(json.loads(df.iloc[0].to_json()))

In [None]:
# Inspect the first row, shown as one value per column
df.iloc[0]

In [None]:
# Wrap the pandas DataFrame in an AutoGluon TabularDataset.
# Tutorial: https://auto.gluon.ai/stable/tutorials/tabular_prediction/index.html
# Note: this rebinds `df`, so from here on `df` refers to the TabularDataset.
df = TabularDataset(df)

In [None]:
# Warnings noted for this dataset:
#   BONUSARMORPENETRATIONPERCENT  has constant value "0"  (Constant)
#   BONUSMAGICPENETRATIONPERCENT  has constant value "0"  (Constant)
#   COOLDOWNREDUCTION             has constant value "0"  (Constant)
#   ARMORPENETRATIONFLAT          has constant value "0"  (Constant)
#   IDENTIFIER                    has a high cardinality: 34990 distinct values

# Columns we don't want: the identifier, the constant columns, and
# 'Unnamed: 24', which is a null column.
unwanted_columns = [
    'BONUSARMORPENETRATIONPERCENT',
    'BONUSMAGICPENETRATIONPERCENT',
    'COOLDOWNREDUCTION',
    'ARMORPENETRATIONFLAT',
    'IDENTIFIER',
    'Unnamed: 24',
]
df = df.drop(columns=unwanted_columns)

# 80/20 train-test split: sample 80% of the rows for training (random_state is the
# seed, so the split is repeatable) and keep the remaining rows for testing.
train = df.sample(frac=0.8, random_state=200)
test = df.drop(index=train.index)

In [None]:
# Look at the first row again to see the data as it is now
df.iloc[0]

In [None]:
# Name of the column in our TabularDataset that the model should predict
label = 'WINNER'

# Create the tabular predictor for that label and fit it on the training set;
# models are written under the given path and training is capped by time_limit.
predictor = (
    TabularPredictor(label=label, path='./ag_hol2_livelabs')
    .fit(train, time_limit=600)
)

In [None]:
# Ground-truth values of the label column for the held-out test split
y_test = test[label]

# The same test rows with the label column removed, i.e. the features only
test_data_nolabel = test.drop(label, axis='columns')

# Preview the testing features
test_data_nolabel.head(n=5)

In [None]:
# Load the predictor back from disk, using the same path that was given to
# TabularPredictor when it was fitted
predictor = TabularPredictor.load('./ag_hol2_livelabs')

In [None]:
# Leaderboard of the trained models, evaluated on the held-out test split
# (the test frame still contains the label column)
predictor.leaderboard(test, silent=False)

In [None]:
# Feature importance for the best model, computed on the test split
# with a subsample_size of 5000
predictor.feature_importance(test, subsample_size=5000)

In [None]:
# Making predictions from incoming data.
# One incoming observation, written as feature name -> value so that every number
# sits next to the column it belongs to. Insertion order is the column order.
sample_stats = {
    'MAGICRESIST': 29,
    'HEALTHREGENRATE': 17,
    'SPELLVAMP': 0,
    'TIMESTAMP': 180033,
    'MAXHEALTH': 714,
    'MOVESPEED': 345,
    'ATTACKDAMAGE': 65,
    'ARMORPENETRATIONPERCENT': 0,
    'LIFESTEAL': 0,
    'ABILITYPOWER': 15,
    'RESOURCEVALUE': 70,
    'MAGICPENETRATIONFLAT': 0,
    'ATTACKSPEED': 102,
    'CURRENTHEALTH': 513,
    'ARMOR': 42,
    'MAGICPENETRATIONPERCENT': 0,
    'RESOURCEMAX': 100,
    'RESOURCEREGENRATE': 0,
}

# Plain list of the values, in column order
data = list(sample_stats.values())

# Single-row dataframe with explicit column names for consistency
test_d = pd.DataFrame([data], columns=list(sample_stats.keys()))

In [None]:
# Alternative sample with camelCase column names, kept disabled for reference:
#data = [91, 35, 0, 1080321, 2204, 390, 225, 0, 10, 0, 672, 0, 220, 0, 94, 0, 1047, 33]
#test_d = pd.DataFrame([data], columns=['magicResist', 'healthRegenRate', 'spellVamp', 'timestamp', 'maxHealth', 'moveSpeed', 'attackDamage', 'armorPenetrationPercent', 'lifesteal', 'abilityPower', 'resourceValue', 'magicPenetrationFlat', 'attackSpeed', 'currentHealth', 'armor', 'magicPenetrationPercent', 'resourceMax', 'resourceRegenRate'])

# Print the first row of the sample dataframe
print(test_d.head(1))

In [None]:
# Make prediction.
# The whole sample frame is passed in: it holds a single row, so a slice that
# starts at position 1 would skip that row and hand the model an empty frame.
prediction = predictor.predict(test_d)
print('Prediction: {}'.format(prediction))

# Print how probable each class is
print('Probabilities: {}'.format(
    predictor.predict_proba(test_d)))