In [None]:
# Install necessary dependencies to run the notebook
# python -m pip install -U pip pandas setuptools wheel pandas_profiling autogluon

In [None]:
import json
import os

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from autogluon.tabular import TabularPredictor, TabularDataset
from pandas_profiling import ProfileReport

In [None]:
# Read the dataset (a JSON array of records) from a local file.
# The location can be overridden with the LOLCOACH_EXPORT_JSON environment
# variable so the notebook is not tied to one machine's directory layout; when
# the variable is unset, the original path is used.
DATA_PATH = os.environ.get(
    'LOLCOACH_EXPORT_JSON',
    '/Users/mattgunnin/Sites/AI/20_GPT/gpt_lolcoach/data/lol/export.json',
)
df = pd.read_json(DATA_PATH, orient='records')

In [None]:
# Build a pandas-profiling HTML report for exploratory data analysis and
# render it inline in the notebook.
report_html_options = {'style': {'full_width': True}}
report = ProfileReport(
    df,
    title="Matchups Exploration",
    html=report_html_options,
)

report.to_notebook_iframe()

In [None]:
# Display the first 5 rows
df.head(5)

In [None]:
# Display the column labels of the dataset
df.columns

In [None]:
# Summary statistics from pandas (similar figures also appear in the
# pandas-profiling HTML report generated above)
df.describe()

In [None]:
# Look at a single example: the first row of the dataset
df.iloc[0]

In [None]:
# Wrap the data in an AutoGluon TabularDataset.
# Note: this rebinds `df`, so from here on the name refers to the
# TabularDataset rather than the object returned by pd.read_json.
# https://auto.gluon.ai/stable/tutorials/tabular_prediction/index.html
df = TabularDataset(df)

In [None]:
# drop columns we don't want (identifiers)
def _drop_column(df, col_name=list()):
    for x in col_name:
        try:
            df.drop([x],
                axis=1,
                inplace=True)
        except KeyError:
            print('{} already dropped from df'.format(x))
        

    return df


# Remove the identifier columns before modelling
identifier_columns = ['puuid', 'summonerName']
df = _drop_column(df, col_name=identifier_columns)

# 80/20 train-test split: sample 80% of the rows with a fixed seed for
# training, and keep the remaining rows (by index label) for testing
train = df.sample(frac=0.8, random_state=200)
test = df.drop(index=train.index)

In [None]:
# Look at the first example again, now that the identifier columns are dropped
df.iloc[0]

In [None]:
# Name of the column in our TabularDataset that we want to predict
label = 'win'

# Create the tabular predictor for the target label, then fit it on the
# training set with a 60-second time limit
predictor = TabularPredictor(
    label=label,
    path='./autogluon_trained_models_liveclient_classifier',
)
predictor = predictor.fit(train, time_limit=60)

In [None]:
# Split the held-out test set into the true labels and the feature columns
y_test = test[label]  # ground-truth 'win' values to compare predictions against

# Features only: the label column is removed (the identifier columns were
# already dropped from `df` before the split)
test_data_nolabel = test.drop(label, axis=1)

# Preview the first rows of the testing features
test_data_nolabel.head()

In [None]:
# Reload the predictor from the directory it was saved to during training
model_dir = './autogluon_trained_models_liveclient_classifier'
predictor = TabularPredictor.load(model_dir)

# Predict the label for every row of the test features
y_pred = predictor.predict(test_data_nolabel)
print("Predictions:  \n", y_pred)

# Compare the predictions with the true labels, including auxiliary metrics
perf = predictor.evaluate_predictions(
    y_true=y_test,
    y_pred=y_pred,
    auxiliary_metrics=True,
)

In [None]:
# Leaderboard of the trained models, passing the held-out `test` split as data
# (silent=False, so the leaderboard is also printed)
predictor.leaderboard(test, silent=False)

In [None]:
# Feature importance computed with the `test` split, with subsample_size=5000.
# No model is named here, so the predictor's default choice is used
# (presumably the best model — confirm against the AutoGluon docs).
predictor.feature_importance(test,
                            subsample_size=5000)

In [None]:
# Build a single-row frame for an incoming record we want a prediction for.
# Source record fields: goldearned, totalminionskilled, win, kills, assists, deaths, champion,
#                       visionscore, totaldamagedealttochampions, gameversion
# Source record values: [5506, 134, false, 0, 1, 3, Jayce, 7, 2350, 11.15.389.2308]
# The 'win' field is left out of the values below (it is the label being predicted).
# NOTE(review): assumes these lowercase column names match the columns the
# predictor was trained on — TODO confirm against the training data.
feature_names = [
    'goldearned',
    'totalminionskilled',
    'kills',
    'assists',
    'deaths',
    'champion',
    'visionscore',
    'totaldamagedealttochampions',
    'gameversion',
]
data = [5506, 134, 0, 1, 3, 'Jayce', 7, 2350, '11.15.389.2308']

# One record -> one row, with explicit column names for consistency
test_d = pd.DataFrame(data=[data], columns=feature_names)

In [None]:
#data = [91, 35, 0, 1080321, 2204, 390, 225, 0, 10, 0, 672, 0, 220, 0, 94, 0, 1047, 33]
#test_d = pd.DataFrame([data], columns=['magicResist', 'healthRegenRate', 'spellVamp', 'timestamp', 'maxHealth', 'moveSpeed', 'attackDamage', 'armorPenetrationPercent', 'lifesteal', 'abilityPower', 'resourceValue', 'magicPenetrationFlat', 'attackSpeed', 'currentHealth', 'armor', 'magicPenetrationPercent', 'resourceMax', 'resourceRegenRate'])


In [None]:
# Make the prediction for the single incoming row and show it.
# A bare expression in the middle of a cell is not displayed by Jupyter (by
# default only the cell's last expression is), so the result is kept in a
# variable and printed explicitly.
prediction = predictor.predict(test_d)
print(prediction)

# Print how probable each class is
print(predictor.predict_proba(test_d).iloc[0])