# Analysis

Load the locally saved data (the pickled `X` and `Y`).

In [1]:
import pickle
import pandas as pd
import os
import sys
import seaborn as sn
import matplotlib.pyplot as plt
from pathlib import Path
import yaml

sys.path.append('/home/joel/projects/driftlon/analysis')
sys.path.append('/home/joel/projects/driftlon')
from correlation import *
from analysis_utils import *

In [2]:
# Read the MongoDB connection settings from the project config file.
# The context manager closes the file handle instead of leaking it.
with open('/home/joel/projects/driftlon/config.yml', 'r') as config_file:
    mongo_config = yaml.safe_load(config_file)['mongodb']

# DataFetcher is not defined in this notebook; presumably it comes from one of
# the star imports above — confirm.
fetcher = DataFetcher(mongo_config['address'], mongo_config['username'], mongo_config['password'])

In [3]:
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
# Open read-only ('rb'): nothing is written back, and 'rb+' would fail on a
# read-only file. The context managers close the handles once loading is done.
with open('../X.pkl', 'rb') as x_file:
    X = pickle.load(x_file)
with open('../Y.pkl', 'rb') as y_file:
    Y = pickle.load(y_file)

In [4]:
# Show the number of entries in X and in Y side by side.
tuple(len(dataset) for dataset in (X, Y))

(87303, 87303)

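Keep only the numeric fields: drop the keys listed in `non_numeric_fields.yaml`, then keep the columns whose first-row value is an `int`.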

In [5]:
# Location of the YAML file that lists the fields to exclude as non-numeric.
non_numerical_fields_path = './non_numeric_fields.yaml'

# Parse the whole file content with PyYAML's BaseLoader.
non_numerical_fields = yaml.load(
    Path(non_numerical_fields_path).read_text(),
    Loader=yaml.BaseLoader,
)

In [6]:
# get_all_keys / get_common_keys / get_data_for_keys are project helpers that
# are not defined in this notebook (presumably from analysis_utils — confirm).
all_keys = get_all_keys(X)

# Keep the common keys that are not listed as non-numeric.
common_keys = [
    key
    for key in get_common_keys(X)
    if key not in non_numerical_fields
]

# Collect the values of X for exactly those keys.
data_for_keys = get_data_for_keys(common_keys, X)

In [7]:
# Build a frame from the collected values, transpose it, and label the columns
# with the common keys.
all_data = pd.DataFrame(data_for_keys).T.set_axis(common_keys, axis=1)

# A column counts as quantitative when its value in row 0 is exactly an `int`
# (exact type comparison, so bool and float values do not qualify).
quant_indices = [
    key
    for key, first_value in zip(common_keys, all_data.loc[0, :])
    if type(first_value) == int
]

# Restrict to the quantitative columns and convert each one to a numeric dtype.
data = pd.DataFrame(all_data.loc[:, quant_indices])
converted_data = data.apply(pd.to_numeric)

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
# In the recorded output below, stats_combatPlayerScore and stats_unrealKills
# have std 0 (min == max == 0), i.e. they are constant in this data set.
converted_data.describe()

Unnamed: 0,championId,participantId,spell1Id,spell2Id,stats_assists,stats_champLevel,stats_combatPlayerScore,stats_damageDealtToObjectives,stats_damageDealtToTurrets,stats_damageSelfMitigated,...,stats_trueDamageDealtToChampions,stats_trueDamageTaken,stats_turretKills,stats_unrealKills,stats_visionScore,stats_visionWardsBoughtInGame,stats_wardsKilled,stats_wardsPlaced,teamId,timeline_participantId
count,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0,...,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0,87303.0
mean,170.696356,6.254367,6.903886,8.103124,7.710216,13.458724,0.0,9015.701728,2539.271045,15000.532364,...,1099.87556,1048.450225,0.955076,0.0,28.113272,3.502549,4.03694,12.048234,159.058681,6.254367
std,199.276353,3.064484,4.055855,4.333837,5.646779,2.646758,0.0,9945.387265,3028.440469,11955.833238,...,1692.174728,1107.731309,1.285323,0.0,20.27908,3.371286,3.645379,10.630809,49.172838,3.064484
min,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,1.0
25%,42.0,4.0,4.0,4.0,4.0,12.0,0.0,1860.0,327.0,7077.0,...,76.0,345.0,0.0,0.0,15.0,1.0,1.0,6.0,100.0,4.0
50%,103.0,7.0,4.0,7.0,7.0,14.0,0.0,5307.0,1496.0,11634.0,...,594.0,723.0,0.0,0.0,22.0,3.0,3.0,9.0,200.0,7.0
75%,202.0,9.0,11.0,12.0,11.0,15.0,0.0,12880.0,3625.5,19101.0,...,1280.0,1368.0,1.0,0.0,35.0,5.0,6.0,13.0,200.0,9.0
max,887.0,10.0,21.0,21.0,43.0,18.0,0.0,83654.0,37941.0,166534.0,...,23906.0,14942.0,10.0,0.0,174.0,61.0,34.0,97.0,200.0,10.0


### create correlation matrix

In [None]:
# Pairwise Pearson correlation between all numeric columns
# ('pearson' is the pandas default, spelled out here for the reader).
corr_matrix = converted_data.corr(method='pearson')

In [None]:
# Make sure the output directory exists: on a fresh checkout nothing has
# created 'plots/' yet when this cell runs, and savefig would fail.
Path('plots').mkdir(parents=True, exist_ok=True)

# Large canvas so the per-cell annotations stay legible; the diverging colour
# map is centred on 0 so positive and negative correlations are distinguishable.
fig, ax = plt.subplots(figsize=(30, 30))
sn.heatmap(corr_matrix, annot=True, cmap="PiYG", center=0, ax=ax)
fig.savefig('plots/heatmap.png')

In [None]:
# Flatten the matrix to one value per (column, row) label pair and drop
# undefined (NaN) correlations.
pairwise_corr = corr_matrix.unstack().dropna()

# Keep every unordered pair exactly once by comparing column positions.
# Deduplicating on the correlation *value* (drop_duplicates) would also throw
# away distinct pairs that happen to share the same coefficient, e.g. any pair
# with correlation exactly 1.0 collides with the diagonal.
# The diagonal (self-correlation) is excluded as well.
column_position = {label: idx for idx, label in enumerate(corr_matrix.columns)}
is_unique_pair = [
    column_position[first] < column_position[second]
    for first, second in pairwise_corr.index
]

pairwise_corr.loc[is_unique_pair].sort_values(ascending=False).to_csv('correlations.csv')

### create violin-plots for all numeric fields

In [None]:
# Create the output folder for the violin plots (and its parent) if missing.
os.makedirs('plots/violin', exist_ok=True)

In [None]:
# Number of rows drawn per column; len(X) means every row is used.
N = len(X)

# Iterate over the columns that actually exist in converted_data. It only holds
# the quantitative subset of common_keys, so indexing it with every common key
# raises KeyError for any key that was filtered out.
for key in converted_data.columns:
    column = converted_data.loc[:, key].sample(n=N, ignore_index=True)

    # One explicit figure per column, closed after saving so figures do not
    # accumulate in memory over the loop.
    fig, ax = plt.subplots()
    ax.violinplot(column)
    ax.set_ylabel(key)
    fig.savefig('plots/violin/' + key + '.png')
    plt.close(fig)

### create scatter matrix

In [None]:
# Independent copy of the numeric frame with a binary 'target' column appended:
# 1 where the corresponding entry of Y is greater than 0, otherwise 0.
converted_data_with_target = converted_data.copy(deep=True).assign(
    target=[int(label > 0) for label in Y]
)

In [None]:
# Draw the scatter matrix on a 100-row sample. The fixed seed makes the sample,
# and therefore the saved figure, reproducible across runs.
pairplot_sample = converted_data_with_target.sample(n=100, ignore_index=True, random_state=0)

# pairplot creates its own figure, so save through the returned grid rather
# than relying on pyplot's "current figure" state (the grid's savefig uses a
# tight bounding box by default).
pair_grid = sn.pairplot(pairplot_sample)
pair_grid.savefig('plots/pairplot.png')

### correlation with target

In [19]:
# Preview only: displaying X and Y in full would write every record (len(X) is
# 87303 in the run above) into the notebook output.
X[:1], Y[:5]

([{'participantId': 10,
   'teamId': 200,
   'championId': 236,
   'spell1Id': 1,
   'spell2Id': 4,
   'stats_participantId': 10,
   'stats_win': True,
   'stats_item0': 6671,
   'stats_item1': 1038,
   'stats_item2': 3158,
   'stats_item3': 3508,
   'stats_item4': 1055,
   'stats_item5': 0,
   'stats_item6': 3363,
   'stats_kills': 8,
   'stats_deaths': 1,
   'stats_assists': 1,
   'stats_largestKillingSpree': 5,
   'stats_largestMultiKill': 2,
   'stats_killingSprees': 2,
   'stats_longestTimeSpentLiving': 706,
   'stats_doubleKills': 1,
   'stats_tripleKills': 0,
   'stats_quadraKills': 0,
   'stats_pentaKills': 0,
   'stats_unrealKills': 0,
   'stats_totalDamageDealt': 148823,
   'stats_magicDamageDealt': 5191,
   'stats_physicalDamageDealt': 142233,
   'stats_trueDamageDealt': 1398,
   'stats_largestCriticalStrike': 500,
   'stats_totalDamageDealtToChampions': 17281,
   'stats_magicDamageDealtToChampions': 2535,
   'stats_physicalDamageDealtToChampions': 14745,
   'stats_trueDamag