# Exploratory Data Analysis

## Initial Setup and Data Load

In [1]:
# ---------------- Basic Data Science Libraries ---------------- #
import numpy as np # Linear algebra
import pandas as pd # Data processing

# ---------------- System Libraries ---------------- #
import os # Miscellaneous operating system interfaces
import gc # Garbage collector interface
import nbimporter # Use functions from other Jupyter Notebooks
from subprocess import check_output # Saves results written to the current directory as output

# ---------------- Plotting Libraries ---------------- #
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# ---------------- Removing the max columns limiter ---------------- #
# None lifts the column cap so wide frames are displayed without elided columns
pd.set_option('display.max_columns', None)

# ---------------- Printing files in my clean data folder ---------------- #
# os.listdir keeps this portable (no dependency on an external `ls` binary);
# dot-files are skipped and names are sorted to mirror a plain `ls` listing.
clean_data_files = sorted(
    name for name in os.listdir('../Data/Clean') if not name.startswith('.')
)
print('\n'.join(clean_data_files))

ability_ids.csv
ability_upgrades.csv
chat.csv
eng_chat.csv
hero_ids.csv
item_ids.csv
matches.csv
mmr.csv
objectives.csv
patch_dates.csv
player_time.csv
players.csv
positions.csv
prev_outcomes.csv
purchase_log.csv
regions.csv
teamfights.csv
teamfights_players.csv
test_outcomes.csv
test_players.csv
trueskill.csv



In [2]:
# Read the cleaned players table; the first CSV column is used as the row index
file_path = '../Data/Clean/players.csv'
players = pd.read_csv(file_path, index_col=0)

# Report how many rows (observations) and columns (features) were loaded
print(
    f'{file_path}:',
    '{:,} observations, {:,} features'.format(*players.shape),
)

../Data/Clean/players.csv: 425,860 observations, 73 features


In [3]:
# Looking at the info and head of the data
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() only adds a stray "None" to the cell output.
players.info()
display(players.head())

<class 'pandas.core.frame.DataFrame'>
Index: 425860 entries, 0 to 499999
Data columns (total 73 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   match_id                          425860 non-null  int64  
 1   match_outcome                     425860 non-null  int64  
 2   account                           425860 non-null  object 
 3   account_id                        425860 non-null  int64  
 4   hero_id                           425860 non-null  int64  
 5   player_slot                       425860 non-null  int64  
 6   match_slot_id                     425860 non-null  object 
 7   gold                              425860 non-null  int64  
 8   gold_spent                        425860 non-null  int64  
 9   gold_per_min                      425860 non-null  int64  
 10  xp_per_min                        425860 non-null  int64  
 11  kills                             425860 non-null  int64 

None

Unnamed: 0,match_id,match_outcome,account,account_id,hero_id,player_slot,match_slot_id,gold,gold_spent,gold_per_min,xp_per_min,kills,deaths,assists,denies,last_hits,stuns,hero_damage,hero_healing,tower_damage,item_0,item_1,item_2,item_3,item_4,item_5,level,leaver_status,xp_hero,xp_creep,xp_roshan,xp_other,gold_other,gold_death,gold_buyback,gold_abandon,gold_sell,gold_destroying_structure,gold_killing_heros,gold_killing_creeps,gold_killing_roshan,gold_killing_couriers,unit_order_none,unit_order_move_to_position,unit_order_move_to_target,unit_order_attack_move,unit_order_attack_target,unit_order_cast_position,unit_order_cast_target,unit_order_cast_target_tree,unit_order_cast_no_target,unit_order_cast_toggle,unit_order_hold_position,unit_order_train_ability,unit_order_drop_item,unit_order_give_item,unit_order_pickup_item,unit_order_pickup_rune,unit_order_purchase_item,unit_order_sell_item,unit_order_disassemble_item,unit_order_move_item,unit_order_cast_toggle_auto,unit_order_stop,unit_order_buyback,unit_order_glyph,unit_order_eject_item_from_stash,unit_order_cast_rune,unit_order_ping_ability,unit_order_move_to_direction,messages_sent,time_played,cluster
0,0,1,Double T,0,86,0,0_0,3261,10960,347,362,9,3,18,1,30,76.7356,8690,218,143,180,37,73,56,108,0,16,0,8840,5440,0,83,50,-957,0,0,212,3120,5145,1087,400,0,0,4070,1,25,416,51,144,3,71,0,188,16,0,0,0,2,35,2,0,11,0,0,0,0,0,0,6,0,4,2375,155
1,0,1,Monkey,1,51,1,0_1,2954,17760,494,659,13,3,18,9,109,87.4164,23747,0,423,46,63,119,102,24,108,22,0,14331,8440,2683,671,395,-1137,0,0,1650,3299,6676,4317,937,0,0,5894,214,165,1031,98,39,4,439,0,346,22,0,0,12,52,30,4,0,21,0,0,0,0,0,0,14,0,16,2375,155
2,0,1,Trash!!!,0,83,2,0_2,110,12195,350,385,0,4,15,1,58,0.0,4217,1595,399,48,60,59,108,65,0,17,0,6692,8112,0,453,259,-1436,-1015,0,0,3142,2418,3697,400,0,0,7053,3,132,645,36,160,20,373,0,643,17,5,0,7,8,28,0,1,18,0,0,1,0,0,0,17,0,2,2375,155
3,0,1,2,2,11,3,0_3,1179,22505,599,605,8,4,19,6,271,0.0,14832,2714,6055,63,147,154,164,79,160,21,0,8583,14230,894,293,100,-2156,0,0,938,4714,4104,10432,400,0,0,4712,133,163,690,9,15,7,406,0,150,21,0,0,1,9,45,7,0,14,0,0,0,1,0,0,13,0,0,2375,155
4,0,1,Kira,3,67,4,0_4,3307,23825,613,762,20,3,17,13,245,0.0,33740,243,1833,114,92,147,0,137,63,24,0,15814,14325,0,62,0,-1437,-1056,0,4194,3217,7467,9220,400,0,0,3853,7,7,1173,31,84,8,198,0,111,23,1,0,0,2,44,6,0,13,0,0,1,3,0,0,23,0,1,2375,155


---

## Data Preparation

Since the data imported from the [Data Cleaning Notebook](Data_Cleaning_and_Exploration.ipynb) is organized at the match level, with one row per player per match, grouping by account is more appropriate for obtaining each individual player's statistics.

### Grouping by Player Account

We need to aggregate the features using different methods. Some are best summarized by a typical value such as the median *(e.g., `gold_per_min`)*, while others are more informative as a count, a sum, the most frequent value (mode), or the number of unique values.

In [3]:
# List every column name so an aggregation method can be chosen for each one below
players.columns

Index(['match_id', 'match_outcome', 'account', 'account_id', 'hero_id',
       'player_slot', 'match_slot_id', 'gold', 'gold_spent', 'gold_per_min',
       'xp_per_min', 'kills', 'deaths', 'assists', 'denies', 'last_hits',
       'stuns', 'hero_damage', 'hero_healing', 'tower_damage', 'item_0',
       'item_1', 'item_2', 'item_3', 'item_4', 'item_5', 'level',
       'leaver_status', 'xp_hero', 'xp_creep', 'xp_roshan', 'xp_other',
       'gold_other', 'gold_death', 'gold_buyback', 'gold_abandon', 'gold_sell',
       'gold_destroying_structure', 'gold_killing_heros',
       'gold_killing_creeps', 'gold_killing_roshan', 'gold_killing_couriers',
       'unit_order_none', 'unit_order_move_to_position',
       'unit_order_move_to_target', 'unit_order_attack_move',
       'unit_order_attack_target', 'unit_order_cast_position',
       'unit_order_cast_target', 'unit_order_cast_target_tree',
       'unit_order_cast_no_target', 'unit_order_cast_toggle',
       'unit_order_hold_position', 'unit_o

In [6]:
# Defining a function to calculate the mode in a Series
def calculate_mode(series):
    '''
    Return the most frequent value of a Series, for use in the .agg function of a pd.DataFrame.

    Parameters
    ----------
    series : pd.Series
        Values to summarise (e.g. one column of one group).

    Returns
    -------
    The most frequent value. When several values tie, `Series.mode()` returns them
    sorted and the first (smallest) one is used. Returns np.nan when there is no
    mode at all, i.e. the series is empty or contains only missing values.
    '''
    # mode() drops NaN by default, so its result can be empty even when the input
    # is not; test the result itself to avoid an IndexError on .iloc[0].
    modes = series.mode()
    return modes.iloc[0] if not modes.empty else np.nan

# Selecting the agg functions for each column.
# The aggregated frame lists its columns in the key order of this dict.
agg_funcs = {
    # --- Per-account totals / identifiers ---
    'match_id': 'count',              # number of rows per account (matches played, assuming one row per match)
    'match_outcome': 'sum',           # presumably 1 = win, making this the win count -- TODO confirm encoding
    'hero_id': calculate_mode,        # most frequently seen hero
    'player_slot': calculate_mode,    # most frequently seen slot

    # --- Core match statistics: median per account ---
    'gold': 'median',
    'gold_spent': 'median',
    'gold_per_min': 'median',
    'xp_per_min': 'median',
    'kills': 'median',
    'deaths': 'median',
    'assists': 'median',
    'denies': 'median',
    'last_hits': 'median',
    'stuns': 'mean',                  # the only statistic summarised by its mean rather than its median
    'hero_damage': 'median',
    'hero_healing': 'median',
    'tower_damage': 'median',

    # --- Item slots: most frequent item id per slot ---
    'item_0': calculate_mode,
    'item_1': calculate_mode,
    'item_2': calculate_mode,
    'item_3': calculate_mode,
    'item_4': calculate_mode,
    'item_5': calculate_mode,

    'level': 'median',
    'leaver_status': calculate_mode,  # most frequent leaver status

    # --- Experience breakdown: median per account ---
    'xp_hero': 'median',
    'xp_creep': 'median',
    'xp_roshan': 'median',
    'xp_other': 'median',

    # --- Gold breakdown: median per account ---
    'gold_other': 'median',
    'gold_death': 'median',
    'gold_buyback': 'median',
    'gold_abandon': 'median',
    'gold_sell': 'median',
    'gold_destroying_structure': 'median',
    'gold_killing_heros': 'median',
    'gold_killing_creeps': 'median',
    'gold_killing_roshan': 'median',
    'gold_killing_couriers': 'median',

    # --- Unit-order counters: median per account ---
    'unit_order_none': 'median',
    'unit_order_move_to_position': 'median',
    'unit_order_move_to_target': 'median',
    'unit_order_attack_move': 'median',
    'unit_order_attack_target': 'median',
    'unit_order_cast_position': 'median',
    'unit_order_cast_target': 'median',
    'unit_order_cast_target_tree': 'median',
    'unit_order_cast_no_target': 'median',
    'unit_order_cast_toggle': 'median',
    'unit_order_hold_position': 'median',
    'unit_order_train_ability': 'median',
    'unit_order_drop_item': 'median',
    'unit_order_give_item': 'median',
    'unit_order_pickup_item': 'median',
    'unit_order_pickup_rune': 'median',
    'unit_order_purchase_item': 'median',
    'unit_order_sell_item': 'median',
    'unit_order_disassemble_item': 'median',
    'unit_order_move_item': 'median',
    'unit_order_cast_toggle_auto': 'median',
    'unit_order_stop': 'median',
    'unit_order_buyback': 'median',
    'unit_order_glyph': 'median',
    'unit_order_eject_item_from_stash': 'median',
    'unit_order_cast_rune': 'median',
    'unit_order_ping_ability': 'median',
    'unit_order_move_to_direction': 'median',

    # --- Activity totals ---
    'messages_sent': 'sum',
    'time_played': 'sum',
    'cluster': 'nunique',             # number of distinct cluster values seen for the account
}

# Keep the aggregated frame under its own name so later cells can reuse it
# instead of recomputing the groupby; `players` itself is left untouched.
players_by_account = players.groupby('account', as_index=False).agg(agg_funcs)
players_by_account

Unnamed: 0,account,match_id,match_outcome,hero_id,player_slot,gold,gold_spent,gold_per_min,xp_per_min,kills,deaths,assists,denies,last_hits,stuns,hero_damage,hero_healing,tower_damage,item_0,item_1,item_2,item_3,item_4,item_5,level,leaver_status,xp_hero,xp_creep,xp_roshan,xp_other,gold_other,gold_death,gold_buyback,gold_abandon,gold_sell,gold_destroying_structure,gold_killing_heros,gold_killing_creeps,gold_killing_roshan,gold_killing_couriers,unit_order_none,unit_order_move_to_position,unit_order_move_to_target,unit_order_attack_move,unit_order_attack_target,unit_order_cast_position,unit_order_cast_target,unit_order_cast_target_tree,unit_order_cast_no_target,unit_order_cast_toggle,unit_order_hold_position,unit_order_train_ability,unit_order_drop_item,unit_order_give_item,unit_order_pickup_item,unit_order_pickup_rune,unit_order_purchase_item,unit_order_sell_item,unit_order_disassemble_item,unit_order_move_item,unit_order_cast_toggle_auto,unit_order_stop,unit_order_buyback,unit_order_glyph,unit_order_eject_item_from_stash,unit_order_cast_rune,unit_order_ping_ability,unit_order_move_to_direction,messages_sent,time_played,cluster
0,!,13,8,104,0,3325.0,15280.0,509.0,489.0,8.0,5.0,10.0,3.0,134.0,14.963793,10247.0,0.0,1080.0,180,46,63,63,0,0,17.0,0,6746.0,9948.0,0.0,276.0,129.0,-1585.0,0.0,0.0,337.0,3325.0,4803.0,5072.0,0.0,0.0,0.0,2855.0,21.0,22.0,493.0,24.0,34.0,5.0,90.0,0.0,67.0,17.0,1.0,0.0,1.0,7.0,28.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,58,28535,10
1,! Ale<3,2,2,74,3,4398.5,23512.5,514.0,566.0,16.0,10.0,16.0,3.5,199.0,82.541550,22374.5,0.0,2463.5,96,1,100,48,65,108,25.0,0,15284.0,17124.0,0.0,256.5,208.0,-4400.0,0.0,0.0,1137.0,4215.5,8516.5,10384.0,560.5,87.5,0.0,5043.0,125.0,211.5,485.5,85.0,64.0,2.0,1253.0,0.0,0.0,27.0,1.0,0.0,3.0,20.0,33.5,4.0,0.0,9.5,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,4,6960,2
2,! ! ! ! !,1,0,22,4,942.0,14675.0,416.0,561.0,14.0,6.0,7.0,3.0,128.0,28.947800,24772.0,0.0,22.0,100,41,1,48,235,0,20.0,0,10378.0,11320.0,0.0,662.0,500.0,-3054.0,0.0,0.0,37.0,400.0,6410.0,5272.0,0.0,0.0,0.0,3524.0,109.0,1.0,140.0,63.0,217.0,0.0,132.0,0.0,4.0,20.0,4.0,3.0,6.0,13.0,20.0,1.0,1.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,5,2387,1
3,! #ZjoY,1,0,17,132,1403.0,17295.0,471.0,639.0,16.0,11.0,13.0,1.0,87.0,40.246300,20219.0,598.0,0.0,98,121,58,63,41,57,22.0,0,17459.0,7378.0,596.0,721.0,497.0,-4734.0,0.0,0.0,810.0,360.0,10578.0,3553.0,200.0,0.0,0.0,3128.0,43.0,195.0,512.0,110.0,44.0,2.0,295.0,0.0,79.0,23.0,0.0,0.0,4.0,46.0,41.0,5.0,1.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,1,2454,1
4,! ChosKott ツ!,2,2,21,1,2447.0,9700.0,329.0,301.5,3.5,7.0,16.0,1.5,48.5,79.786350,8306.0,351.5,685.5,102,0,36,1,214,88,14.5,0,6605.0,4883.5,298.0,229.0,160.5,-1628.0,-333.0,0.0,45.0,3291.5,3223.5,1733.0,200.0,0.0,0.0,2613.0,53.0,13.0,199.0,48.0,78.0,4.0,43.5,0.0,0.0,14.0,3.0,0.0,1.5,5.5,41.0,1.0,0.0,27.0,0.0,0.0,0.5,0.0,1.5,0.0,2.0,0.0,2,4936,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255846,🔯RayQuaZa^🔯,1,1,39,1,5433.0,20090.0,592.0,693.0,6.0,3.0,18.0,8.0,250.0,0.000000,14741.0,0.0,4877.0,96,98,108,65,41,48,24.0,0,9749.0,18550.0,596.0,1741.0,1005.0,-1497.0,0.0,0.0,232.0,4807.0,3571.0,12167.0,200.0,0.0,0.0,4223.0,109.0,101.0,810.0,109.0,79.0,5.0,193.0,0.0,214.0,24.0,6.0,1.0,7.0,90.0,29.0,1.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,10,2649,1
255847,🔱 @PoC@L!pS!S.ImI💰🚀,1,1,47,0,1354.0,18670.0,435.0,494.0,16.0,9.0,22.0,5.0,146.0,0.267500,24653.0,4102.0,4500.0,63,79,108,21,170,135,21.0,0,13738.0,9995.0,596.0,394.0,134.0,-4401.0,0.0,0.0,505.0,4143.0,6705.0,5582.0,200.0,0.0,0.0,3978.0,78.0,321.0,471.0,5.0,89.0,4.0,81.0,3.0,0.0,21.0,2.0,0.0,2.0,6.0,39.0,1.0,0.0,7.0,56.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2,2998,1
255848,😂Q😂U😂E😂E😂N,1,0,8,129,655.0,7590.0,352.0,383.0,1.0,7.0,3.0,3.0,92.0,0.000000,5501.0,0.0,262.0,172,11,50,69,71,3,13.0,0,2586.0,6912.0,0.0,125.0,100.0,-1733.0,0.0,0.0,0.0,350.0,2401.0,3488.0,0.0,0.0,0.0,2253.0,56.0,18.0,731.0,10.0,9.0,32.0,102.0,0.0,364.0,13.0,0.0,0.0,0.0,2.0,23.0,0.0,0.0,6.0,0.0,14.0,0.0,1.0,0.0,0.0,2.0,0.0,1,1503,1
255849,😎 الجمال,2,1,9,4,2119.5,12110.0,451.5,465.5,8.5,7.0,7.5,2.5,72.5,36.627750,11991.0,0.0,616.5,41,41,63,36,21,8,16.5,0,7652.5,6639.0,298.0,495.0,351.5,-1988.0,-312.5,0.0,632.0,2364.0,5594.0,2883.0,206.0,0.0,0.0,2200.5,1.0,0.0,330.5,37.5,1.0,10.5,311.5,1.0,13.0,16.5,0.0,1.0,0.0,15.0,29.0,1.5,0.0,13.5,0.0,144.5,0.5,1.5,0.0,0.0,1.0,0.0,25,3841,1


## Statistical Analysis