In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm_notebook
import tqdm
import json

In [2]:
# Player-level match tables; both are indexed by their `id` column.
train, test = (
    pd.read_csv(csv_path, index_col='id')
    for csv_path in (
        'data/dota2_skill_train.csv',
        'data/dota2_skill_test.csv',
    )
)

# Reference tables, each indexed by its own id column.
heroes, abilities, items = (
    pd.read_csv(csv_path, index_col=key_col)
    for csv_path, key_col in (
        ('data/dota2_heroes.csv', 'hero_id'),
        ('data/dota2_abilities.csv', 'ability_id'),
        ('data/dota2_items.csv', 'item_id'),
    )
)

In [3]:
# Encode the team name as an integer: dire -> 0, radiant -> 1.
# NOTE: Series.map yields NaN for values that are not keys of the dict, so
# running this cell a second time (on already-encoded columns) turns both
# columns into NaN.
mapping = {'dire': 0, 'radiant': 1}
for frame in (train, test):
    for team_col in ('player_team', 'winner_team'):
        frame[team_col] = frame[team_col].map(mapping)

In [4]:
# Each new column is (minuend column) - (subtrahend column), computed the same
# way for train and test.
delta_specs = (
    ('delta_kills', 'best_kills_x16', 'avg_kills_x16'),
    ('delta_assists', 'best_assists_x16', 'avg_assists_x16'),
    ('delta_gpm', 'best_gpm_x16', 'avg_gpm_x16'),
    ('delta_xpm', 'best_xpm_x16', 'avg_xpm_x16'),
    ('delta_streak', 'best_win_streak', 'win_streak'),
)

for frame in (train, test):
    for new_col, minuend_col, subtrahend_col in delta_specs:
        frame[new_col] = frame[minuend_col] - frame[subtrahend_col]

In [5]:
# --- Row-level ratio / interaction features (train) ---
# 0.01 is added to numerator and denominator so a raw 0 never produces a
# division by zero.
train['gold/gold_spent'] = (train['gold'] + 0.01) / (train['gold_spent'] + 0.01)
train['gold_spent/goldpermin'] = (train['gold_spent'] + 0.01) / (train['gold_per_min'] + 0.01)
train['gold_spent/net_worth'] = (train['gold_spent'] + 0.01) / (train['net_worth'] + 0.01)
# 1 when the player's team equals the winning team, otherwise 0.
train['is_player_team_win'] = (train['player_team'] == train['winner_team']).astype('int')
# (duration / 60) times a per-minute rate; presumably duration is in
# seconds -- TODO confirm.
train['duration*gpm'] = (train['duration'] / 60) * train['gold_per_min']
train['duration*xpm'] = (train['duration'] / 60) * train['xp_per_min']
# train['armor'] = train['base_armor'] + train['level'] * train['agi_gain'] / 6 
# train['rate_range_attack'] = train['attack_range'] * train['attack_rate']

# --- Per-hero aggregate features (train) ---
# Single source of truth for the columns that get per-hero statistics; the
# original repeated this list literally in six separate loops.
hero_stat_cols = [
    'gold_per_min', 'xp_per_min',
    'avg_kills_x16', 'avg_deaths_x16', 'avg_assists_x16',
    'avg_gpm_x16', 'avg_xpm_x16',
    'best_kills_x16', 'best_assists_x16', 'best_gpm_x16', 'best_xpm_x16',
    'win_streak', 'best_win_streak',
]

# 'gp_h<col>_<stat>': the statistic of <col> over all train rows sharing the
# row's hero_id, broadcast back onto every row.  Loop order is kept as
# mean -> median -> max (with 'skilled' first among the means) so the
# resulting column order is unchanged.
# NOTE(review): 'gp_hskilled_mean' averages 'skilled' over rows that include
# the row itself; if 'skilled' is the prediction target this leaks the label
# into the feature -- confirm this is intended.
for stat, cols in (
    ('mean', ['skilled'] + hero_stat_cols),
    ('median', hero_stat_cols),
    ('max', hero_stat_cols),
):
    for col in cols:
        per_hero = train.groupby('hero_id')[col].agg(stat)
        train['gp_h{}_{}'.format(col, stat)] = train['hero_id'].map(per_hero)

# 'delta_<col>_<stat>': per-hero statistic minus the row's own value.
# Order mean -> max -> median matches the original column order.
for stat in ('mean', 'max', 'median'):
    for col in hero_stat_cols:
        train['delta_{}_{}'.format(col, stat)] = train['gp_h{}_{}'.format(col, stat)] - train[col]

In [6]:
# --- Row-level ratio / interaction features (test) ---
# 0.01 is added to numerator and denominator so a raw 0 never produces a
# division by zero.
test['gold/gold_spent'] = (test['gold'] + 0.01) / (test['gold_spent'] + 0.01)
test['gold_spent/goldpermin'] = (test['gold_spent'] + 0.01) / (test['gold_per_min'] + 0.01)
test['gold_spent/net_worth'] = (test['gold_spent'] + 0.01) / (test['net_worth'] + 0.01)
# 1 when the player's team equals the winning team, otherwise 0.
test['is_player_team_win'] = (test['player_team'] == test['winner_team']).astype('int')
# (duration / 60) times a per-minute rate; presumably duration is in
# seconds -- TODO confirm.
test['duration*gpm'] = (test['duration'] / 60) * test['gold_per_min']
test['duration*xpm'] = (test['duration'] / 60) * test['xp_per_min']
# test['armor'] = test['base_armor'] + test['level'] * test['agi_gain'] / 6 
# test['rate_range_attack'] = test['attack_range'] * test['attack_rate']

# The per-hero mean of 'skilled' is taken from train and mapped onto test rows
# by hero_id.
skilled_mean_by_hero = train.groupby('hero_id')['skilled'].mean()
test['gp_hskilled_mean'] = test['hero_id'].map(skilled_mean_by_hero)

# --- Per-hero aggregate features (test) ---
# Single source of truth for the columns that get per-hero statistics; the
# original repeated this list literally in six separate loops.
hero_stat_cols = [
    'gold_per_min', 'xp_per_min',
    'avg_kills_x16', 'avg_deaths_x16', 'avg_assists_x16',
    'avg_gpm_x16', 'avg_xpm_x16',
    'best_kills_x16', 'best_assists_x16', 'best_gpm_x16', 'best_xpm_x16',
    'win_streak', 'best_win_streak',
]

# 'gp_h<col>_<stat>': statistic of <col> over all *test* rows with the row's
# hero_id.  Loop order mean -> median -> max preserves the column order.
# NOTE(review): these statistics come from the test frame, whereas the train
# features of the same name come from the train frame -- confirm that using
# split-specific statistics is intended.
for stat in ('mean', 'median', 'max'):
    for col in hero_stat_cols:
        per_hero = test.groupby('hero_id')[col].agg(stat)
        test['gp_h{}_{}'.format(col, stat)] = test['hero_id'].map(per_hero)

# 'delta_<col>_<stat>': per-hero statistic minus the row's own value.
# Order mean -> max -> median matches the original column order.
for stat in ('mean', 'max', 'median'):
    for col in hero_stat_cols:
        test['delta_{}_{}'.format(col, stat)] = test['gp_h{}_{}'.format(col, stat)] - test[col]

In [7]:
# Product of hero damage and team-fight participation (despite the '/' in the
# column name, this is a multiplication).
for frame in (train, test):
    frame['hero_damage/team_fight'] = frame['hero_damage'].mul(frame['team_fight_participation'])

In [8]:
# Total wards placed: sentry + observer (the column is named 'delta_wards'
# but holds a sum).
for frame in (train, test):
    frame['delta_wards'] = frame['sentry_wards_placed'].add(frame['observer_wards_placed'])

In [9]:
# Hero damage minus tower damage.
for frame in (train, test):
    frame['delta_damage'] = frame['hero_damage'].sub(frame['tower_damage'])

__add ability upgrades__

In [10]:
# Abilities with no recorded behavior are treated as 'Unit Target'.
abilities['behavior'] = abilities['behavior'].fillna('Unit Target')

In [11]:
def _add_upgrade_behavior_counts(frame, jsonlines_path, behavior_by_ability):
    """Add per-behavior ability-upgrade counts to ``frame`` in place.

    For every record in the JSON-lines file, each entry of its
    'ability_upgrades' list is translated to a behavior string and the
    matching 'upgrade_behavior <behavior>' cell of the record's row is
    incremented.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table indexed by record id. It must already contain an
        'upgrade_behavior <behavior>' column for every behavior that can occur.
    jsonlines_path : str
        Path to a file with one JSON object per line, each carrying the keys
        'id' and 'ability_upgrades'.
    behavior_by_ability : dict
        Maps an ability id to its behavior string.

    Raises
    ------
    KeyError
        If an upgraded ability id is not in ``behavior_by_ability``, or a
        record id / behavior column is not present in ``frame``.
    """
    with open(jsonlines_path) as fin:
        for line in tqdm.tqdm_notebook(fin):
            record = json.loads(line)
            # Tally the record locally first so the frame is written once per
            # distinct behavior instead of once per upgrade.
            upgrades_per_behavior = {}
            for ability_upgrade in record['ability_upgrades']:
                behavior = behavior_by_ability[ability_upgrade]
                upgrades_per_behavior[behavior] = upgrades_per_behavior.get(behavior, 0) + 1
            for behavior, n_upgrades in upgrades_per_behavior.items():
                frame.at[record['id'], 'upgrade_behavior {}'.format(behavior)] += n_upgrades


# One behavior view is used both to create the columns and to look up each
# upgrade, so a missing behavior is labelled 'empty' consistently in both.
ability_behavior_filled = abilities.behavior.fillna('empty')

# Sorted so the order of the created columns does not depend on set iteration
# order, which varies between interpreter runs for strings.
for ability_behavior in sorted(set(ability_behavior_filled)):
    train['upgrade_behavior {}'.format(ability_behavior)] = 0
    test['upgrade_behavior {}'.format(ability_behavior)] = 0

# Plain dict lookup replaces a DataFrame .loc access per upgrade.
behavior_by_ability = ability_behavior_filled.to_dict()

_add_upgrade_behavior_counts(train, 'data/dota2_skill_train.jsonlines', behavior_by_ability)
_add_upgrade_behavior_counts(test, 'data/dota2_skill_test.jsonlines', behavior_by_ability)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




__add damage targets__

In [12]:
# Damage-target tables for each split, indexed by the CSV's unnamed first column.
train_id_damage_targets, test_id_damage_targets = (
    pd.read_csv(csv_path, index_col='Unnamed: 0')
    for csv_path in (
        'json_data/train_id_damage_targets.csv',
        'json_data/test_id_damage_targets.csv',
    )
)

In [13]:
# Restrict test to the train columns, in train's order, leaving out
# 'npc_dota_warlock_golem_scepter_1' (train itself keeps that column).
shared_target_cols = train_id_damage_targets.columns.drop('npc_dota_warlock_golem_scepter_1')
test_id_damage_targets = test_id_damage_targets[shared_target_cols]

In [14]:
# Left-join the integer-cast damage-target columns onto each split by index.
train = train.join(train_id_damage_targets.astype(int), how='left')
test = test.join(test_id_damage_targets.astype(int), how='left')

In [15]:
# Row-wise total over all damage-target columns of each split's table
# (assigned by index alignment).
train['damage_targets_sum'] = train_id_damage_targets.sum(axis='columns')
test['damage_targets_sum'] = test_id_damage_targets.sum(axis='columns')

__add level to sum_xp__

In [16]:
level_map = {1 : 0, 2 : 230, 3 : 600, 4 : 1080, 5 : 1680, 6 : 2300, 7 : 2940, 8 : 3600, 9 : 4280, 10 : 5080, 11 : 5900, 12 : 6740, 13 : 7640, 14 : 8865, 15 : 10115, 16 : 11390, 17 : 12690, 18 : 14015, 19 : 15415, 20 : 16905, 21 : 18405, 22 : 20155, 23 : 22155, 24 : 24405, 25 : 26905}

In [17]:
# Translate each row's level into XP via level_map; levels that are not keys
# of level_map become NaN.
for frame in (train, test):
    frame['xp_from_level'] = frame['level'].map(level_map)

__add items / final items__

In [18]:
# Final-items tables for each split, indexed by the CSV's unnamed first column.
train_final_items_qual_and_cost, test_final_items_qual_and_cost = (
    pd.read_csv(csv_path, index_col='Unnamed: 0')
    for csv_path in (
        'json_data/train_final_items_qual_and_cost.csv',
        'json_data/test_final_items_qual_and_cost.csv',
    )
)

In [19]:
# Left-join the integer-cast final-item columns onto each split by index.
train = train.join(train_final_items_qual_and_cost.astype(int), how='left')
test = test.join(test_final_items_qual_and_cost.astype(int), how='left')

In [20]:
# Read the train items table and left-join it onto train by index.
train_items_qual_count = pd.read_csv(
    'json_data/train_items_qual_count.csv',
    index_col='Unnamed: 0',
)
train = train.join(train_items_qual_count, how='left')

In [21]:
# Read the test items table and left-join it onto test by index.
test_items_qual_count = pd.read_csv(
    'json_data/test_items_qual_count.csv',
    index_col='Unnamed: 0',
)
test = test.join(test_items_qual_count, how='left')

__add series__

In [22]:
# Read the train series table and left-join it onto train by index.
train_series = pd.read_csv(
    'json_data/train_series.csv',
    index_col='Unnamed: 0',
)
train = train.join(train_series, how='left')

In [23]:
# Read the test series table and left-join it onto test by index.
test_series = pd.read_csv(
    'json_data/test_series.csv',
    index_col='Unnamed: 0',
)
test = test.join(test_series, how='left')

__add level up times__

In [24]:
# Read the train level-up table and left-join it onto train by index.
l_u_train = pd.read_csv(
    'json_data/l_u_train.csv',
    index_col='Unnamed: 0',
)
train = train.join(l_u_train, how='left')

In [25]:
# Read the test level-up table and left-join it onto test by index.
l_u_test = pd.read_csv(
    'json_data/l_u_test.csv',
    index_col='Unnamed: 0',
)
test = test.join(l_u_test, how='left')

__heroes__

In [30]:
# Teammate tables for each split, indexed by the CSV's unnamed first column.
# NOTE(review): the test file is read from 'data/' while the train file comes
# from 'json_data/' -- confirm the differing directory is intentional.
train_teammates, test_teammates = (
    pd.read_csv(csv_path, index_col='Unnamed: 0')
    for csv_path in (
        'json_data/train_teammates.csv',
        'data/test_teammates.csv',
    )
)

In [31]:
# Give both teammate tables the same 116 column labels:
# 'teammate_0_hero' ... 'teammate_115_hero'. Each frame gets its own list.
for teammates_frame in (train_teammates, test_teammates):
    teammates_frame.columns = ['teammate_{}_hero'.format(slot) for slot in range(0, 116)]

In [41]:
# Team-stats tables for each split, indexed by the CSV's unnamed first column.
train_tm_stats, test_tm_stats = (
    pd.read_csv(csv_path, index_col='Unnamed: 0')
    for csv_path in (
        'json_data/train_tm_stats.csv',
        'json_data/test_tm_stats.csv',
    )
)

In [33]:
# train_tm_stats.columns = ['tm_agi_mean','tm_attack_range_mean','tm_attack_rate_mean ','tm_winrate_mean','tm_agi_std','tm_attack_range_std','tm_attack_rate_std ','tm_winrate_std', 'tm_Carry', 'tm_Escape', 'tm_Nuker', 'tm_Initiator', 'tm_Durable', 'tm_Disabler','tm_Jungler', 'tm_Support', 'tm_Pusher']
# test_tm_stats.columns = ['tm_agi_mean','tm_attack_range_mean','tm_attack_rate_mean ','tm_winrate_mean','tm_agi_std','tm_attack_range_std','tm_attack_rate_std ','tm_winrate_std', 'tm_Carry', 'tm_Escape', 'tm_Nuker', 'tm_Initiator', 'tm_Durable', 'tm_Disabler','tm_Jungler', 'tm_Support', 'tm_Pusher']

In [42]:
# Only the team-stats table is joined; the teammates join is disabled.
#train = train.join(train_teammates)
train = train.join(train_tm_stats, how='left')

In [43]:
# Only the team-stats table is joined; the teammates join is disabled.
#test = test.join(test_teammates)
test = test.join(test_tm_stats, how='left')

# Save data

In [45]:
# Column counts of (train, test), displayed as the cell's output.
tuple(frame.shape[1] for frame in (train, test))

(619, 617)

In [46]:
# Write both feature tables to CSV (index included).
# NOTE(review): the train file name is spelled 'trian'; it is kept as-is
# because other code may read this exact path -- confirm before renaming.
for frame, out_path in (
    (train, 'data/trian_v3_1.csv'),
    (test, 'data/test_v3_1.csv'),
):
    frame.to_csv(out_path)