In [1]:
import pandas as pd, numpy as np
import os, time
from tqdm import tqdm 

In [2]:
os.listdir('input')

['new_te_abl.feather',
 'tr_abl.feather',
 'te_rc.feather',
 'sample_submission.csv',
 'te_cam.feather',
 'tr_cam.feather',
 'test.feather',
 'train.csv',
 'new_tr_rc.feather',
 'new_tr_cam.feather',
 'new_te_rc.feather',
 'new_te_cam.feather',
 'te_abl.feather',
 'train.feather',
 'test.csv',
 'tr_rc.feather',
 'new_tr_abl.feather']

In [3]:
from pathlib import Path
data_dir = Path('input')

In [4]:
%%time
tr = pd.read_feather(os.path.join(data_dir, 'train.feather'))
te = pd.read_feather(os.path.join(data_dir, 'test.feather'))
submission = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

CPU times: user 33.3 s, sys: 9.26 s, total: 42.6 s
Wall time: 1min 52s


In [5]:
tr.head()

Unnamed: 0,game_id,winner,time,player,species,event,event_contents
0,0,1,0.0,0,T,Camera,"at (145.25, 21.5078125)"
1,0,1,0.0,1,T,Camera,"at (22.75, 147.0078125)"
2,0,1,0.02,0,T,Selection,['OrbitalCommand [3080001]']
3,0,1,0.02,0,T,Ability,(1360) - TrainSCV
4,0,1,0.14,0,T,Camera,"at (142.99609375, 24.50390625)"


In [6]:
def base_feat(df):
    df['time'] = df['time'].astype(str)
    df['sec'] = df['time'].apply(lambda x: x.split(".")[-1])
    df['min'] = df['time'].apply(lambda x: x.split(".")[0])
    df['min'] = df['min'].astype(int)*60
    df['sec'] = df['sec'].astype(int)
    df['time'] = df['min']+tr['sec']
    
    dfg = df.groupby(['game_id','player'])['time'].agg(['sum','mean','std','count','last'])
    dfg = dfg.reset_index()
    
    p0 = dfg[dfg['player']==0]
    p1 = dfg[dfg['player']==1]
    
    p0_cols = ["p0_"+x for x in p0.columns[2:]]
    p1_cols = ["p1_"+x for x in p1.columns[2:]]
    
    p0.columns = ['game_id','player']+p0_cols
    p1.columns = ['game_id','player']+p1_cols
    
    new_df = p0.drop('player',axis=1).merge(p1.drop('player',axis=1), on='game_id',how='left')
#     new_df['game_duration'] = df.groupby(['game_id'])['time'].max()
    del p0,p1
    
    ###
    spec = df.groupby(['game_id','player'])['species'].agg(['max'])
    spec = spec.reset_index()
    
    p0_spec = spec[spec['player']==0]
    p1_spec = spec[spec['player']==1]
    p0_spec.drop(['player'], axis=1, inplace=True)
    p1_spec.drop(['player'], axis=1, inplace=True)
    p0_spec.columns = ['game_id','p0_species']
    p1_spec.columns = ['game_id','p1_species']
    
    new_df = new_df.merge(p0_spec, on='game_id', how='left')
    new_df = new_df.merge(p1_spec, on='game_id', how='left')
    new_df['p0_species'] = new_df['p0_species'].astype('category')
    new_df['p1_species'] = new_df['p1_species'].astype('category')

    return new_df

In [7]:
%%time
tr_set = base_feat(tr)
te_set = base_feat(te)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


CPU times: user 2min 15s, sys: 11.1 s, total: 2min 26s
Wall time: 2min 26s


In [8]:
%%time
# day_list = ['Friday','Monday','Saturday','Sunday','Thursday','Tuesday','Wednesday']
tr_p0 = tr[tr['player']==0].groupby(['game_id','player'])['event'].value_counts(dropna=False, normalize=True).unstack()
tr_p0 = tr_p0.reset_index()
tr_p1 = tr[tr['player']==1].groupby(['game_id','player'])['event'].value_counts(dropna=False, normalize=True).unstack()
tr_p1 = tr_p1.reset_index()
te_p0 = te[te['player']==0].groupby(['game_id','player'])['event'].value_counts(dropna=False, normalize=True).unstack()
te_p0 = te_p0.reset_index()
te_p1 = te[te['player']==1].groupby(['game_id','player'])['event'].value_counts(dropna=False, normalize=True).unstack()
te_p1 = te_p1.reset_index()

CPU times: user 21.5 s, sys: 3.58 s, total: 25.1 s
Wall time: 25.2 s


In [9]:
tr_p0.columns.name =""
tr_p1.columns.name =""
te_p0.columns.name =""
te_p1.columns.name =""

In [10]:
p0_cols = ["p0_"+x for x in tr_p0.columns[2:]]
p1_cols = ["p1_"+x for x in tr_p1.columns[2:]]

In [11]:
tr_p0.columns = ['game_id','player']+p0_cols
tr_p1.columns = ['game_id','player']+p1_cols
te_p0.columns = ['game_id','player']+p0_cols
te_p1.columns = ['game_id','player']+p1_cols

In [12]:
print(tr_p0.shape)
tr_p0.head()

(38872, 10)


Unnamed: 0,game_id,player,p0_Ability,p0_AddToControlGroup,p0_Camera,p0_ControlGroup,p0_GetControlGroup,p0_Right Click,p0_Selection,p0_SetControlGroup
0,0,0,0.057432,0.003378,0.75,,0.040541,0.059122,0.084459,0.005068
1,1,0,0.06296,0.000818,0.512674,,0.132461,0.130826,0.152085,0.008177
2,2,0,0.081081,0.007051,0.485311,,0.116334,0.188014,0.105758,0.016451
3,3,0,0.059035,,0.513319,,0.095032,0.198704,0.12959,0.00432
4,4,0,0.059006,0.001035,0.445135,,0.231884,0.18323,0.069358,0.010352


In [13]:
tr_p1.head()

Unnamed: 0,game_id,player,p1_Ability,p1_AddToControlGroup,p1_Camera,p1_ControlGroup,p1_GetControlGroup,p1_Right Click,p1_Selection,p1_SetControlGroup
0,0,1,0.062044,,0.775547,,0.005474,0.051095,0.104015,0.001825
1,1,1,0.055326,,0.708505,,0.025599,0.108175,0.095789,0.006606
2,2,1,0.061999,0.003647,0.528811,0.001459,0.079504,0.148796,0.16922,0.006565
3,3,1,0.054871,,0.239211,,0.437731,0.165228,0.091245,0.011714
4,4,1,0.0553,0.006144,0.417819,,0.152074,0.162826,0.193548,0.012289


In [14]:
te_p1.head()

Unnamed: 0,game_id,player,p1_Ability,p1_AddToControlGroup,p1_Camera,p1_ControlGroup,p1_GetControlGroup,p1_Right Click,p1_Selection,p1_SetControlGroup
0,38872,1,0.053147,0.005594,0.653147,,0.06014,0.170629,0.057343,
1,38873,1,0.071979,0.002571,0.375321,,0.313625,0.174807,0.048843,0.012853
2,38874,1,0.035194,,0.26699,,0.51699,0.131068,0.043689,0.006068
3,38875,1,0.067207,0.001159,0.413673,,0.02781,0.373117,0.114716,0.002317
4,38876,1,0.037559,,0.755869,,0.004695,0.032864,0.164319,0.004695


In [15]:
new_tr = tr_p0.drop('player',axis=1).merge(tr_p1.drop('player',axis=1), on='game_id', how='left')
new_te = te_p0.drop('player',axis=1).merge(te_p1.drop('player',axis=1), on='game_id', how='left')

In [16]:
new_tr.head()

Unnamed: 0,game_id,p0_Ability,p0_AddToControlGroup,p0_Camera,p0_ControlGroup,p0_GetControlGroup,p0_Right Click,p0_Selection,p0_SetControlGroup,p1_Ability,p1_AddToControlGroup,p1_Camera,p1_ControlGroup,p1_GetControlGroup,p1_Right Click,p1_Selection,p1_SetControlGroup
0,0,0.057432,0.003378,0.75,,0.040541,0.059122,0.084459,0.005068,0.062044,,0.775547,,0.005474,0.051095,0.104015,0.001825
1,1,0.06296,0.000818,0.512674,,0.132461,0.130826,0.152085,0.008177,0.055326,,0.708505,,0.025599,0.108175,0.095789,0.006606
2,2,0.081081,0.007051,0.485311,,0.116334,0.188014,0.105758,0.016451,0.061999,0.003647,0.528811,0.001459,0.079504,0.148796,0.16922,0.006565
3,3,0.059035,,0.513319,,0.095032,0.198704,0.12959,0.00432,0.054871,,0.239211,,0.437731,0.165228,0.091245,0.011714
4,4,0.059006,0.001035,0.445135,,0.231884,0.18323,0.069358,0.010352,0.0553,0.006144,0.417819,,0.152074,0.162826,0.193548,0.012289


In [17]:
tr_set.head()

Unnamed: 0,game_id,p0_sum,p0_mean,p0_std,p0_count,p0_last,p1_sum,p1_mean,p1_std,p1_count,p1_last,p0_species,p1_species
0,0,140456,237.256757,122.917337,592,444,136478,249.047445,101.627387,548,444,T,T
1,1,369113,301.809485,175.342738,1223,599,387048,319.610239,169.485186,1211,599,P,T
2,2,253983,298.452409,183.566955,851,599,457492,333.692195,167.738309,1371,599,P,Z
3,3,437299,314.830094,171.846653,1389,599,456423,281.395191,171.076841,1622,597,T,P
4,4,178444,184.724638,89.94578,966,339,102764,157.855607,99.618136,651,343,T,Z


In [18]:
new_tr.head()

Unnamed: 0,game_id,p0_Ability,p0_AddToControlGroup,p0_Camera,p0_ControlGroup,p0_GetControlGroup,p0_Right Click,p0_Selection,p0_SetControlGroup,p1_Ability,p1_AddToControlGroup,p1_Camera,p1_ControlGroup,p1_GetControlGroup,p1_Right Click,p1_Selection,p1_SetControlGroup
0,0,0.057432,0.003378,0.75,,0.040541,0.059122,0.084459,0.005068,0.062044,,0.775547,,0.005474,0.051095,0.104015,0.001825
1,1,0.06296,0.000818,0.512674,,0.132461,0.130826,0.152085,0.008177,0.055326,,0.708505,,0.025599,0.108175,0.095789,0.006606
2,2,0.081081,0.007051,0.485311,,0.116334,0.188014,0.105758,0.016451,0.061999,0.003647,0.528811,0.001459,0.079504,0.148796,0.16922,0.006565
3,3,0.059035,,0.513319,,0.095032,0.198704,0.12959,0.00432,0.054871,,0.239211,,0.437731,0.165228,0.091245,0.011714
4,4,0.059006,0.001035,0.445135,,0.231884,0.18323,0.069358,0.010352,0.0553,0.006144,0.417819,,0.152074,0.162826,0.193548,0.012289


In [19]:
new_tr.fillna(0, inplace=True)
new_te.fillna(0, inplace=True)

In [20]:
%%time
new_tr = new_tr.merge(tr_set, on='game_id',how='left')
new_te = new_te.merge(te_set, on='game_id',how='left')

CPU times: user 24.9 ms, sys: 0 ns, total: 24.9 ms
Wall time: 24.6 ms


In [21]:
new_tr.dtypes

game_id                    int64
p0_Ability               float64
p0_AddToControlGroup     float64
p0_Camera                float64
p0_ControlGroup          float64
p0_GetControlGroup       float64
p0_Right Click           float64
p0_Selection             float64
p0_SetControlGroup       float64
p1_Ability               float64
p1_AddToControlGroup     float64
p1_Camera                float64
p1_ControlGroup          float64
p1_GetControlGroup       float64
p1_Right Click           float64
p1_Selection             float64
p1_SetControlGroup       float64
p0_sum                     int64
p0_mean                  float64
p0_std                   float64
p0_count                   int64
p0_last                    int64
p1_sum                     int64
p1_mean                  float64
p1_std                   float64
p1_count                   int64
p1_last                    int64
p0_species              category
p1_species              category
dtype: object

In [22]:
new_tr.head()

Unnamed: 0,game_id,p0_Ability,p0_AddToControlGroup,p0_Camera,p0_ControlGroup,p0_GetControlGroup,p0_Right Click,p0_Selection,p0_SetControlGroup,p1_Ability,...,p0_std,p0_count,p0_last,p1_sum,p1_mean,p1_std,p1_count,p1_last,p0_species,p1_species
0,0,0.057432,0.003378,0.75,0.0,0.040541,0.059122,0.084459,0.005068,0.062044,...,122.917337,592,444,136478,249.047445,101.627387,548,444,T,T
1,1,0.06296,0.000818,0.512674,0.0,0.132461,0.130826,0.152085,0.008177,0.055326,...,175.342738,1223,599,387048,319.610239,169.485186,1211,599,P,T
2,2,0.081081,0.007051,0.485311,0.0,0.116334,0.188014,0.105758,0.016451,0.061999,...,183.566955,851,599,457492,333.692195,167.738309,1371,599,P,Z
3,3,0.059035,0.0,0.513319,0.0,0.095032,0.198704,0.12959,0.00432,0.054871,...,171.846653,1389,599,456423,281.395191,171.076841,1622,597,T,P
4,4,0.059006,0.001035,0.445135,0.0,0.231884,0.18323,0.069358,0.010352,0.0553,...,89.94578,966,339,102764,157.855607,99.618136,651,343,T,Z


In [23]:
new_tr.to_feather("../dacon_sc/input/new_tr.feather")
new_te.to_feather("../dacon_sc/input/new_te.feather")