In [21]:
import pandas as pd, numpy as np
import os, time, gc
from tqdm import tqdm 
pd.options.mode.chained_assignment = None

In [2]:
os.listdir('input')

['.ipynb_checkpoints',
 'new_te.feather',
 'new_te_abl.feather',
 'new_te_cam.feather',
 'new_te_cam_ts.feather',
 'new_te_eve_ts.feather',
 'new_te_rc.feather',
 'new_te_rc_ts.feather',
 'new_tr.feather',
 'new_tr_abl.feather',
 'new_tr_cam.feather',
 'new_tr_cam_ts.feather',
 'new_tr_eve_ts.feather',
 'new_tr_rc.feather',
 'new_tr_rc_ts.feather',
 'sample_submission.csv',
 'te.feather',
 'test.csv',
 'te_abl.feather',
 'te_cam.feather',
 'te_rc.feather',
 'te_tsact_sp.feather',
 'te_v7.feather',
 'tr.feather',
 'train.csv',
 'tr_abl.feather',
 'tr_rc.feather',
 'tr_tsact_sp.feather',
 'tr_v7.feather']

In [3]:
%%time
tr = pd.read_feather('../dacon_sc/input/tr.feather')
te = pd.read_feather('../dacon_sc/input/te.feather')
submission = pd.read_csv('../dacon_sc/input/sample_submission.csv')

Wall time: 34.3 s


In [4]:
tr.head()

Unnamed: 0,game_id,winner,time,player,species,event,event_contents
0,0,1,0.0,0,T,Camera,"at (145.25, 21.5078125)"
1,0,1,0.0,1,T,Camera,"at (22.75, 147.0078125)"
2,0,1,0.02,0,T,Selection,['OrbitalCommand [3080001]']
3,0,1,0.02,0,T,Ability,(1360) - TrainSCV
4,0,1,0.14,0,T,Camera,"at (142.99609375, 24.50390625)"


def base_feat(df):
    df['time'] = df['time'].astype(str)
    df['sec'] = df['time'].apply(lambda x: x.split(".")[-1])
    df['min'] = df['time'].apply(lambda x: x.split(".")[0])
    df['min'] = df['min'].astype(int)*60
    df['sec'] = df['sec'].astype(int)
    df['time'] = df['min']+tr['sec']
    
    dfg = df.groupby(['game_id','player'])['time'].agg(['sum','mean','std','count','last'])
    dfg = dfg.reset_index()
    
    p0 = dfg[dfg['player']==0]
    p1 = dfg[dfg['player']==1]
    
    p0_cols = ["p0_"+x for x in p0.columns[2:]]
    p1_cols = ["p1_"+x for x in p1.columns[2:]]
    
    p0.columns = ['game_id','player']+p0_cols
    p1.columns = ['game_id','player']+p1_cols
    
    new_df = p0.drop('player',axis=1).merge(p1.drop('player',axis=1), on='game_id',how='left')
#     new_df['game_duration'] = df.groupby(['game_id'])['time'].max()
    del p0,p1
    
    ###
    spec = df.groupby(['game_id','player'])['species'].agg(['max'])
    spec = spec.reset_index()
    
    p0_spec = spec[spec['player']==0]
    p1_spec = spec[spec['player']==1]
    p0_spec.drop(['player'], axis=1, inplace=True)
    p1_spec.drop(['player'], axis=1, inplace=True)
    p0_spec.columns = ['game_id','p0_species']
    p1_spec.columns = ['game_id','p1_species']
    
    new_df = new_df.merge(p0_spec, on='game_id', how='left')
    new_df = new_df.merge(p1_spec, on='game_id', how='left')
    new_df['p0_species'] = new_df['p0_species'].astype('category')
    new_df['p1_species'] = new_df['p1_species'].astype('category')

    return new_df

%%time
tr_set = base_feat(tr)
te_set = base_feat(te)

In [5]:
def make_timespan(df):

    df['time_span'] = 0
    df['time_span'] = np.where(df['time']<1, 0, 
                                  np.where(df['time']<2, 1, 
                                          np.where(df['time']<3, 2,
                                                  np.where(df['time']<4, 3,
                                                          np.where(df['time']<5, 4,
                                                                  np.where(df['time']<6, 5,
                                                                          np.where(df['time']<7, 6,
                                                                                  np.where(df['time']<8, 7,
                                                                                          np.where(df['time']<9, 8,
                                                                                                   np.where(df['time']<10, 9, 10))))))))))
    return df

In [6]:
tr = make_timespan(tr)
te = make_timespan(te)

In [7]:
tr.head()

Unnamed: 0,game_id,winner,time,player,species,event,event_contents,time_span
0,0,1,0.0,0,T,Camera,"at (145.25, 21.5078125)",0
1,0,1,0.0,1,T,Camera,"at (22.75, 147.0078125)",0
2,0,1,0.02,0,T,Selection,['OrbitalCommand [3080001]'],0
3,0,1,0.02,0,T,Ability,(1360) - TrainSCV,0
4,0,1,0.14,0,T,Camera,"at (142.99609375, 24.50390625)",0


In [33]:
def time_n_event(df):
    new_p0 = pd.DataFrame({"game_id":df['game_id'].unique()})
    new_p1 = pd.DataFrame({"game_id":df['game_id'].unique()})
    
    for tsp in tqdm(range(11)):
        p0 = df[(df['player']==0)&(df['time_span']==tsp)]
        tmp = p0.groupby(['game_id','time_span'])['event'].value_counts(dropna=False, normalize=True).unstack()
        tmp.columns= ['TIME_{}_EVENT_{}_p0'.format(tsp, eve_name) for eve_name in tmp.columns]
        tmp.reset_index(inplace=True)
        tmp.fillna(0, inplace=True)
        tmp.drop(['time_span'],axis=1,inplace=True)

        new_p0 = new_p0.merge(tmp, on=['game_id'],how='left')
        del tmp;gc.collect()
        
    for tsp in tqdm(range(11)):
        p1 = df[(df['player']==1)&(df['time_span']==tsp)]
        tmp = p1.groupby(['game_id','time_span'])['event'].value_counts(dropna=False, normalize=True).unstack()
        tmp.columns= ['TIME_{}_EVENT_{}_p1'.format(tsp, eve_name) for eve_name in tmp.columns]
        tmp.reset_index(inplace=True)
        tmp.fillna(0, inplace=True)
        tmp.drop(['time_span'],axis=1,inplace=True)

        new_p1 = new_p1.merge(tmp, on=['game_id'],how='left')
        del tmp;gc.collect()
    
    new_df = new_p0.merge(new_p1, on=['game_id'], how='left')
    
    return new_df    

In [34]:
tr_time_eve = time_n_event(tr)




  0%|                                                                                           | 0/11 [00:00<?, ?it/s][A[A[A


  9%|███████▌                                                                           | 1/11 [00:01<00:19,  1.95s/it][A[A[A


 18%|███████████████                                                                    | 2/11 [00:03<00:17,  1.97s/it][A[A[A


 27%|██████████████████████▋                                                            | 3/11 [00:06<00:16,  2.01s/it][A[A[A


 36%|██████████████████████████████▏                                                    | 4/11 [00:08<00:14,  2.04s/it][A[A[A


 45%|█████████████████████████████████████▋                                             | 5/11 [00:10<00:12,  2.05s/it][A[A[A


 55%|█████████████████████████████████████████████▎                                     | 6/11 [00:12<00:10,  2.01s/it][A[A[A


 64%|████████████████████████████████████████████████████▊                      

In [35]:
tr_time_eve.head()

Unnamed: 0,game_id,TIME_0_EVENT_Ability_p0,TIME_0_EVENT_AddToControlGroup_p0,TIME_0_EVENT_Camera_p0,TIME_0_EVENT_ControlGroup_p0,TIME_0_EVENT_GetControlGroup_p0,TIME_0_EVENT_Right Click_p0,TIME_0_EVENT_Selection_p0,TIME_0_EVENT_SetControlGroup_p0,TIME_1_EVENT_Ability_p0,...,TIME_9_EVENT_Selection_p1,TIME_9_EVENT_SetControlGroup_p1,TIME_10_EVENT_Ability_p1,TIME_10_EVENT_AddToControlGroup_p1,TIME_10_EVENT_Camera_p1,TIME_10_EVENT_ControlGroup_p1,TIME_10_EVENT_GetControlGroup_p1,TIME_10_EVENT_Right Click_p1,TIME_10_EVENT_Selection_p1,TIME_10_EVENT_SetControlGroup_p1
0,0,0.072727,0.0,0.818182,0.0,0.0,0.036364,0.054545,0.018182,0.066667,...,,,,,,,,,,
1,1,0.064935,0.0,0.25974,0.0,0.207792,0.116883,0.298701,0.051948,0.024845,...,0.086093,0.0,,,,,,,,
2,2,0.04065,0.0,0.390244,0.0,0.308943,0.097561,0.105691,0.056911,0.028986,...,0.206704,0.011173,,,,,,,,
3,3,0.035714,0.0,0.241071,0.0,0.241071,0.25,0.205357,0.026786,0.016667,...,0.047297,0.006757,,,,,,,,
4,4,0.075758,0.0,0.454545,0.0,0.060606,0.257576,0.121212,0.030303,0.015625,...,,,,,,,,,,


In [36]:
tr_time_eve.fillna(0, inplace=True)

In [37]:
tr_time_eve.head()

Unnamed: 0,game_id,TIME_0_EVENT_Ability_p0,TIME_0_EVENT_AddToControlGroup_p0,TIME_0_EVENT_Camera_p0,TIME_0_EVENT_ControlGroup_p0,TIME_0_EVENT_GetControlGroup_p0,TIME_0_EVENT_Right Click_p0,TIME_0_EVENT_Selection_p0,TIME_0_EVENT_SetControlGroup_p0,TIME_1_EVENT_Ability_p0,...,TIME_9_EVENT_Selection_p1,TIME_9_EVENT_SetControlGroup_p1,TIME_10_EVENT_Ability_p1,TIME_10_EVENT_AddToControlGroup_p1,TIME_10_EVENT_Camera_p1,TIME_10_EVENT_ControlGroup_p1,TIME_10_EVENT_GetControlGroup_p1,TIME_10_EVENT_Right Click_p1,TIME_10_EVENT_Selection_p1,TIME_10_EVENT_SetControlGroup_p1
0,0,0.072727,0.0,0.818182,0.0,0.0,0.036364,0.054545,0.018182,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0.064935,0.0,0.25974,0.0,0.207792,0.116883,0.298701,0.051948,0.024845,...,0.086093,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,0.04065,0.0,0.390244,0.0,0.308943,0.097561,0.105691,0.056911,0.028986,...,0.206704,0.011173,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,0.035714,0.0,0.241071,0.0,0.241071,0.25,0.205357,0.026786,0.016667,...,0.047297,0.006757,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,0.075758,0.0,0.454545,0.0,0.060606,0.257576,0.121212,0.030303,0.015625,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
te_time_eve = time_n_event(te)
te_time_eve.fillna(0, inplace=True)




  0%|                                                                                           | 0/11 [00:00<?, ?it/s][A[A[A


  9%|███████▌                                                                           | 1/11 [00:05<00:52,  5.28s/it][A[A[A


 18%|███████████████                                                                    | 2/11 [00:06<00:35,  3.99s/it][A[A[A


 27%|██████████████████████▋                                                            | 3/11 [00:07<00:24,  3.09s/it][A[A[A


 36%|██████████████████████████████▏                                                    | 4/11 [00:08<00:17,  2.45s/it][A[A[A


 45%|█████████████████████████████████████▋                                             | 5/11 [00:09<00:11,  2.00s/it][A[A[A


 55%|█████████████████████████████████████████████▎                                     | 6/11 [00:10<00:08,  1.66s/it][A[A[A


 64%|████████████████████████████████████████████████████▊                      

In [39]:
te_time_eve.head()

Unnamed: 0,game_id,TIME_0_EVENT_Ability_p0,TIME_0_EVENT_AddToControlGroup_p0,TIME_0_EVENT_Camera_p0,TIME_0_EVENT_ControlGroup_p0,TIME_0_EVENT_GetControlGroup_p0,TIME_0_EVENT_Right Click_p0,TIME_0_EVENT_Selection_p0,TIME_0_EVENT_SetControlGroup_p0,TIME_1_EVENT_Ability_p0,...,TIME_9_EVENT_Selection_p1,TIME_9_EVENT_SetControlGroup_p1,TIME_10_EVENT_Ability_p1,TIME_10_EVENT_AddToControlGroup_p1,TIME_10_EVENT_Camera_p1,TIME_10_EVENT_ControlGroup_p1,TIME_10_EVENT_GetControlGroup_p1,TIME_10_EVENT_Right Click_p1,TIME_10_EVENT_Selection_p1,TIME_10_EVENT_SetControlGroup_p1
0,38872,0.051282,0.0,0.487179,0.0,0.025641,0.282051,0.102564,0.051282,0.071429,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,38873,0.032258,0.0,0.596774,0.0,0.0,0.096774,0.274194,0.0,0.117647,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,38874,0.055556,0.0,0.622222,0.0,0.011111,0.133333,0.155556,0.022222,0.025424,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,38875,0.055046,0.073394,0.348624,0.0,0.155963,0.229358,0.137615,0.0,0.016043,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,38876,0.016529,0.0,0.297521,0.0,0.566116,0.066116,0.045455,0.008264,0.004785,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Save feature files

In [46]:
# tr_time_eve.to_feather("../dacon_sc/input/tr_time_eve.feather")
# te_time_eve.to_feather("../dacon_sc/input/te_time_eve.feather")

In [40]:
tr_y = tr.groupby(['game_id'])['winner'].max()
tr_y = tr_y.reset_index()
train_Y = tr_y['winner']
tr_y.head()

Unnamed: 0,game_id,winner
0,0,1
1,1,1
2,2,0
3,3,0
4,4,0


In [None]:
def diff_all(df):
    p0_cols = df[df.filter(regex='p0').columns.tolist()].select_dtypes(exclude=[object,'category']).columns.tolist()
    p1_cols = df[df.filter(regex='p1').columns.tolist()].select_dtypes(exclude=[object,'category']).columns.tolist()
    
    for a, b in tqdm(zip(p0_cols, p1_cols), total=len(p0_cols)):
        df["{}_{}_diff".format(a,b)] = df[a] - df[b]
    
    return df

In [None]:
tr_time_eve = diff_all(tr_time_eve)
te_time_eve = diff_all(te_time_eve)

In [41]:
import time
from sklearn import metrics
from operator import itemgetter
import lightgbm as lgb

In [42]:
from sklearn.model_selection import KFold

def LGB_KFOLD_BINA(n_fold, train_X, test_X, metric, lr, num_leaves, max_depth):
    
    folds = KFold(n_splits=n_fold, shuffle=True, random_state=42)

    oof_lgb = np.zeros(len(train_X))
    predictions = np.zeros(len(test_X))
    feature_importance_df = pd.DataFrame()
    cv_score_df = []

    # Model parameters
    lgb_params = {'num_leaves': num_leaves,
                 'min_data_in_leaf': 20, 
                 'objective':'binary',
                 'max_depth': max_depth,
                 'learning_rate': lr,
                 "boosting": "gbdt",
                 "feature_fraction": 0.3,
                 "bagging_freq": 1,
                 "bagging_fraction": 0.7,
                 "bagging_seed": 42,
                 "metric": metric,
                 "lambda_l1": 0.0,
                 "verbosity": 300,
                 "nthread": -1,
                 "random_state": 42}

    model_start = time.time()

    for fold_, (trn_idx, val_idx) in enumerate(folds.split(train_X)):
        strLog = "fold {}".format(fold_)
        print(strLog+ "-" * 50)

        X_tr, X_val = train_X.iloc[trn_idx], train_X.iloc[val_idx]
        y_tr, y_val = train_Y.iloc[trn_idx], train_Y.iloc[val_idx]

        model = lgb.LGBMClassifier(**lgb_params, n_estimators = 200000, n_jobs = -1)
        model.fit(X_tr, 
                  y_tr, 
                  eval_set=[(X_tr, y_tr), (X_val, y_val)], 
                  eval_metric=metric,
                  verbose=300, 
                  early_stopping_rounds=200)
        oof_lgb[val_idx] = model.predict_proba(X_val, num_iteration=model.best_iteration_)[:,1]

        cv_score_df.append(model.best_score_)

        #feature importance
        fold_importance_df = pd.DataFrame()
        fold_importance_df["Feature"] = train_X.columns
        fold_importance_df["importance"] = model.feature_importances_[:len(train_X.columns)]
        fold_importance_df["fold"] = fold_ + 1
        feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
        #predictions
        predictions += model.predict_proba(test_X, num_iteration=model.best_iteration_)[:,1] / folds.n_splits

    cv_score_df = pd.DataFrame.from_dict(cv_score_df)
    cv_score_df = cv_score_df.valid_1.tolist()
    cv_score_df = list(map(itemgetter(metric),cv_score_df))

    print("-" * 50)
    #print("SF   RMSE = {}".format(oof_score))
    print("Mean "+ metric + " = {}".format(np.mean(cv_score_df)))
    print("Std "+ metric + " = {}".format(np.std(cv_score_df)))
#     lgb.plot_metric(model, metric=metric, title='auc plot', xlabel='Iterations', ylabel='auto', figsize=(10,8), grid=False)

    model_end = time.time()
    model_elapsed = model_end - model_start
    print('Model elapsed {0:0.2f}'.format(model_elapsed/60), "minutes.")
    cols = (feature_importance_df[["Feature", "importance"]]
        .groupby("Feature")
        .mean()
        .sort_values(by="importance", ascending=False)[:].index)
    best_features = feature_importance_df.loc[feature_importance_df.Feature.isin(cols)]
    pd.set_option('display.max_rows', 500)
    best_features = best_features.drop(['fold'],axis=1)
    best_features = best_features.groupby(['Feature'], as_index = False).mean()
    best_features['Feature Rank'] = best_features['importance'].rank(ascending=0)
    best_features = best_features.sort_values('Feature Rank', ascending = True)
    print(best_features.loc[best_features['importance']!=0].head(100))

    return predictions, oof_lgb, np.mean(cv_score_df), np.std(cv_score_df), best_features

In [43]:
tr_time_eve.dtypes.value_counts()

float64    176
int64        1
dtype: int64

In [45]:
#early stopping 200
pred, oof, cv, cv_std, best_feat = LGB_KFOLD_BINA(5, tr_time_eve.drop('game_id',axis=1), te_time_eve.drop('game_id',axis=1), "auc", lr=0.003, num_leaves=700, max_depth=-1)

fold 0--------------------------------------------------
Training until validation scores don't improve for 200 rounds
[300]	training's auc: 0.996567	valid_1's auc: 0.613465
[600]	training's auc: 0.999508	valid_1's auc: 0.616706
[900]	training's auc: 0.999859	valid_1's auc: 0.619416
[1200]	training's auc: 0.999923	valid_1's auc: 0.621226
[1500]	training's auc: 0.999964	valid_1's auc: 0.622909
[1800]	training's auc: 0.999991	valid_1's auc: 0.623236
Early stopping, best iteration is:
[1682]	training's auc: 0.999982	valid_1's auc: 0.62364
fold 1--------------------------------------------------
Training until validation scores don't improve for 200 rounds
[300]	training's auc: 0.996433	valid_1's auc: 0.625617
[600]	training's auc: 0.999398	valid_1's auc: 0.629553
[900]	training's auc: 0.999774	valid_1's auc: 0.630966
[1200]	training's auc: 0.999867	valid_1's auc: 0.632419
[1500]	training's auc: 0.999944	valid_1's auc: 0.633747
[1800]	training's auc: 0.999984	valid_1's auc: 0.634398
Early 

In [45]:
#early stopping 200
pred, oof, cv, cv_std, best_feat = LGB_KFOLD_BINA(5, tr_time_eve.drop('game_id',axis=1), te_time_eve.drop('game_id',axis=1), "auc", lr=0.003, num_leaves=700, max_depth=-1)

fold 0--------------------------------------------------
Training until validation scores don't improve for 200 rounds
[300]	training's auc: 0.996567	valid_1's auc: 0.613465
[600]	training's auc: 0.999508	valid_1's auc: 0.616706
[900]	training's auc: 0.999859	valid_1's auc: 0.619416
[1200]	training's auc: 0.999923	valid_1's auc: 0.621226
[1500]	training's auc: 0.999964	valid_1's auc: 0.622909
[1800]	training's auc: 0.999991	valid_1's auc: 0.623236
Early stopping, best iteration is:
[1682]	training's auc: 0.999982	valid_1's auc: 0.62364
fold 1--------------------------------------------------
Training until validation scores don't improve for 200 rounds
[300]	training's auc: 0.996433	valid_1's auc: 0.625617
[600]	training's auc: 0.999398	valid_1's auc: 0.629553
[900]	training's auc: 0.999774	valid_1's auc: 0.630966
[1200]	training's auc: 0.999867	valid_1's auc: 0.632419
[1500]	training's auc: 0.999944	valid_1's auc: 0.633747
[1800]	training's auc: 0.999984	valid_1's auc: 0.634398
Early 

In [8]:
%%time
# day_list = ['Friday','Monday','Saturday','Sunday','Thursday','Tuesday','Wednesday']
tr_p0 = tr[tr['player']==0].groupby(['game_id','player'])['event'].value_counts(dropna=False, normalize=True).unstack()
tr_p0 = tr_p0.reset_index()
tr_p1 = tr[tr['player']==1].groupby(['game_id','player'])['event'].value_counts(dropna=False, normalize=True).unstack()
tr_p1 = tr_p1.reset_index()
te_p0 = te[te['player']==0].groupby(['game_id','player'])['event'].value_counts(dropna=False, normalize=True).unstack()
te_p0 = te_p0.reset_index()
te_p1 = te[te['player']==1].groupby(['game_id','player'])['event'].value_counts(dropna=False, normalize=True).unstack()
te_p1 = te_p1.reset_index()

CPU times: user 21.5 s, sys: 3.58 s, total: 25.1 s
Wall time: 25.2 s


In [9]:
tr_p0.columns.name =""
tr_p1.columns.name =""
te_p0.columns.name =""
te_p1.columns.name =""

In [10]:
p0_cols = ["p0_"+x for x in tr_p0.columns[2:]]
p1_cols = ["p1_"+x for x in tr_p1.columns[2:]]

In [11]:
tr_p0.columns = ['game_id','player']+p0_cols
tr_p1.columns = ['game_id','player']+p1_cols
te_p0.columns = ['game_id','player']+p0_cols
te_p1.columns = ['game_id','player']+p1_cols

In [12]:
print(tr_p0.shape)
tr_p0.head()

(38872, 10)


Unnamed: 0,game_id,player,p0_Ability,p0_AddToControlGroup,p0_Camera,p0_ControlGroup,p0_GetControlGroup,p0_Right Click,p0_Selection,p0_SetControlGroup
0,0,0,0.057432,0.003378,0.75,,0.040541,0.059122,0.084459,0.005068
1,1,0,0.06296,0.000818,0.512674,,0.132461,0.130826,0.152085,0.008177
2,2,0,0.081081,0.007051,0.485311,,0.116334,0.188014,0.105758,0.016451
3,3,0,0.059035,,0.513319,,0.095032,0.198704,0.12959,0.00432
4,4,0,0.059006,0.001035,0.445135,,0.231884,0.18323,0.069358,0.010352


In [13]:
tr_p1.head()

Unnamed: 0,game_id,player,p1_Ability,p1_AddToControlGroup,p1_Camera,p1_ControlGroup,p1_GetControlGroup,p1_Right Click,p1_Selection,p1_SetControlGroup
0,0,1,0.062044,,0.775547,,0.005474,0.051095,0.104015,0.001825
1,1,1,0.055326,,0.708505,,0.025599,0.108175,0.095789,0.006606
2,2,1,0.061999,0.003647,0.528811,0.001459,0.079504,0.148796,0.16922,0.006565
3,3,1,0.054871,,0.239211,,0.437731,0.165228,0.091245,0.011714
4,4,1,0.0553,0.006144,0.417819,,0.152074,0.162826,0.193548,0.012289


In [14]:
te_p1.head()

Unnamed: 0,game_id,player,p1_Ability,p1_AddToControlGroup,p1_Camera,p1_ControlGroup,p1_GetControlGroup,p1_Right Click,p1_Selection,p1_SetControlGroup
0,38872,1,0.053147,0.005594,0.653147,,0.06014,0.170629,0.057343,
1,38873,1,0.071979,0.002571,0.375321,,0.313625,0.174807,0.048843,0.012853
2,38874,1,0.035194,,0.26699,,0.51699,0.131068,0.043689,0.006068
3,38875,1,0.067207,0.001159,0.413673,,0.02781,0.373117,0.114716,0.002317
4,38876,1,0.037559,,0.755869,,0.004695,0.032864,0.164319,0.004695


In [15]:
new_tr = tr_p0.drop('player',axis=1).merge(tr_p1.drop('player',axis=1), on='game_id', how='left')
new_te = te_p0.drop('player',axis=1).merge(te_p1.drop('player',axis=1), on='game_id', how='left')

In [16]:
new_tr.head()

Unnamed: 0,game_id,p0_Ability,p0_AddToControlGroup,p0_Camera,p0_ControlGroup,p0_GetControlGroup,p0_Right Click,p0_Selection,p0_SetControlGroup,p1_Ability,p1_AddToControlGroup,p1_Camera,p1_ControlGroup,p1_GetControlGroup,p1_Right Click,p1_Selection,p1_SetControlGroup
0,0,0.057432,0.003378,0.75,,0.040541,0.059122,0.084459,0.005068,0.062044,,0.775547,,0.005474,0.051095,0.104015,0.001825
1,1,0.06296,0.000818,0.512674,,0.132461,0.130826,0.152085,0.008177,0.055326,,0.708505,,0.025599,0.108175,0.095789,0.006606
2,2,0.081081,0.007051,0.485311,,0.116334,0.188014,0.105758,0.016451,0.061999,0.003647,0.528811,0.001459,0.079504,0.148796,0.16922,0.006565
3,3,0.059035,,0.513319,,0.095032,0.198704,0.12959,0.00432,0.054871,,0.239211,,0.437731,0.165228,0.091245,0.011714
4,4,0.059006,0.001035,0.445135,,0.231884,0.18323,0.069358,0.010352,0.0553,0.006144,0.417819,,0.152074,0.162826,0.193548,0.012289


In [17]:
tr_set.head()

Unnamed: 0,game_id,p0_sum,p0_mean,p0_std,p0_count,p0_last,p1_sum,p1_mean,p1_std,p1_count,p1_last,p0_species,p1_species
0,0,140456,237.256757,122.917337,592,444,136478,249.047445,101.627387,548,444,T,T
1,1,369113,301.809485,175.342738,1223,599,387048,319.610239,169.485186,1211,599,P,T
2,2,253983,298.452409,183.566955,851,599,457492,333.692195,167.738309,1371,599,P,Z
3,3,437299,314.830094,171.846653,1389,599,456423,281.395191,171.076841,1622,597,T,P
4,4,178444,184.724638,89.94578,966,339,102764,157.855607,99.618136,651,343,T,Z


In [18]:
new_tr.head()

Unnamed: 0,game_id,p0_Ability,p0_AddToControlGroup,p0_Camera,p0_ControlGroup,p0_GetControlGroup,p0_Right Click,p0_Selection,p0_SetControlGroup,p1_Ability,p1_AddToControlGroup,p1_Camera,p1_ControlGroup,p1_GetControlGroup,p1_Right Click,p1_Selection,p1_SetControlGroup
0,0,0.057432,0.003378,0.75,,0.040541,0.059122,0.084459,0.005068,0.062044,,0.775547,,0.005474,0.051095,0.104015,0.001825
1,1,0.06296,0.000818,0.512674,,0.132461,0.130826,0.152085,0.008177,0.055326,,0.708505,,0.025599,0.108175,0.095789,0.006606
2,2,0.081081,0.007051,0.485311,,0.116334,0.188014,0.105758,0.016451,0.061999,0.003647,0.528811,0.001459,0.079504,0.148796,0.16922,0.006565
3,3,0.059035,,0.513319,,0.095032,0.198704,0.12959,0.00432,0.054871,,0.239211,,0.437731,0.165228,0.091245,0.011714
4,4,0.059006,0.001035,0.445135,,0.231884,0.18323,0.069358,0.010352,0.0553,0.006144,0.417819,,0.152074,0.162826,0.193548,0.012289


In [19]:
new_tr.fillna(0, inplace=True)
new_te.fillna(0, inplace=True)

In [20]:
%%time
new_tr = new_tr.merge(tr_set, on='game_id',how='left')
new_te = new_te.merge(te_set, on='game_id',how='left')

CPU times: user 24.9 ms, sys: 0 ns, total: 24.9 ms
Wall time: 24.6 ms


In [21]:
new_tr.dtypes

game_id                    int64
p0_Ability               float64
p0_AddToControlGroup     float64
p0_Camera                float64
p0_ControlGroup          float64
p0_GetControlGroup       float64
p0_Right Click           float64
p0_Selection             float64
p0_SetControlGroup       float64
p1_Ability               float64
p1_AddToControlGroup     float64
p1_Camera                float64
p1_ControlGroup          float64
p1_GetControlGroup       float64
p1_Right Click           float64
p1_Selection             float64
p1_SetControlGroup       float64
p0_sum                     int64
p0_mean                  float64
p0_std                   float64
p0_count                   int64
p0_last                    int64
p1_sum                     int64
p1_mean                  float64
p1_std                   float64
p1_count                   int64
p1_last                    int64
p0_species              category
p1_species              category
dtype: object

In [22]:
new_tr.head()

Unnamed: 0,game_id,p0_Ability,p0_AddToControlGroup,p0_Camera,p0_ControlGroup,p0_GetControlGroup,p0_Right Click,p0_Selection,p0_SetControlGroup,p1_Ability,...,p0_std,p0_count,p0_last,p1_sum,p1_mean,p1_std,p1_count,p1_last,p0_species,p1_species
0,0,0.057432,0.003378,0.75,0.0,0.040541,0.059122,0.084459,0.005068,0.062044,...,122.917337,592,444,136478,249.047445,101.627387,548,444,T,T
1,1,0.06296,0.000818,0.512674,0.0,0.132461,0.130826,0.152085,0.008177,0.055326,...,175.342738,1223,599,387048,319.610239,169.485186,1211,599,P,T
2,2,0.081081,0.007051,0.485311,0.0,0.116334,0.188014,0.105758,0.016451,0.061999,...,183.566955,851,599,457492,333.692195,167.738309,1371,599,P,Z
3,3,0.059035,0.0,0.513319,0.0,0.095032,0.198704,0.12959,0.00432,0.054871,...,171.846653,1389,599,456423,281.395191,171.076841,1622,597,T,P
4,4,0.059006,0.001035,0.445135,0.0,0.231884,0.18323,0.069358,0.010352,0.0553,...,89.94578,966,339,102764,157.855607,99.618136,651,343,T,Z


In [23]:
new_tr.to_feather("../dacon_sc/input/new_tr.feather")
new_te.to_feather("../dacon_sc/input/new_te.feather")