# 1. Import & Data load

In [1]:
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
import eli5
from eli5.sklearn import PermutationImportance
from scipy.stats import skew
from scipy.stats import kurtosis
from sklearn.cluster import KMeans
import math
pi = math.pi
pd.options.display.max_columns=500

train=pd.read_csv('./raw/train_features.csv')
y=pd.read_csv('./raw//train_labels.csv')
test=pd.read_csv('./raw//test_features.csv')
sub=pd.read_csv('./raw//sample_submission.csv')

# 2. Custom function

In [2]:
def range_func(x):
    max_val = np.max(x)
    min_val = np.min(x)
    range_val = max_val - min_val
    return range_val

def iqr_func2(x):
    q3, q1 = np.percentile(x, [20, 80])
    iqr = q3 - q1
    return iqr

def iqr_func3(x):
    q3, q1 = np.percentile(x, [40, 60])
    iqr = q3 - q1
    return iqr

def iqr_func4(x):
    q3, q1 = np.percentile(x, [15, 95])
    iqr = q3 - q1
    return iqr

def premad(x):
    return np.median(np.absolute(x-np.median(x,axis=0)),axis=0)

def preskew(x):
    return skew(x)

def prekurt(x):
    return kurtosis(x,fisher=True)

# 3. Create change data
- 시간 변동에 따른 수치 변화 데이터

In [4]:
train_change=pd.DataFrame()
change_train=train.set_index('id').drop('time',axis=1)
for j in train['id'].unique():
    for i in range(599):
        train_change=train_change.append(change_train[change_train.index==j].iloc[i+1,:]-change_train[change_train.index==j].iloc[i,:])

test_change=pd.DataFrame()
change_test=test.set_index('id').drop('time',axis=1)
for j in test['id'].unique():
    for i in range(599):
        test_change=test_change.append(change_test[change_test.index==j].iloc[i+1,:]-change_test[change_test.index==j].iloc[i,:])

# 4. Feature engineering

### Add vectors

In [None]:
train['acc_vector']=np.sqrt((train['acc_x'] ** 2) +(train['acc_y'] ** 2)+(train['acc_z'] ** 2))
train['gy_vector']=np.sqrt((train['gy_x'] ** 2) +(train['gy_y'] ** 2)+(train['gy_z'] ** 2))

test['acc_vector']=np.sqrt((test['acc_x'] ** 2) +(test['acc_y'] ** 2)+(test['acc_z'] ** 2))
test['gy_vector']=np.sqrt((test['gy_x'] ** 2) +(test['gy_y'] ** 2)+(test['gy_z'] ** 2))

train['acc_YZvector']=np.sqrt((train['acc_y'] ** 2) +(train['acc_z'] ** 2))
train['gy_YZvector']=np.sqrt((train['gy_y'] ** 2) +(train['gy_z'] ** 2))

train['acc_XYvector']=np.sqrt((train['acc_x'] ** 2) +(train['acc_y'] ** 2))
train['gy_XYvector']=np.sqrt((train['gy_x'] ** 2) +(train['gy_y'] ** 2))

train['acc_XZvector']=np.sqrt((train['acc_x'] ** 2) +(train['acc_z'] ** 2))
train['gy_XZvector']=np.sqrt((train['gy_x'] ** 2) +(train['gy_z'] ** 2))

test['acc_YZvector']=np.sqrt((test['acc_y'] ** 2) +(test['acc_z'] ** 2))
test['gy_YZvector']=np.sqrt((test['gy_y'] ** 2) +(test['gy_z'] ** 2))

test['acc_XYvector']=np.sqrt((test['acc_x'] ** 2) +(test['acc_y'] ** 2))
test['gy_XYvector']=np.sqrt((test['gy_x'] ** 2) +(test['gy_y'] ** 2))

test['acc_XZvector']=np.sqrt((test['acc_x'] ** 2) +(test['acc_z'] ** 2))
test['gy_XZvector']=np.sqrt((test['gy_x'] ** 2) +(test['gy_z'] ** 2))

### Add center of gravity

In [None]:
# 자이로스코프 무게중심
train['gy_Centerofgravity']=(train['gy_x']+train['gy_y']+train['gy_z'])/3
test['gy_Centerofgravity']=(test['gy_x']+test['gy_y']+test['gy_z'])/3

### Add roll & pitch

In [3]:
# roll & pitch
train['roll'] = np.arctan(train['acc_y']/np.sqrt(train['acc_x'] ** 2 + train['acc_z'] ** 2))
test['roll'] = np.arctan(test['acc_y']/np.sqrt(test['acc_x'] ** 2 + test['acc_z'] ** 2))

train['pitch'] = np.arctan(train['acc_x']/np.sqrt(train['acc_y'] ** 2 + train['acc_z'] ** 2))
test['pitch'] = np.arctan(test['acc_x']/np.sqrt(test['acc_y'] ** 2 + test['acc_z'] ** 2))

train['math_roll'] = np.arctan(- train['acc_x']/np.sqrt(train['acc_y'] ** 2 + train['acc_z'] ** 2)) * (180/pi)
train['math_pitch'] = np.arctan(train['acc_y']/np.sqrt(train['acc_x'] ** 2 + train['acc_z'] ** 2)) * (180/pi)

test['math_roll'] = np.arctan(- test['acc_x']/np.sqrt(test['acc_y'] ** 2 + test['acc_z'] ** 2)) * (180/pi)
test['math_pitch'] = np.arctan(test['acc_y']/np.sqrt(test['acc_x'] ** 2 + test['acc_z'] ** 2)) * (180/pi)

train['gy_roll'] = np.arctan(train['gy_y']/np.sqrt(train['gy_x'] ** 2 + train['gy_z'] ** 2))
test['gy_roll'] = np.arctan(test['gy_y']/np.sqrt(test['gy_x'] ** 2 + test['gy_z'] ** 2))

train['gy_pitch'] = np.arctan(train['gy_x']/np.sqrt(train['gy_y'] ** 2 + train['gy_z'] ** 2))
test['gy_pitch'] = np.arctan(test['gy_x']/np.sqrt(test['gy_y'] ** 2 + test['gy_z'] ** 2))

train['gy_math_roll'] = np.arctan(- train['gy_x']/np.sqrt(train['gy_y'] ** 2 + train['gy_z'] ** 2)) * (180/pi)
train['gy_math_pitch'] = np.arctan(train['gy_y']/np.sqrt(train['gy_x'] ** 2 + train['gy_z'] ** 2)) * (180/pi)

test['gy_math_roll'] = np.arctan(- test['gy_x']/np.sqrt(test['gy_y'] ** 2 + test['gy_z'] ** 2)) * (180/pi)
test['gy_math_pitch'] = np.arctan(test['gy_y']/np.sqrt(test['gy_x'] ** 2 + test['gy_z'] ** 2)) * (180/pi)

### Apply the custom function

In [62]:
features = ['id', 'acc_x', 'acc_y', 'acc_z', 'gy_x', 'gy_y', 'gy_z','acc_vector','gy_vector'
            ,'acc_YZvector','gy_YZvector','acc_XYvector','gy_XYvector','acc_XZvector','gy_XZvector','gy_Centerofgravity']
features2 = ['id', 'roll','pitch','math_roll','math_pitch', 'gy_roll','gy_pitch','gy_math_roll','gy_math_pitch']

train_preprocess = train[features].groupby('id').agg(['max', 'min', 'mean','std','median',range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt])
temp_train_preprocess = train[features2].groupby('id').agg([range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt])
test_preprocess = test[features].groupby('id').agg(['max', 'min', 'mean','std','median',range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt])
temp_test_preprocess = test[features2].groupby('id').agg([range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt])

train_preprocess=pd.concat([train_preprocess,temp_train_preprocess],axis=1)
test_preprocess=pd.concat([test_preprocess,temp_test_preprocess],axis=1)

train_preprocess.columns=[i[0]+'_'+i[1] for i in train_preprocess.columns]
test_preprocess.columns=[i[0]+'_'+i[1] for i in test_preprocess.columns]

### Add std mean

In [63]:
train_preprocess['acc_std_mean']=(train_preprocess['acc_x_std']+train_preprocess['acc_y_std']+ train_preprocess['acc_z_std'])/3
train_preprocess['gy_std_mean']=(train_preprocess['gy_x_std']+train_preprocess['gy_y_std']+ train_preprocess['gy_z_std'])/3

test_preprocess['acc_std_mean']=(test_preprocess['acc_x_std']+test_preprocess['acc_y_std']+ test_preprocess['acc_z_std'])/3
test_preprocess['gy_std_mean']=(test_preprocess['gy_x_std']+test_preprocess['gy_y_std']+ test_preprocess['gy_z_std'])/3

### Add the front part of data

In [65]:
# 가속도계 첫번째 데이터
train_acc_head1=pd.DataFrame()
for i in train['id'].unique():
    train_acc_head1=train_acc_head1.append(train[['id','acc_x','acc_y','acc_z']][train[['id','acc_x','acc_y','acc_z']]['id']==i].head(1))
train_acc_head1.columns=['id','first_acc_x','first_acc_y','first_acc_z']
train_acc_head1.set_index('id',inplace=True)

test_acc_head1=pd.DataFrame()
for i in test['id'].unique():
    test_acc_head1=test_acc_head1.append(test[['id','acc_x','acc_y','acc_z']][test[['id','acc_x','acc_y','acc_z']]['id']==i].head(1))
test_acc_head1.columns=['id','first_acc_x','first_acc_y','first_acc_z']
test_acc_head1.set_index('id',inplace=True)

# 가속도계 첫 3초
train_acc_head=pd.DataFrame()
for i in train['id'].unique():
    train_acc_head=train_acc_head.append(train[['id','acc_x','acc_y','acc_z']][train[['id','acc_x','acc_y','acc_z']]['id']==i].head(150))
train_acc_head.columns=['id','head_acc_x','head_acc_y','head_acc_z']
train_acc_head=train_acc_head.groupby('id').mean()

test_acc_head=pd.DataFrame()
for i in test['id'].unique():
    test_acc_head=test_acc_head.append(test[['id','acc_x','acc_y','acc_z']][test[['id','acc_x','acc_y','acc_z']]['id']==i].head(150))
test_acc_head.columns=['id','head_acc_x','head_acc_y','head_acc_z']
test_acc_head=test_acc_head.groupby('id').mean()

train_preprocess=pd.concat([train_preprocess,train_acc_head,train_acc_head1],axis=1)
test_preprocess=pd.concat([test_preprocess,test_acc_head,test_acc_head1],axis=1)

# 자이로스코프 첫 3초
train_gy_head=pd.DataFrame()
for i in train['id'].unique():
    train_gy_head=train_gy_head.append(train[['id','gy_x','gy_y','gy_z']][train[['id','gy_x','gy_y','gy_z']]['id']==i].head(150))
train_gy_head.columns=['id','head_gy_x','head_gy_y','head_gy_z']
train_gy_head=train_gy_head.groupby('id').mean()

test_gy_head=pd.DataFrame()
for i in test['id'].unique():
    test_gy_head=test_gy_head.append(test[['id','gy_x','gy_y','gy_z']][test[['id','gy_x','gy_y','gy_z']]['id']==i].head(150))
test_gy_head.columns=['id','head_gy_x','head_gy_y','head_gy_z']
test_gy_head=test_gy_head.groupby('id').mean()

train_preprocess=pd.concat([train_preprocess,train_gy_head],axis=1)
test_preprocess=pd.concat([test_preprocess,test_gy_head],axis=1)

### Add change data

In [77]:
# train 변화량 데이터 추가
temp=train_change[['id','acc_x','acc_y','acc_z']]
temp.columns=['id','Cacc_x','Cacc_y','Cacc_z']
temp['Croll'] = np.arctan(temp['Cacc_y']/np.sqrt(temp['Cacc_x'] ** 2 + temp['Cacc_z'] ** 2))
temp['Cpitch'] = np.arctan(temp['Cacc_x']/np.sqrt(temp['Cacc_y'] ** 2 + temp['Cacc_z'] ** 2))
temp['Cmath_roll'] = np.arctan(- temp['Cacc_x']/np.sqrt(temp['Cacc_y'] ** 2 + temp['Cacc_z'] ** 2)) * (180/pi)
temp['Cmath_pitch'] = np.arctan(temp['Cacc_y']/np.sqrt(temp['Cacc_x'] ** 2 + temp['Cacc_z'] ** 2)) * (180/pi)
temp=temp.groupby('id').agg([range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt])
train_preprocess=pd.concat([train_preprocess,temp],axis=1)

temp=train_change[['id','gy_x','gy_y','gy_z']]
temp.columns=['id','Cgy_x','Cgy_y','Cgy_z']
temp['Cgyroll'] = np.arctan(temp['Cgy_y']/np.sqrt(temp['Cgy_x'] ** 2 + temp['Cgy_z'] ** 2))
temp['Cgypitch'] = np.arctan(temp['Cgy_x']/np.sqrt(temp['Cgy_y'] ** 2 + temp['Cgy_z'] ** 2))
temp['Cgymath_roll'] = np.arctan(- temp['Cgy_x']/np.sqrt(temp['Cgy_y'] ** 2 + temp['Cgy_z'] ** 2)) * (180/pi)
temp['Cgymath_pitch'] = np.arctan(temp['Cgy_y']/np.sqrt(temp['Cgy_x'] ** 2 + temp['Cgy_z'] ** 2)) * (180/pi)
temp=temp.groupby('id').agg([range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt])
train_preprocess=pd.concat([train_preprocess,temp],axis=1)

# test 변화량 데이터 추가
temp=test_change[['id','acc_x','acc_y','acc_z']]
temp.columns=['id','Cacc_x','Cacc_y','Cacc_z']
temp['Croll'] = np.arctan(temp['Cacc_y']/np.sqrt(temp['Cacc_x'] ** 2 + temp['Cacc_z'] ** 2))
temp['Cpitch'] = np.arctan(temp['Cacc_x']/np.sqrt(temp['Cacc_y'] ** 2 + temp['Cacc_z'] ** 2))
temp['Cmath_roll'] = np.arctan(- temp['Cacc_x']/np.sqrt(temp['Cacc_y'] ** 2 + temp['Cacc_z'] ** 2)) * (180/pi)
temp['Cmath_pitch'] = np.arctan(temp['Cacc_y']/np.sqrt(temp['Cacc_x'] ** 2 + temp['Cacc_z'] ** 2)) * (180/pi)
temp=temp.groupby('id').agg([range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt])
test_preprocess=pd.concat([test_preprocess,temp],axis=1)

temp=test_change[['id','gy_x','gy_y','gy_z']]
temp.columns=['id','Cgy_x','Cgy_y','Cgy_z']
temp['Cgyroll'] = np.arctan(temp['Cgy_y']/np.sqrt(temp['Cgy_x'] ** 2 + temp['Cgy_z'] ** 2))
temp['Cgypitch'] = np.arctan(temp['Cgy_x']/np.sqrt(temp['Cgy_y'] ** 2 + temp['Cgy_z'] ** 2))
temp['Cgymath_roll'] = np.arctan(- temp['Cgy_x']/np.sqrt(temp['Cgy_y'] ** 2 + temp['Cgy_z'] ** 2)) * (180/pi)
temp['Cgymath_pitch'] = np.arctan(temp['Cgy_y']/np.sqrt(temp['Cgy_x'] ** 2 + temp['Cgy_z'] ** 2)) * (180/pi)
temp=temp.groupby('id').agg([range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt])
test_preprocess=pd.concat([test_preprocess,temp],axis=1)

### Clustering - KMeans

In [67]:
model = KMeans(n_clusters=5,random_state=20)
model.fit(train_preprocess)
train_predict = model.predict(train_preprocess)
train_preprocess['cluster']=train_predict

test_predict = model.predict(test_preprocess)
test_preprocess['cluster']=test_predict

### Update columns name

In [68]:
column_name=list(train_preprocess.iloc[:,:247].columns)
column_name.extend([i[0]+ '-' + i[1] for i in train_preprocess.iloc[:,247:-1].columns])
column_name.extend(list(train_preprocess.iloc[:,-1:].columns))

train_preprocess.columns=column_name
test_preprocess.columns=column_name

In [76]:
train_preprocess.head(2)

Unnamed: 0_level_0,acc_x_max,acc_x_min,acc_x_mean,acc_x_std,acc_x_median,acc_x_range_func,acc_x_iqr_func2,acc_x_iqr_func3,acc_x_iqr_func4,acc_x_premad,acc_x_preskew,acc_x_prekurt,acc_y_max,acc_y_min,acc_y_mean,acc_y_std,acc_y_median,acc_y_range_func,acc_y_iqr_func2,acc_y_iqr_func3,acc_y_iqr_func4,acc_y_premad,acc_y_preskew,acc_y_prekurt,acc_z_max,acc_z_min,acc_z_mean,acc_z_std,acc_z_median,acc_z_range_func,acc_z_iqr_func2,acc_z_iqr_func3,acc_z_iqr_func4,acc_z_premad,acc_z_preskew,acc_z_prekurt,gy_x_max,gy_x_min,gy_x_mean,gy_x_std,gy_x_median,gy_x_range_func,gy_x_iqr_func2,gy_x_iqr_func3,gy_x_iqr_func4,gy_x_premad,gy_x_preskew,gy_x_prekurt,gy_y_max,gy_y_min,gy_y_mean,gy_y_std,gy_y_median,gy_y_range_func,gy_y_iqr_func2,gy_y_iqr_func3,gy_y_iqr_func4,gy_y_premad,gy_y_preskew,gy_y_prekurt,gy_z_max,gy_z_min,gy_z_mean,gy_z_std,gy_z_median,gy_z_range_func,gy_z_iqr_func2,gy_z_iqr_func3,gy_z_iqr_func4,gy_z_premad,gy_z_preskew,gy_z_prekurt,acc_vector_max,acc_vector_min,acc_vector_mean,acc_vector_std,acc_vector_median,acc_vector_range_func,acc_vector_iqr_func2,acc_vector_iqr_func3,acc_vector_iqr_func4,acc_vector_premad,acc_vector_preskew,acc_vector_prekurt,gy_vector_max,gy_vector_min,gy_vector_mean,gy_vector_std,gy_vector_median,gy_vector_range_func,gy_vector_iqr_func2,gy_vector_iqr_func3,gy_vector_iqr_func4,gy_vector_premad,gy_vector_preskew,gy_vector_prekurt,acc_YZvector_max,acc_YZvector_min,acc_YZvector_mean,acc_YZvector_std,acc_YZvector_median,acc_YZvector_range_func,acc_YZvector_iqr_func2,acc_YZvector_iqr_func3,acc_YZvector_iqr_func4,acc_YZvector_premad,acc_YZvector_preskew,acc_YZvector_prekurt,gy_YZvector_max,gy_YZvector_min,gy_YZvector_mean,gy_YZvector_std,gy_YZvector_median,gy_YZvector_range_func,gy_YZvector_iqr_func2,gy_YZvector_iqr_func3,gy_YZvector_iqr_func4,gy_YZvector_premad,gy_YZvector_preskew,gy_YZvector_prekurt,acc_XYvector_max,acc_XYvector_min,acc_XYvector_mean,acc_XYvector_std,acc_XYvector_median,acc_XYvector_range_func,acc_XYvector_iqr_func2,acc_XYvector_iqr_func3,acc_XYvector_iqr_func4,acc_XYvector_premad,acc_XYvector_preskew,acc_XYvector_prekurt,gy_XYvector_max,gy_XYvector_min,gy_XYvector_mean,gy_XYvector_std,gy_XYvector_median,gy_XYvector_range_func,gy_XYvector_iqr_func2,gy_XYvector_iqr_func3,gy_XYvector_iqr_func4,gy_XYvector_premad,gy_XYvector_preskew,gy_XYvector_prekurt,acc_XZvector_max,acc_XZvector_min,acc_XZvector_mean,acc_XZvector_std,acc_XZvector_median,acc_XZvector_range_func,acc_XZvector_iqr_func2,acc_XZvector_iqr_func3,acc_XZvector_iqr_func4,acc_XZvector_premad,acc_XZvector_preskew,acc_XZvector_prekurt,gy_XZvector_max,gy_XZvector_min,gy_XZvector_mean,gy_XZvector_std,gy_XZvector_median,gy_XZvector_range_func,gy_XZvector_iqr_func2,gy_XZvector_iqr_func3,gy_XZvector_iqr_func4,gy_XZvector_premad,gy_XZvector_preskew,gy_XZvector_prekurt,gy_Centerofgravity_max,gy_Centerofgravity_min,gy_Centerofgravity_mean,gy_Centerofgravity_std,gy_Centerofgravity_median,gy_Centerofgravity_range_func,gy_Centerofgravity_iqr_func2,gy_Centerofgravity_iqr_func3,gy_Centerofgravity_iqr_func4,gy_Centerofgravity_premad,gy_Centerofgravity_preskew,gy_Centerofgravity_prekurt,roll_range_func,roll_iqr_func2,roll_iqr_func3,roll_iqr_func4,roll_premad,roll_preskew,roll_prekurt,pitch_range_func,pitch_iqr_func2,pitch_iqr_func3,pitch_iqr_func4,pitch_premad,pitch_preskew,pitch_prekurt,math_roll_range_func,math_roll_iqr_func2,math_roll_iqr_func3,math_roll_iqr_func4,math_roll_premad,math_roll_preskew,math_roll_prekurt,math_pitch_range_func,math_pitch_iqr_func2,math_pitch_iqr_func3,math_pitch_iqr_func4,math_pitch_premad,math_pitch_preskew,math_pitch_prekurt,gy_roll_range_func,gy_roll_iqr_func2,gy_roll_iqr_func3,gy_roll_iqr_func4,gy_roll_premad,gy_roll_preskew,gy_roll_prekurt,gy_pitch_range_func,gy_pitch_iqr_func2,gy_pitch_iqr_func3,gy_pitch_iqr_func4,gy_pitch_premad,gy_pitch_preskew,gy_pitch_prekurt,gy_math_roll_range_func,gy_math_roll_iqr_func2,gy_math_roll_iqr_func3,gy_math_roll_iqr_func4,gy_math_roll_premad,gy_math_roll_preskew,gy_math_roll_prekurt,gy_math_pitch_range_func,gy_math_pitch_iqr_func2,gy_math_pitch_iqr_func3,gy_math_pitch_iqr_func4,gy_math_pitch_premad,gy_math_pitch_preskew,gy_math_pitch_prekurt,acc_std_mean,gy_std_mean,head_acc_x,head_acc_y,head_acc_z,first_acc_x,first_acc_y,first_acc_z,head_gy_x,head_gy_y,head_gy_z,Cacc_x-range_func,Cacc_x-iqr_func2,Cacc_x-iqr_func3,Cacc_x-iqr_func4,Cacc_x-premad,Cacc_x-preskew,Cacc_x-prekurt,Cacc_y-range_func,Cacc_y-iqr_func2,Cacc_y-iqr_func3,Cacc_y-iqr_func4,Cacc_y-premad,Cacc_y-preskew,Cacc_y-prekurt,Cacc_z-range_func,Cacc_z-iqr_func2,Cacc_z-iqr_func3,Cacc_z-iqr_func4,Cacc_z-premad,Cacc_z-preskew,Cacc_z-prekurt,Croll-range_func,Croll-iqr_func2,Croll-iqr_func3,Croll-iqr_func4,Croll-premad,Croll-preskew,Croll-prekurt,Cpitch-range_func,Cpitch-iqr_func2,Cpitch-iqr_func3,Cpitch-iqr_func4,Cpitch-premad,Cpitch-preskew,Cpitch-prekurt,Cmath_roll-range_func,Cmath_roll-iqr_func2,Cmath_roll-iqr_func3,Cmath_roll-iqr_func4,Cmath_roll-premad,Cmath_roll-preskew,Cmath_roll-prekurt,Cmath_pitch-range_func,Cmath_pitch-iqr_func2,Cmath_pitch-iqr_func3,Cmath_pitch-iqr_func4,Cmath_pitch-premad,Cmath_pitch-preskew,Cmath_pitch-prekurt,Cgy_x-range_func,Cgy_x-iqr_func2,Cgy_x-iqr_func3,Cgy_x-iqr_func4,Cgy_x-premad,Cgy_x-preskew,Cgy_x-prekurt,Cgy_y-range_func,Cgy_y-iqr_func2,Cgy_y-iqr_func3,Cgy_y-iqr_func4,Cgy_y-premad,Cgy_y-preskew,Cgy_y-prekurt,Cgy_z-range_func,Cgy_z-iqr_func2,Cgy_z-iqr_func3,Cgy_z-iqr_func4,Cgy_z-premad,Cgy_z-preskew,Cgy_z-prekurt,Cgyroll-range_func,Cgyroll-iqr_func2,Cgyroll-iqr_func3,Cgyroll-iqr_func4,Cgyroll-premad,Cgyroll-preskew,Cgyroll-prekurt,Cgypitch-range_func,Cgypitch-iqr_func2,Cgypitch-iqr_func3,Cgypitch-iqr_func4,Cgypitch-premad,Cgypitch-preskew,Cgypitch-prekurt,Cgymath_roll-range_func,Cgymath_roll-iqr_func2,Cgymath_roll-iqr_func3,Cgymath_roll-iqr_func4,Cgymath_roll-premad,Cgymath_roll-preskew,Cgymath_roll-prekurt,Cgymath_pitch-range_func,Cgymath_pitch-iqr_func2,Cgymath_pitch-iqr_func3,Cgymath_pitch-iqr_func4,Cgymath_pitch-premad,Cgymath_pitch-preskew,Cgymath_pitch-prekurt,cluster
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1
0,1.344268,0.59194,0.931329,0.191479,0.956149,0.752327,-0.369662,-0.172673,-0.544985,0.164385,0.106076,-1.161667,0.176871,-0.624113,-0.218471,0.177131,-0.240638,0.800985,-0.339302,-0.12396,-0.468257,0.138511,0.054884,-0.881409,-0.054876,-0.786336,-0.370422,0.135131,-0.346749,0.731459,-0.230312,-0.060193,-0.353804,0.082231,-0.595532,-0.148805,31.644123,-46.254836,-1.865269,13.284216,-1.273569,77.898959,-21.73399,-6.42314,-32.746229,8.773819,-0.398648,0.211136,69.847244,-85.887677,-3.359506,24.300479,-2.36223,155.734921,-32.537213,-8.711901,-62.134828,13.181243,-0.204916,0.732569,55.953827,-79.930029,1.182107,25.275185,1.913286,135.883856,-38.84421,-9.41338,-63.741066,14.828431,-0.577464,0.366905,1.464711,0.700301,1.053002,0.172356,1.052954,0.764409,-0.328811,-0.130663,-0.491498,0.140697,0.178378,-0.981278,96.204555,1.978401,32.118575,19.715056,27.491634,94.226154,-32.373991,-10.771451,-59.674426,12.317919,0.923424,0.319576,0.794231,0.232832,0.473279,0.102563,0.467988,0.5614,-0.178997,-0.052528,-0.284987,0.072405,0.243839,-0.333986,94.548967,1.281162,28.110417,21.225888,21.458241,93.267805,-35.619047,-11.004654,-62.935609,12.799203,0.96537,0.17889,1.390464,0.595115,0.967736,0.215859,1.006181,0.795349,-0.423951,-0.205698,-0.597643,0.196965,0.051286,-1.299702,88.634207,0.930562,23.387205,15.293768,19.912021,87.703645,-24.420074,-6.823188,-44.473889,8.61002,1.096306,1.105979,1.42093,0.699634,1.018122,0.151276,1.013342,0.721296,-0.27887,-0.091615,-0.45108,0.117718,0.250073,-0.730244,82.06914,0.473673,24.576975,14.668129,21.917642,81.595467,-23.451244,-7.117599,-40.513998,9.512299,0.899471,0.85873,43.994765,-45.377153,-1.347556,16.556301,-1.259386,89.371917,-26.895298,-8.555599,-43.74559,10.716499,-0.081081,-0.204728,0.724141,-0.283079,-0.087735,-0.416993,0.103789,0.322907,-0.706524,0.708736,-0.209801,-0.04876,-0.323442,0.069249,-0.775604,0.025097,40.607573,-12.020709,-2.793739,-21.861214,3.96765,0.775604,0.025097,41.490237,-16.219237,-5.026859,-23.891958,5.946675,0.322907,-0.706524,2.833836,-1.305992,-0.490935,-1.791598,0.533758,0.094874,-1.064726,2.977117,-0.982953,-0.247226,-1.925501,0.351814,0.12856,-0.14317,170.576232,-56.319063,-14.164984,-92.578624,20.157464,-0.12856,-0.14317,162.366819,-74.827804,-28.128525,-102.651024,30.582057,0.094874,-1.064726,0.167914,20.953293,0.930401,-0.204424,-0.357682,1.206087,-0.179371,-0.148447,-2.033206,-7.620945,-1.012112,0.350553,-0.086247,-0.027968,-0.14036,0.034023,0.201069,0.184567,0.510419,-0.1088,-0.028574,-0.180126,0.041689,-0.224045,0.79973,0.500255,-0.105577,-0.028014,-0.188058,0.038816,0.166563,1.211711,3.034129,-1.458924,-0.420265,-2.101978,0.598439,-0.004368,-0.93404,2.871435,-1.060137,-0.331797,-1.652963,0.415447,0.051005,-0.513209,164.521122,-60.741397,-19.010576,-93.186134,23.803348,-0.051005,-0.513209,173.842806,-83.590203,-24.079415,-120.434447,34.288042,-0.004368,-0.93404,65.48026,-13.889571,-3.869127,-23.585541,5.419533,0.041505,0.868294,37.129643,-8.924823,-2.690859,-14.098464,3.699058,0.359877,0.502568,34.13491,-7.172572,-2.040204,-12.030779,2.833154,0.20025,0.693328,2.96486,-1.117733,-0.333715,-1.703317,0.447856,-0.003416,-0.584324,3.071607,-1.855611,-0.657876,-2.326526,0.774576,0.09075,-1.277056,175.990145,-106.31865,-37.693509,-133.518563,44.379921,-0.09075,-1.277056,169.873979,-64.041361,-19.120451,-97.59285,25.660234,-0.003416,-0.584324,2
1,1.23402,-2.156208,-0.76658,0.495528,-0.805767,3.390228,-0.89232,-0.259957,-1.126772,0.321762,-0.202959,0.229016,0.700065,-1.295598,-0.317258,0.336415,-0.228905,1.995663,-0.62478,-0.261024,-0.888156,0.232913,-0.145735,-0.589686,0.888661,-1.019531,-0.004223,0.499395,-0.034583,1.908192,-0.911407,-0.190953,-1.412183,0.38246,-0.137812,-0.764662,286.624363,-325.328531,11.0716,79.244561,3.81065,611.952894,-113.384251,-20.81652,-192.176608,37.677737,-0.345188,2.101102,389.60806,-315.096003,1.740475,96.005289,8.043707,704.704062,-81.603851,-20.981261,-193.633032,27.991032,-0.486011,2.828346,340.170199,-270.980823,1.393294,75.545343,-0.655819,611.151022,-87.20543,-15.41556,-165.339161,34.252342,0.013679,3.906192,2.196166,0.267319,1.096231,0.306356,1.051493,1.928846,-0.342386,-0.104838,-0.906596,0.130239,1.127573,1.962747,536.214788,0.993903,111.047766,94.759619,85.896843,535.220885,-139.487688,-37.597367,-260.125009,49.018489,1.610304,2.953581,1.344783,0.017934,0.614497,0.291843,0.657428,1.326849,-0.59837,-0.185355,-0.74988,0.238548,-0.285172,-0.998284,460.382431,0.353606,86.654114,86.067543,61.04299,460.028825,-113.475881,-30.858755,-256.10974,39.488449,1.756634,3.057427,2.159331,0.148458,0.942026,0.399093,0.916721,2.010872,-0.545287,-0.160073,-1.188818,0.198182,0.485998,0.525718,455.1807,0.61796,92.919778,83.510694,69.823901,454.56274,-121.940758,-29.32638,-243.763534,43.957704,1.653298,3.086353,2.193095,0.258524,0.974214,0.364569,0.951912,1.934571,-0.574976,-0.18191,-1.128214,0.212615,0.782876,0.624354,403.100923,0.771792,82.785383,72.433885,64.964235,402.329132,-110.01277,-35.747911,-206.412478,43.126617,1.537488,2.929244,307.888702,-282.173222,4.735123,65.497872,7.167186,590.061924,-50.7789,-12.641309,-133.801698,18.503918,-0.391348,5.02585,1.73944,-0.718567,-0.272615,-1.012047,0.228442,-0.389179,-0.938336,2.299613,-1.032235,-0.214801,-1.240473,0.386134,0.41678,-0.721744,131.758129,-59.142701,-12.307163,-72.968196,22.123866,-0.41678,-0.721744,99.662551,-41.170872,-15.619687,-57.986042,13.088737,-0.389179,-0.938336,3.082769,-1.296626,-0.379454,-1.989773,0.51729,-0.150883,-0.683533,2.866306,-1.475187,-0.395792,-1.982914,0.623829,-0.100541,-1.006218,164.227251,-84.522017,-22.677214,-119.037459,35.742781,0.100541,-1.006218,176.629662,-74.291169,-21.741119,-114.005603,29.638538,-0.150883,-0.683533,0.443779,83.598398,-0.313634,-0.567014,0.471395,-0.211795,-0.07876,0.854627,-26.182937,8.268198,10.465675,1.834542,-0.116587,-0.021564,-0.259661,0.038897,0.309264,15.576594,0.804631,-0.111154,-0.023733,-0.223176,0.040081,0.127286,3.494794,0.851648,-0.085792,-0.020637,-0.186626,0.033583,0.902761,5.991293,2.949186,-1.355521,-0.467312,-1.940425,0.577064,-0.012845,-0.94599,3.069593,-1.448206,-0.439248,-2.033844,0.578359,0.00044,-0.955137,175.874715,-82.976106,-25.167077,-118.545346,33.137543,-0.00044,-0.955137,168.9759,-77.665614,-26.774979,-111.178168,33.063357,-0.012845,-0.94599,273.314699,-21.121511,-4.788293,-60.485939,7.30574,0.256503,5.516639,254.7305,-18.228214,-3.362948,-40.85991,6.293965,0.320409,9.681797,131.191643,-11.465256,-2.487675,-33.794652,3.947651,0.068497,3.872017,3.001347,-1.227055,-0.369492,-1.899532,0.489228,-0.073283,-0.620949,3.078396,-1.67191,-0.498409,-2.271373,0.713602,0.044136,-1.145265,176.3791,-95.793405,-28.556755,-129.088877,40.886396,-0.044136,-1.145265,171.964513,-70.305098,-21.170336,-108.835181,28.030691,-0.073283,-0.620949,4


# 5. Fit & Predict

### Drop bad features
- permutation importance를 이용하였습니다.

In [None]:
drop_columns=['gy_roll_range_func','gy_XZvector_iqr_func3','gy_XZvector_median','gy_Centerofgravity_prekurt']

### Create 3 catboost model
- depth의 차이를 두었습니다.

In [None]:
cat_depth3=CatBoostClassifier(random_state=20,iterations=26000,task_type='GPU',depth=3,bootstrap_type ='Bernoulli',learning_rate = 0.007932)
cat_depth3.fit(train_preprocess.drop(drop_columns,axis=1), y['label'])
predict_depth3=cat_depth3.predict_proba(test_preprocess.drop(drop_columns,axis=1))

cat_depth4=CatBoostClassifier(random_state=20,iterations=22000,task_type='GPU',depth=4,bootstrap_type ='Bernoulli',learning_rate = 0.007932)
cat_depth4.fit(train_preprocess.drop(drop_columns,axis=1), y['label'])
predict_depth4=cat_depth4.predict_proba(test_preprocess.drop(drop_columns,axis=1))

cat_depth5=CatBoostClassifier(random_state=20,iterations=13000,task_type='GPU',depth=5,bootstrap_type ='Bernoulli')
cat_depth5.fit(train_preprocess.drop(drop_columns,axis=1), y['label'])
predict_depth5=cat_depth5.predict_proba(test_preprocess.drop(drop_columns,axis=1))

### Soft voting

In [None]:
predict= (predict_depth3 * 0.45) + (predict_depth4 * 0.35) + (predict_depth5 * 0.2)

### Submission

In [None]:
sub.iloc[:,1:]=predict
sub.to_csv('[final]Not_StratifiedKfold.csv', index=False)
sub