In [1]:
import os
from time import time

import matplotlib.pyplot as plt
from matplotlib import rcParams
%matplotlib inline
import seaborn as sns
from seaborn import countplot,lineplot, barplot

import numpy as np 
import pandas as pd 

from sklearn import preprocessing
le = preprocessing.LabelEncoder()
from sklearn.metrics import accuracy_score

from scipy import stats
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from numba import jit
import itertools

from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold

import warnings
warnings.filterwarnings('ignore')
import gc
gc.enable()

!ls ../input/

X_test.csv  X_train.csv  sample_submission.csv	y_train.csv


In [2]:
# Load the X_train, X_test and y_train CSVs from ../input/ (same order as the names on the left).
train_raw, test_raw, target_raw = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/X_train.csv', '../input/X_test.csv', '../input/y_train.csv')
)

In [3]:
def quaternion_to_euler(x, y, z, w):
    """Convert a single quaternion ``(x, y, z, w)`` into three angles in radians.

    Parameters
    ----------
    x, y, z, w : float
        Scalar quaternion components. The ``math`` functions used here work on
        scalars only, so array inputs are not supported.

    Returns
    -------
    tuple
        ``(X, Y, Z)`` where X and Z are ``atan2`` results and Y is an ``asin`` result.
    """
    from math import asin, atan2

    # First angle.
    num_x = 2.0 * (w * x + y * z)
    den_x = 1.0 - 2.0 * (x * x + y * y)
    angle_x = atan2(num_x, den_x)

    # Second angle: the asin argument is clamped to [-1, 1] so that values
    # slightly outside the domain do not make asin raise.
    arg_y = 2.0 * (w * y - z * x)
    if arg_y > 1.0:
        arg_y = 1.0
    if arg_y < -1.0:
        arg_y = -1.0
    angle_y = asin(arg_y)

    # Third angle.
    num_z = 2.0 * (w * z + x * y)
    den_z = 1.0 - 2.0 * (y * y + z * z)
    angle_z = atan2(num_z, den_z)

    return angle_x, angle_y, angle_z

In [4]:
def fe_step0(actual):
    """Add quaternion magnitude and unit-normalised orientation columns.

    Reads ``orientation_X/Y/Z/W`` and writes, in this order:
    ``norm_quat`` (sum of the four squared components), ``mod_quat`` (its
    square root) and ``norm_X/Y/Z/W`` (each component divided by ``mod_quat``).

    References:
      https://www.mathworks.com/help/aeroblks/quaternionnorm.html
      https://www.mathworks.com/help/aeroblks/quaternionmodulus.html
      https://www.mathworks.com/help/aeroblks/quaternionnormalize.html

    The frame is modified in place and the same object is returned.
    """
    axes = ('X', 'Y', 'Z', 'W')

    # Accumulate left to right: X^2 + Y^2 + Z^2 + W^2.
    squared_total = actual['orientation_' + axes[0]] ** 2
    for axis in axes[1:]:
        squared_total = squared_total + actual['orientation_' + axis] ** 2

    actual['norm_quat'] = squared_total
    actual['mod_quat'] = actual['norm_quat'] ** 0.5

    for axis in axes:
        actual['norm_' + axis] = actual['orientation_' + axis] / actual['mod_quat']

    return actual

In [5]:
# Add the quaternion norm / normalised-orientation columns to both frames (train first, then test).
train_raw, test_raw = map(fe_step0, (train_raw, test_raw))

In [6]:
def fe_step1(actual):
    """Quaternions to Euler Angles.

    Feeds each row's ``norm_X/Y/Z/W`` values to ``quaternion_to_euler`` and
    stores the three returned angles as ``euler_x``, ``euler_y`` and
    ``euler_z``. The frame is modified in place and returned.
    """
    components = zip(
        actual['norm_X'].tolist(),
        actual['norm_Y'].tolist(),
        actual['norm_Z'].tolist(),
        actual['norm_W'].tolist(),
    )
    # One (X, Y, Z) tuple per row, in row order.
    angles = [quaternion_to_euler(qx, qy, qz, qw) for qx, qy, qz, qw in components]

    for position, axis in enumerate(('x', 'y', 'z')):
        actual['euler_' + axis] = [triple[position] for triple in angles]
    return actual

In [7]:
# Add the euler_x / euler_y / euler_z columns to both frames (train first, then test).
train_raw, test_raw = [fe_step1(frame) for frame in (train_raw, test_raw)]

In [8]:
def feat_eng(data):
    """Collapse per-measurement rows into one row of summary features per series.

    Four derived columns are computed first:
    ``totl_anglr_vel``, ``totl_linr_acc`` and ``totl_xyz`` (square root of the
    summed squares of the X/Y/Z angular-velocity, linear-acceleration and
    orientation columns) and ``acc_vs_vel`` (``totl_linr_acc / totl_anglr_vel``).

    Then, for every column except ``row_id``, ``series_id`` and
    ``measurement_number``, twelve statistics are computed per ``series_id``:
    ``_mean``, ``_median``, ``_max``, ``_min``, ``_std``, ``_range``
    (max - min), ``_maxtoMin`` (max / min), ``_mean_abs_chg``,
    ``_mean_change_of_abs_change``, ``_abs_max``, ``_abs_min`` and
    ``_abs_avg`` (midpoint of ``_abs_min`` and ``_abs_max``).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``series_id`` plus the ``angular_velocity_*``,
        ``linear_acceleration_*`` and ``orientation_*`` X/Y/Z columns.
        The frame passed in is NOT modified; the derived columns are added
        to an internal copy.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``series_id``, one column per (source column, statistic).
        Divisions are not guarded, so ``inf``/``NaN`` values can appear and are
        left for the caller to handle.
    """
    def mean_abs_change(x):
        return np.mean(np.abs(np.diff(x)))

    def mean_change_of_abs_change(x):
        return np.mean(np.diff(np.abs(np.diff(x))))

    # Work on a copy so the caller's frame does not silently gain columns.
    data = data.copy()
    data['totl_anglr_vel'] = (data['angular_velocity_X']**2 + data['angular_velocity_Y']**2 + data['angular_velocity_Z']**2)** 0.5
    data['totl_linr_acc'] = (data['linear_acceleration_X']**2 + data['linear_acceleration_Y']**2 + data['linear_acceleration_Z']**2)**0.5
    data['totl_xyz'] = (data['orientation_X']**2 + data['orientation_Y']**2 + data['orientation_Z']**2)**0.5
    data['acc_vs_vel'] = data['totl_linr_acc'] / data['totl_anglr_vel']

    # Group once; the grouping does not depend on the column being summarised.
    grouped = data.groupby('series_id')

    # Collect every feature first and build the frame in one go; inserting
    # hundreds of columns one at a time fragments the DataFrame.
    stats = {}
    for col in data.columns:
        if col in ('row_id', 'series_id', 'measurement_number'):
            continue
        per_series = grouped[col]
        col_max = per_series.max()
        col_min = per_series.min()
        abs_max = per_series.apply(lambda x: np.max(np.abs(x)))
        abs_min = per_series.apply(lambda x: np.min(np.abs(x)))

        # Insertion order below defines the output column order.
        stats[col + '_mean'] = per_series.mean()
        stats[col + '_median'] = per_series.median()
        stats[col + '_max'] = col_max
        stats[col + '_min'] = col_min
        stats[col + '_std'] = per_series.std()
        stats[col + '_range'] = col_max - col_min
        stats[col + '_maxtoMin'] = col_max / col_min
        stats[col + '_mean_abs_chg'] = per_series.apply(mean_abs_change)
        stats[col + '_mean_change_of_abs_change'] = per_series.apply(mean_change_of_abs_change)
        stats[col + '_abs_max'] = abs_max
        stats[col + '_abs_min'] = abs_min
        stats[col + '_abs_avg'] = (abs_min + abs_max) / 2
    return pd.DataFrame(stats)

In [9]:
# Replace the per-measurement frames with their per-series feature tables.
# From here on train_raw / test_raw are indexed by series_id.
train_raw, test_raw = feat_eng(train_raw), feat_eng(test_raw)

In [10]:
# Zero out missing values and +/- infinity (the ratio features can divide by zero)
# in both feature tables, in place.
for feature_table in (train_raw, test_raw):
    feature_table.fillna(0, inplace=True)
    feature_table.replace([-np.inf, np.inf], 0, inplace=True)

In [11]:
# Fit the label encoder on the surface column and overwrite the column with the encoded values.
# NOTE: re-running this cell without reloading target_raw re-fits `le` on the
# already-encoded values, so the original labels can no longer be recovered from it.
surface_codes = le.fit_transform(target_raw['surface'])
target_raw['surface'] = surface_codes

In [12]:
# Attach the encoded target to the per-series feature table.
train_df = pd.merge(train_raw,target_raw,on='series_id')
train_df.drop('group_id', axis=1, inplace=True)

# Select the model inputs by name. Slicing `columns[1:]` assumed that
# 'series_id' is the first column of the merged frame; when it is not, that
# slice drops a real feature and/or keeps 'series_id' as an input.
non_feature_columns = ('series_id', 'surface')
features = [column for column in train_df.columns if column not in non_feature_columns]

X = train_df[features].values
# Kept as an (n, 1) column array, as before.
y = pd.DataFrame(train_df['surface']).values

# Same columns, same order, for the test feature table.
test = test_raw[features].values

# Placeholder frame; the cross-validation loop rebuilds `submission` for every fold.
submission = pd.DataFrame()
submission['series_id'] = test_raw.index.values
submission['target'] = ""

In [13]:
def runXGB(train_X, train_y, validation_X, validation_y, test_X):
    """Train an XGBoost multi-class model with early stopping and predict ``test_X``.

    Parameters
    ----------
    train_X, train_y
        Training features and labels, passed to ``xgb.DMatrix``.
    validation_X, validation_y
        Hold-out features and labels; their ``merror`` drives early stopping.
    test_X
        Features to predict.

    Returns
    -------
    (pred_test_y, model)
        ``model.predict`` output for ``test_X`` and the trained booster.

    Notes
    -----
    Training runs for at most 500 rounds and stops once the validation error
    has not improved for 100 rounds. The booster returned by ``xgb.train`` is
    the one from the LAST round, so the prediction is explicitly limited to
    the trees up to the best round.
    """
    params = {
        'num_class': 9,
        'objective': 'multi:softmax',
        'eta': 0.1,
        'max_depth': 6,
        'silent': 1,
        'gamma': 0,
        'eval_metric': "merror",
        'min_child_weight': 3,
        'max_delta_step': 1,
        'subsample': 0.9,
        'colsample_bytree': 0.4,
        'colsample_bylevel': 0.6,
        'colsample_bynode': 0.5,
        'lambda': 0,
        'alpha': 0,
        'seed': 0,
    }
    num_rounds = 500

    xgtrain = xgb.DMatrix(train_X, label=train_y)
    xgcv = xgb.DMatrix(validation_X, label=validation_y)
    xgtest = xgb.DMatrix(test_X)

    # Keyword arguments: recent XGBoost releases no longer accept
    # num_boost_round / evals positionally.
    model = xgb.train(
        params,
        xgtrain,
        num_boost_round=num_rounds,
        evals=[(xgcv, 'eval')],
        early_stopping_rounds=100,
    )

    # Predict with the best early-stopped round rather than the last one.
    # `iteration_range` is the current API; older releases only know
    # `ntree_limit` and reject the newer keyword with a TypeError.
    best_iteration = getattr(model, 'best_iteration', None)
    if best_iteration is None:
        pred_test_y = model.predict(xgtest)
    else:
        try:
            pred_test_y = model.predict(xgtest, iteration_range=(0, best_iteration + 1))
        except TypeError:
            pred_test_y = model.predict(xgtest, ntree_limit=model.best_ntree_limit)
    return pred_test_y, model

In [14]:
kfold = 7
skf = StratifiedKFold(n_splits=kfold, shuffle=True, random_state=42)

# Train one model per stratified fold and write that model's test-set
# predictions to submission_<fold>.csv.
for fold_number, (fit_rows, holdout_rows) in enumerate(skf.split(X, y), start=1):
    fold_column = 'surface' + str(fold_number)

    submission = pd.DataFrame()
    submission['series_id'] = test_raw.index.values
    submission[fold_column] = ""

    print('[Fold %d/%d]' % (fold_number, kfold))
    X_train, y_train = X[fit_rows], y[fit_rows]
    X_valid, y_valid = X[holdout_rows], y[holdout_rows]

    preds, model = runXGB(X_train, y_train, X_valid, y_valid, test)

    submission[fold_column] = preds

    submission.to_csv('submission_' + str(fold_number) + '.csv', index=False)

[Fold 1/7]
[0]	eval-merror:0.310786
Will train until eval-merror hasn't improved in 100 rounds.
[1]	eval-merror:0.244973
[2]	eval-merror:0.230347
[3]	eval-merror:0.201097
[4]	eval-merror:0.201097
[5]	eval-merror:0.193784
[6]	eval-merror:0.184644
[7]	eval-merror:0.180987
[8]	eval-merror:0.177331
[9]	eval-merror:0.171846
[10]	eval-merror:0.170018
[11]	eval-merror:0.159049
[12]	eval-merror:0.159049
[13]	eval-merror:0.151737
[14]	eval-merror:0.146252
[15]	eval-merror:0.144424
[16]	eval-merror:0.142596
[17]	eval-merror:0.137112
[18]	eval-merror:0.133455
[19]	eval-merror:0.129799
[20]	eval-merror:0.127971
[21]	eval-merror:0.129799
[22]	eval-merror:0.129799
[23]	eval-merror:0.126143
[24]	eval-merror:0.124314
[25]	eval-merror:0.122486
[26]	eval-merror:0.11883
[27]	eval-merror:0.120658
[28]	eval-merror:0.122486
[29]	eval-merror:0.115174
[30]	eval-merror:0.113346
[31]	eval-merror:0.111517
[32]	eval-merror:0.111517
[33]	eval-merror:0.115174
[34]	eval-merror:0.111517
[35]	eval-merror:0.115174
[36]

[75]	eval-merror:0.076782
[76]	eval-merror:0.076782
[77]	eval-merror:0.078611
[78]	eval-merror:0.076782
[79]	eval-merror:0.080439
[80]	eval-merror:0.080439
[81]	eval-merror:0.080439
[82]	eval-merror:0.080439
[83]	eval-merror:0.078611
[84]	eval-merror:0.078611
[85]	eval-merror:0.078611
[86]	eval-merror:0.078611
[87]	eval-merror:0.078611
[88]	eval-merror:0.078611
[89]	eval-merror:0.078611
[90]	eval-merror:0.078611
[91]	eval-merror:0.078611
[92]	eval-merror:0.080439
[93]	eval-merror:0.078611
[94]	eval-merror:0.078611
[95]	eval-merror:0.078611
[96]	eval-merror:0.078611
[97]	eval-merror:0.080439
[98]	eval-merror:0.080439
[99]	eval-merror:0.080439
[100]	eval-merror:0.080439
[101]	eval-merror:0.078611
[102]	eval-merror:0.078611
[103]	eval-merror:0.078611
[104]	eval-merror:0.078611
[105]	eval-merror:0.078611
[106]	eval-merror:0.078611
[107]	eval-merror:0.078611
[108]	eval-merror:0.078611
[109]	eval-merror:0.078611
[110]	eval-merror:0.078611
[111]	eval-merror:0.078611
[112]	eval-merror:0.078611

[138]	eval-merror:0.062271
[139]	eval-merror:0.062271
[140]	eval-merror:0.062271
[141]	eval-merror:0.062271
[142]	eval-merror:0.062271
[143]	eval-merror:0.064103
[144]	eval-merror:0.064103
[145]	eval-merror:0.065934
[146]	eval-merror:0.064103
[147]	eval-merror:0.065934
[148]	eval-merror:0.064103
[149]	eval-merror:0.065934
[150]	eval-merror:0.067766
[151]	eval-merror:0.069597
[152]	eval-merror:0.067766
[153]	eval-merror:0.065934
[154]	eval-merror:0.067766
[155]	eval-merror:0.065934
[156]	eval-merror:0.065934
[157]	eval-merror:0.065934
[158]	eval-merror:0.067766
[159]	eval-merror:0.067766
[160]	eval-merror:0.065934
[161]	eval-merror:0.065934
[162]	eval-merror:0.067766
[163]	eval-merror:0.065934
[164]	eval-merror:0.065934
[165]	eval-merror:0.067766
[166]	eval-merror:0.067766
[167]	eval-merror:0.067766
[168]	eval-merror:0.065934
[169]	eval-merror:0.067766
[170]	eval-merror:0.065934
[171]	eval-merror:0.065934
[172]	eval-merror:0.064103
[173]	eval-merror:0.064103
[174]	eval-merror:0.062271
[

[166]	eval-merror:0.075368
[167]	eval-merror:0.073529
[168]	eval-merror:0.073529
[169]	eval-merror:0.073529
[170]	eval-merror:0.071691
[171]	eval-merror:0.071691
[172]	eval-merror:0.071691
[173]	eval-merror:0.071691
[174]	eval-merror:0.071691
[175]	eval-merror:0.071691
[176]	eval-merror:0.071691
[177]	eval-merror:0.071691
[178]	eval-merror:0.071691
[179]	eval-merror:0.071691
[180]	eval-merror:0.071691
[181]	eval-merror:0.071691
[182]	eval-merror:0.071691
[183]	eval-merror:0.071691
[184]	eval-merror:0.071691
[185]	eval-merror:0.071691
[186]	eval-merror:0.071691
[187]	eval-merror:0.071691
[188]	eval-merror:0.071691
[189]	eval-merror:0.073529
[190]	eval-merror:0.073529
[191]	eval-merror:0.073529
[192]	eval-merror:0.073529
[193]	eval-merror:0.073529
[194]	eval-merror:0.073529
[195]	eval-merror:0.073529
[196]	eval-merror:0.073529
[197]	eval-merror:0.071691
[198]	eval-merror:0.071691
[199]	eval-merror:0.071691
[200]	eval-merror:0.071691
[201]	eval-merror:0.071691
[202]	eval-merror:0.071691
[

[97]	eval-merror:0.07919
[98]	eval-merror:0.07919
[99]	eval-merror:0.07919
[100]	eval-merror:0.081031
[101]	eval-merror:0.07919
[102]	eval-merror:0.07919
[103]	eval-merror:0.07919
[104]	eval-merror:0.07919
[105]	eval-merror:0.07919
[106]	eval-merror:0.07919
[107]	eval-merror:0.07919
[108]	eval-merror:0.07919
[109]	eval-merror:0.07919
[110]	eval-merror:0.07919
[111]	eval-merror:0.077348
[112]	eval-merror:0.077348
[113]	eval-merror:0.077348
[114]	eval-merror:0.07919
[115]	eval-merror:0.07919
[116]	eval-merror:0.077348
[117]	eval-merror:0.075506
[118]	eval-merror:0.075506
[119]	eval-merror:0.075506
[120]	eval-merror:0.075506
[121]	eval-merror:0.075506
[122]	eval-merror:0.077348
[123]	eval-merror:0.077348
[124]	eval-merror:0.077348
[125]	eval-merror:0.077348
[126]	eval-merror:0.07919
[127]	eval-merror:0.07919
[128]	eval-merror:0.07919
[129]	eval-merror:0.07919
[130]	eval-merror:0.07919
[131]	eval-merror:0.07919
[132]	eval-merror:0.07919
[133]	eval-merror:0.07919
[134]	eval-merror:0.07919
[

[185]	eval-merror:0.077491
[186]	eval-merror:0.077491
[187]	eval-merror:0.077491
[188]	eval-merror:0.077491
[189]	eval-merror:0.077491
[190]	eval-merror:0.077491
[191]	eval-merror:0.077491
[192]	eval-merror:0.077491
[193]	eval-merror:0.077491
[194]	eval-merror:0.079336
[195]	eval-merror:0.079336
[196]	eval-merror:0.079336
[197]	eval-merror:0.079336
[198]	eval-merror:0.077491
[199]	eval-merror:0.079336
[200]	eval-merror:0.079336
[201]	eval-merror:0.077491
[202]	eval-merror:0.079336
[203]	eval-merror:0.077491
[204]	eval-merror:0.077491
[205]	eval-merror:0.077491
[206]	eval-merror:0.077491
[207]	eval-merror:0.077491
[208]	eval-merror:0.077491
[209]	eval-merror:0.077491
[210]	eval-merror:0.077491
[211]	eval-merror:0.077491
[212]	eval-merror:0.077491
[213]	eval-merror:0.077491
[214]	eval-merror:0.077491
[215]	eval-merror:0.077491
[216]	eval-merror:0.077491
[217]	eval-merror:0.077491
[218]	eval-merror:0.077491
[219]	eval-merror:0.077491
[220]	eval-merror:0.077491
[221]	eval-merror:0.077491
[

[37]	eval-merror:0.127542
[38]	eval-merror:0.131238
[39]	eval-merror:0.125693
[40]	eval-merror:0.121996
[41]	eval-merror:0.127542
[42]	eval-merror:0.123845
[43]	eval-merror:0.118299
[44]	eval-merror:0.120148
[45]	eval-merror:0.118299
[46]	eval-merror:0.118299
[47]	eval-merror:0.116451
[48]	eval-merror:0.116451
[49]	eval-merror:0.114603
[50]	eval-merror:0.114603
[51]	eval-merror:0.114603
[52]	eval-merror:0.112754
[53]	eval-merror:0.112754
[54]	eval-merror:0.112754
[55]	eval-merror:0.112754
[56]	eval-merror:0.112754
[57]	eval-merror:0.110906
[58]	eval-merror:0.109057
[59]	eval-merror:0.10536
[60]	eval-merror:0.107209
[61]	eval-merror:0.109057
[62]	eval-merror:0.107209
[63]	eval-merror:0.10536
[64]	eval-merror:0.10536
[65]	eval-merror:0.107209
[66]	eval-merror:0.10536
[67]	eval-merror:0.10536
[68]	eval-merror:0.10536
[69]	eval-merror:0.10536
[70]	eval-merror:0.103512
[71]	eval-merror:0.099815
[72]	eval-merror:0.103512
[73]	eval-merror:0.103512
[74]	eval-merror:0.103512
[75]	eval-merror:0.

KeyboardInterrupt: 

In [None]:
# Read back the seven per-fold prediction files written by the cross-validation loop.
fold_files = [
    'submission_1.csv',
    'submission_2.csv',
    'submission_3.csv',
    'submission_4.csv',
    'submission_5.csv',
    'submission_6.csv',
    'submission_7.csv',
]
(submission1, submission2, submission3, submission4,
 submission5, submission6, submission7) = map(pd.read_csv, fold_files)

In [None]:
from functools import reduce  # import kept so the name stays available to later cells

# Join the per-fold tables on series_id, left to right, so each row carries
# series_id plus one prediction column per fold.
submissions = [submission1,submission2,submission3,submission4,submission5,submission6,submission7]
submission_final = submissions[0]
for fold_submission in submissions[1:]:
    submission_final = pd.merge(submission_final, fold_submission, on='series_id')

In [None]:
# Majority vote across the per-fold prediction columns.
# The merged frame still contains 'series_id'. It must be removed before
# voting, otherwise the id value is counted as one more "prediction" and can
# change the winning class (for example by breaking a tie).
series_ids = submission_final['series_id'].values
fold_votes = submission_final.drop(columns=['series_id'])

# Column 0 of the mode table holds the first most-frequent value of each row.
majority_code = fold_votes.mode(axis='columns')[0]

submission_final = pd.DataFrame({'surface': majority_code.astype('int')})
# Map the integer codes back to the original surface labels.
submission_final['surface'] = le.inverse_transform(submission_final['surface'])
# Ids come from the merged frame itself, so they stay aligned with the votes row by row.
submission_final['series_id'] = series_ids
submission_final.to_csv('submission_final.csv', index=False)