In [1]:
import random
import warnings
warnings.filterwarnings(action='ignore')

import pandas as pd
import numpy as np
from numpy.random import seed
from numpy.fft import *
from scipy import signal, fftpack
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold

import tensorflow as tf
import tensorflow.keras as keras
from keras.models import Model
from keras.layers import Conv1D, BatchNormalization, Activation, Dropout
from keras.layers import Input, GlobalAveragePooling1D, Dense
from keras.utils import to_categorical
from keras import backend as K 
from keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau

ModuleNotFoundError: No module named 'tensorflow'

#### Data loading

In [151]:
# Show up to 50 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 50)

# Sensor feature tables for the train and test splits, plus the training labels.
train = pd.read_csv('./data/train_features.csv')
train_labels = pd.read_csv('./data/train_labels.csv')
test = pd.read_csv('./data/test_features.csv')

# Submission template whose columns are filled with predictions later on.
submission = pd.read_csv('./data/sample_submission.csv')

#### Feature engineering

#####  가속도, 자이로, (자이로-가속도) 센서값을 에너지로 표현

In [152]:
# Add three derived per-row features to both splits. Each one is the sum of the
# squared x/y/z components raised to the power 1/3:
#   acc_Energy    - accelerometer components
#   gy_Energy     - gyroscope components
#   gy_acc_Energy - component-wise (gyroscope - accelerometer) differences
# NOTE(review): a Euclidean magnitude would use an exponent of 1/2; the 1/3
# exponent is reproduced exactly as found - confirm it is intentional.
for frame in (train, test):
    frame['acc_Energy'] = (
        frame['acc_x']**2 + frame['acc_y']**2 + frame['acc_z']**2
    )**(1/3)

    frame['gy_Energy'] = (
        frame['gy_x']**2 + frame['gy_y']**2 + frame['gy_z']**2
    )**(1/3)

    diff_x = frame['gy_x'] - frame['acc_x']
    diff_y = frame['gy_y'] - frame['acc_y']
    diff_z = frame['gy_z'] - frame['acc_z']
    frame['gy_acc_Energy'] = (diff_x**2 + diff_y**2 + diff_z**2)**(1/3)

###### id별 데이터는 0.02초마다 측정된 값들이기 때문에 이전 시간 대비 변화량 적용

In [153]:
# Jerk: rate of change of a signal between consecutive samples.

dt=0.02  # sampling interval; the notebook states readings are taken every 0.02 seconds
def jerk_signal(signal):
    """Return the first-order finite difference of `signal` divided by `dt`.

    Parameters
    ----------
    signal : array-like
        Samples ordered in time along the first axis.

    Returns
    -------
    numpy.ndarray
        Array with one element fewer than `signal` along the first axis, where
        element ``i`` equals ``(signal[i+1] - signal[i]) / dt``. Inputs with
        fewer than two samples yield an empty array.
    """
    # np.diff performs the same pairwise subtraction as an explicit Python
    # loop, but vectorised; axis=0 keeps the "consecutive samples" semantics.
    return np.diff(np.asarray(signal), axis=0) / dt

In [154]:
# Build "<column>_dt" features (change versus the previous sample, divided by
# dt) for every column after the first two of `train` (id and time in the
# displayed frame).
# Each id is processed on its own so a difference never spans two ids; the
# first row of every id gets 0 because it has no previous sample.
train_signal_cols = list(train.columns[2:])
train_dt=[]
# groupby(sort=False) yields the ids in order of first appearance, matching
# train['id'].unique(), without rescanning the whole frame for every id.
for _, group in tqdm(train.groupby('id', sort=False)):
    # Explicit copy: the new columns are written to an independent frame, not
    # to a slice of `train` (avoids SettingWithCopy ambiguity).
    temp = group.copy()
    for v in train_signal_cols:
        values = jerk_signal(temp[v].values)
        temp[v+'_dt'] = np.insert(values, 0, 0)
    train_dt.append(temp)


100%|██████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:58<00:00, 53.62it/s]


In [69]:
# Build "<column>_dt" features for the test split, one id at a time.
# The column list is taken from `test` itself: reading it from `train` breaks
# as soon as `train` has been replaced by the concatenated "_dt" frame, because
# those extra columns do not exist in `test`.
test_signal_cols = list(test.columns[2:])
test_dt=[]
# groupby(sort=False) yields the ids in order of first appearance, matching
# test['id'].unique(), without rescanning the whole frame for every id.
for _, group in tqdm(test.groupby('id', sort=False)):
    # Explicit copy so the new columns are not written into a slice of `test`.
    temp = group.copy()
    for v in test_signal_cols:
        values = jerk_signal(temp[v].values)
        # The first row of each id has no previous sample, so its change is 0.
        temp[v+'_dt'] = np.insert(values, 0, 0)
    test_dt.append(temp)

100%|████████████████████████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 70.15it/s]


##### 가속도, 자이로 센서값들을 푸리에 변환

In [155]:
def fourier_transform_one_signal(t_signal):
    """Return the amplitude spectrum of a time-domain signal.

    Runs `fftpack.fft` on `t_signal` and takes the absolute value of every
    complex coefficient, so the result has the same length as the input.
    """
    return np.abs(fftpack.fft(t_signal))

In [157]:
# Replace columns 2..7 of `train` (the six raw sensor columns in the displayed
# frame) with their FFT amplitude spectrum, computed separately for every id.
# NOTE(review): the resulting `fft` list is never concatenated back into
# `train` in the cells shown here - confirm whether that step is missing.
# NOTE(review): the name `fft` also rebinds anything exported under that name
# by `from numpy.fft import *`.
train_sensor_cols = list(train.columns[2:8])
fft=[]
for _, group in tqdm(train.groupby('id', sort=False)):
    # Explicit copy so the transformed columns are not written into a slice of
    # `train`.
    temp = group.copy()
    # A distinct inner loop name: the original reused `i` for both the id and
    # the column, shadowing the outer loop variable.
    for col_name in train_sensor_cols:
        temp[col_name] = fourier_transform_one_signal(temp[col_name].values)
    fft.append(temp)

100%|█████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:13<00:00, 235.98it/s]


In [74]:
# Replace columns 2..7 of `test` (the six raw sensor columns in the displayed
# frame) with their FFT amplitude spectrum, computed separately for every id,
# then rebuild `test` from the transformed pieces.
# NOTE(review): a later cell reassigns `test = pd.concat(test_dt)`, which
# discards this FFT result when the notebook is run top to bottom - confirm the
# intended cell order. Re-running this cell applies the FFT to already
# transformed data.
test_sensor_cols = list(test.columns[2:8])
fft_t=[]
for _, group in tqdm(test.groupby('id', sort=False)):
    # Explicit copy so the transformed columns are not written into a slice of
    # `test`.
    temp = group.copy()
    # A distinct inner loop name: the original reused `i` for both the id and
    # the column, shadowing the outer loop variable.
    for col_name in test_sensor_cols:
        temp[col_name] = fourier_transform_one_signal(temp[col_name].values)
    fft_t.append(temp)
test=pd.concat(fft_t)

100%|███████████████████████████████████████████████████████████████████████████████| 782/782 [00:01<00:00, 421.48it/s]


In [None]:
# Stitch the per-id frames (original columns plus their "_dt" columns) back
# into single frames, replacing the previous `train` and `test`.
# NOTE(review): this overwrites whatever `test` held before, including the
# FFT-transformed version assigned in the preceding cell - confirm intent.
train, test = pd.concat(train_dt), pd.concat(test_dt)

##### Standard scaling 적용


In [45]:
# Scale copies so the unscaled `train` / `test` frames stay untouched.
train_s, test_s = train.copy(), test.copy()
# Column labels of `train`, reused when the scaled frames are rebuilt.
col = train.columns

In [46]:
scaler = StandardScaler()

# Every column from position 2 onwards is standardised; the first two columns
# are left as they are.
feature_cols = slice(2, None)

# Fit the scaler on the training split only, then apply those same statistics
# to the test split.
train_s.iloc[:, feature_cols] = scaler.fit_transform(train_s.iloc[:, feature_cols])
test_s.iloc[:, feature_cols] = scaler.transform(test_s.iloc[:, feature_cols])

# Rebuild both frames with the training column order.
train_sc = pd.DataFrame(data=train_s, columns=col)
test_sc = pd.DataFrame(data=test_s, columns=col)

In [47]:
# Display the standardised training frame as the cell output.
train_sc

Unnamed: 0,id,time,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z,acc_Energy,gy_Energy,gy_acc_Energy,acc_x_dt,acc_y_dt,acc_z_dt,gy_x_dt,gy_y_dt,gy_z_dt,acc_Energy_dt,gy_Energy_dt,gy_acc_Energy_dt
0,0,0,27.356382,8.807207,19.465910,0.376992,0.869226,0.150423,0.495681,-0.272719,-0.276391,0.000027,0.000298,-0.000433,0.000347,0.000373,0.000273,0.000101,0.001505,0.001501
1,0,1,-0.054866,0.833464,0.820412,-0.282128,-0.093560,0.011266,0.742974,-0.236152,-0.240632,0.416836,-0.118821,-0.255054,0.032738,-0.349095,0.377085,0.564992,0.166566,0.162871
2,0,2,0.024046,0.315921,0.081086,-0.182551,-0.053585,-0.003708,0.819822,-0.169815,-0.173080,0.086405,0.023750,-0.531727,-0.141582,-0.202368,-0.004887,0.175645,0.300944,0.306341
3,0,3,0.065632,0.117634,-0.040874,-0.194863,0.154242,0.005408,0.785669,-0.035229,-0.040560,-0.058780,-0.213920,0.285459,0.229520,-0.385106,-0.135647,-0.077915,0.609008,0.599518
4,0,4,0.151477,0.300751,0.317742,-0.350724,0.494539,0.154354,0.791528,0.021954,0.016872,0.039823,0.259227,-0.055206,0.057320,-0.174917,-0.028047,0.013483,0.259626,0.260669
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1874995,3124,595,0.365037,0.011656,0.845701,0.080839,0.350395,0.112282,-0.138940,0.829394,0.823900,0.151679,0.037205,0.119409,-0.108728,-0.027804,-0.009085,-0.142794,0.063329,0.063674
1874996,3124,596,10.220817,5.476964,7.441373,3.605246,16.530576,11.843241,-0.167578,0.814816,0.809618,0.150658,-0.000363,0.265559,-0.027936,0.090560,-0.018412,-0.065316,-0.064300,-0.062949
1874997,3124,597,0.386337,0.177768,-0.080193,-0.192468,-0.033904,-0.227861,-0.151875,0.802027,0.797338,0.093524,-0.049283,0.260884,0.082744,0.123264,-0.152712,0.035970,-0.056225,-0.053918
1874998,3124,598,0.728823,0.014037,0.350745,0.136284,1.281790,0.403540,-0.175811,0.801880,0.797431,0.174681,-0.096564,0.071332,0.153722,-0.014412,-0.049662,-0.054574,0.000843,0.001922


##### 모델링

+ CNN, LSTM, CNN+LSTM 등 여러 구조 적용해보다가 CNN에서 Flatten 없이 Global average pooling 한 구조가 가장 성능이 좋아 채택했습니다.


In [49]:
# Turn the flat table into a 3-D array for the Conv1D model:
# 3125 sequences x 600 rows each x the remaining feature columns
# (the first two columns are dropped).
# NOTE(review): assumes rows are grouped by id in time order - confirm.
train_features = train_sc.iloc[:, 2:]
X = np.array(train_features).reshape(3125, 600, -1)
X.shape

(3125, 600, 18)

In [50]:
# Same reshape for the test split: 782 sequences x 600 rows each x the
# remaining feature columns (the first two columns are dropped).
# NOTE(review): assumes rows are grouped by id in time order - confirm.
test_features = test_sc.iloc[:, 2:]
test_x = np.array(test_features).reshape(782, 600, -1)
test_x.shape

(782, 600, 18)

In [51]:
# One-hot encode the `label` column of `train_labels`.
# The earlier plain `y = train_labels['label'].values` assignment was dead code
# (overwritten on the very next line) and has been removed.
# NOTE(review): assumes the rows of `train_labels` are in the same order as the
# sequences in `X` - confirm.
y = tf.keras.utils.to_categorical(train_labels['label'])
y.shape

(3125, 61)

##### 모델 구조 

In [52]:
def cnn_model(input_shape, classes):
    """Build and compile the 1-D convolutional classifier.

    Architecture: three stages of Conv1D -> BatchNormalization -> ReLU ->
    Dropout, followed by global average pooling and a softmax Dense layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed straight to `keras.layers.Input`.
    classes : int
        Number of units in the softmax output layer.

    Returns
    -------
    keras.models.Model
        Model compiled with categorical cross-entropy loss, the Adam optimizer
        and an accuracy metric.
    """
    # Re-seed NumPy and TensorFlow on every call.
    seed(2021)
    tf.random.set_seed(2021)

    input_layer = keras.layers.Input(input_shape)

    # (filters, kernel_size, dropout_rate) for each convolutional stage.
    stage_specs = ((128, 9, 0.3), (256, 6, 0.4), (128, 3, 0.5))

    features = input_layer
    for n_filters, kernel_width, drop_rate in stage_specs:
        features = keras.layers.Conv1D(
            filters=n_filters, kernel_size=kernel_width, padding='same'
        )(features)
        features = keras.layers.BatchNormalization()(features)
        features = keras.layers.Activation('relu')(features)
        features = keras.layers.Dropout(rate=drop_rate)(features)

    # Average over the time axis instead of flattening.
    pooled = keras.layers.GlobalAveragePooling1D()(features)
    output_layer = keras.layers.Dense(classes, activation='softmax')(pooled)

    model = keras.models.Model(inputs=input_layer, outputs=output_layer)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return model

##### 10-fold StratifiedKFold

In [53]:
# 10-fold stratified cross-validation: one model is trained per fold, its best
# checkpoint (lowest val_loss) is restored, and the model is kept for ensembling.
skf = StratifiedKFold(n_splits = 10, random_state = 2021, shuffle = True)
# Halve the learning rate after 4 epochs without improvement.
reLR = ReduceLROnPlateau(patience = 4,verbose = 1,factor = 0.5)
# Stop a fold after 8 epochs without a better val_loss.
es = EarlyStopping(monitor='val_loss', patience=8, mode='min')

accuracy = []  # per-fold validation accuracy, formatted as '%.4f' strings
losss = []     # per-fold validation loss, formatted as '%.4f' strings
models = []    # trained model of every fold, used later for averaging

# Stratify on the integer class recovered from the one-hot labels.
# The index arrays are named train_idx / val_idx so the loop no longer
# overwrites the `train` DataFrame.
for i, (train_idx, val_idx) in enumerate(skf.split(X, y.argmax(1))):
    weights_path = f'./model_kf/cv_study{i + 1}.h5'
    mc = ModelCheckpoint(weights_path, save_best_only=True, verbose=0,
                         monitor='val_loss', mode='min', save_weights_only=True)
    print("-" * 20 +"Fold_"+str(i+1)+ "-" * 20)

    # Input shape and class count are derived from the data instead of being
    # hard-coded as (600, 18) and 61.
    model = cnn_model(X.shape[1:], y.shape[1])
    history = model.fit(X[train_idx], y[train_idx], epochs = 100,
                        validation_data=(X[val_idx], y[val_idx]),
                        verbose=1, batch_size=64, callbacks=[es, mc, reLR])
    # Restore the weights of the best epoch saved by the checkpoint callback.
    model.load_weights(weights_path)

    # Evaluate once; the model is compiled with a single metric, so evaluate
    # returns [loss, accuracy].
    fold_loss, fold_accuracy = model.evaluate(X[val_idx], y[val_idx])
    k_accuracy = '%.4f' % fold_accuracy
    k_loss = '%.4f' % fold_loss

    accuracy.append(k_accuracy)
    losss.append(k_loss)
    models.append(model)

# The reported metric is accuracy (the original label said "Auc").
print('\nK-fold cross validation Accuracy: {}'.format(accuracy))
print('\nK-fold cross validation loss: {}'.format(losss))

--------------------Fold_1--------------------
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100

Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100

Epoch 00038: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100

Epoch 00044: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100

Epoch 00051: ReduceLROnPlateau reducing learning rate t

##### 제출

In [55]:
# Test tensor for prediction: 782 sequences x 600 rows each x the remaining
# feature columns (the first two columns are dropped).
# NOTE(review): this repeats the computation already stored in `test_x`.
scaled_test_features = test_sc.iloc[:, 2:]
test_X = np.array(scaled_test_features).reshape(782, 600, -1)
test_X.shape

(782, 600, 18)

In [56]:
# Predict with every fold model, then average the outputs element-wise.
preds = [fold_model.predict(test_X) for fold_model in models]
pred = np.mean(preds, axis=0)

array([[1.11362115e-05, 2.25996450e-06, 2.11680984e-07, ...,
        6.39248919e-03, 1.50766409e-05, 3.40101496e-06],
       [4.13231173e-04, 1.99007154e-05, 1.22563812e-04, ...,
        8.93750075e-06, 2.06302539e-05, 1.22722922e-05],
       [1.86802447e-03, 3.26019563e-02, 1.62000761e-05, ...,
        8.92708835e-04, 1.20446784e-02, 2.22884724e-03],
       ...,
       [4.16979339e-04, 3.25313522e-06, 1.09946477e-05, ...,
        1.80470997e-05, 1.36069389e-06, 7.54901499e-04],
       [3.85870408e-06, 8.71700991e-04, 9.34987668e-07, ...,
        1.06083007e-07, 1.25161705e-05, 4.91666885e-09],
       [9.32284092e-05, 4.08958658e-06, 1.09709674e-06, ...,
        9.87853055e-05, 9.04675460e-07, 1.53993984e-04]], dtype=float32)

In [57]:
# Reload the submission template so this cell starts from a clean frame.
submission=pd.read_csv('./data/sample_submission.csv')
# Overwrite every column after the first one with the averaged predictions.
submission.iloc[:,1:]=pred
# Display the filled frame as the cell output.
submission

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,...,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60
0,3125,0.000011,0.000002,2.116810e-07,4.708937e-08,1.865657e-04,1.124369e-07,3.247155e-04,0.000002,1.424222e-07,0.005938,0.100756,4.794668e-01,1.128225e-04,3.902865e-01,2.294270e-03,3.994864e-06,2.203607e-06,7.347716e-07,4.205005e-07,9.210566e-08,3.008443e-05,0.000004,7.120603e-07,0.005244,...,0.001091,9.638647e-04,0.000089,1.705775e-05,2.177705e-07,6.212062e-09,0.000386,0.000093,3.137485e-05,8.554223e-04,3.529570e-05,1.155294e-07,3.284435e-06,2.174506e-06,3.665759e-08,4.478506e-04,4.650679e-04,1.211206e-05,2.320302e-06,1.255611e-07,2.459125e-07,8.899527e-10,6.392489e-03,1.507664e-05,3.401015e-06
1,3126,0.000413,0.000020,1.225638e-04,1.330648e-03,5.290742e-05,6.115026e-04,2.825133e-06,0.000066,7.107466e-06,0.000011,0.000024,1.578770e-06,1.445539e-08,2.135061e-06,1.027968e-06,2.940293e-04,2.704988e-05,1.144425e-05,2.083481e-05,4.048173e-06,2.643172e-05,0.000180,2.340468e-03,0.000217,...,0.000310,4.793614e-06,0.000002,3.656085e-08,4.648645e-04,3.747970e-04,0.000015,0.000007,1.712846e-06,4.472250e-06,8.808629e-06,1.643756e-05,1.192999e-04,2.324675e-03,6.748711e-04,7.841913e-06,1.797475e-08,9.208373e-05,1.928369e-04,1.234607e-04,4.037772e-06,1.639472e-04,8.937501e-06,2.063025e-05,1.227229e-05
2,3127,0.001868,0.032602,1.620008e-05,1.283705e-05,3.405659e-05,6.504179e-04,1.054489e-01,0.000152,1.890009e-05,0.000041,0.002011,4.135342e-04,3.376489e-05,2.495863e-03,1.967650e-02,8.969253e-04,1.574834e-04,1.690211e-04,4.927005e-05,2.873521e-04,2.336423e-04,0.000258,5.826922e-07,0.000569,...,0.000390,4.887589e-02,0.006375,3.705476e-04,2.303389e-04,1.743668e-04,0.063292,0.195926,7.786207e-03,4.333588e-01,2.320479e-04,2.547552e-03,2.427308e-03,4.264420e-03,3.207233e-06,4.141561e-05,2.466323e-05,1.434178e-06,8.685702e-04,1.100743e-06,3.044588e-03,6.567472e-08,8.927088e-04,1.204468e-02,2.228847e-03
3,3128,0.000691,0.000010,3.345231e-05,1.176274e-04,1.757207e-05,1.440693e-04,5.411918e-06,0.000118,1.369785e-03,0.000056,0.000156,1.052319e-04,2.757647e-06,2.441343e-06,1.215714e-05,7.255679e-04,1.776193e-06,4.248767e-06,1.051885e-05,8.755162e-06,5.609869e-06,0.000086,1.119547e-04,0.000133,...,0.000285,2.999797e-06,0.000039,1.534645e-06,1.452966e-05,1.911514e-05,0.000014,0.000024,2.528368e-05,4.395631e-05,1.043876e-05,6.607959e-05,3.738505e-03,8.310549e-03,6.330538e-04,1.585062e-04,5.253386e-06,2.855355e-05,1.086101e-04,3.191976e-05,3.284602e-06,5.761672e-05,2.509627e-05,5.781790e-06,7.223898e-03
4,3129,0.004222,0.000031,2.595292e-06,2.950884e-04,2.631509e-04,2.281394e-05,7.910090e-07,0.000003,2.282780e-05,0.000076,0.000025,6.662969e-06,1.271762e-06,3.870370e-07,1.917479e-05,4.299077e-04,9.659639e-06,2.665155e-06,5.571738e-05,2.121639e-06,4.022555e-06,0.000007,3.395142e-04,0.000008,...,0.000016,4.269365e-07,0.000011,5.161713e-06,6.367538e-07,5.421267e-06,0.000096,0.000048,8.105874e-08,1.194872e-06,3.231482e-06,9.941075e-07,3.176446e-04,9.502537e-04,5.820632e-04,5.014890e-06,8.111192e-07,1.425431e-05,2.082707e-06,2.616074e-06,4.458506e-07,2.302143e-03,8.492467e-05,2.439178e-06,3.807932e-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,3902,0.007722,0.000045,2.692790e-07,1.392818e-04,3.632546e-04,3.666391e-05,2.516073e-07,0.000004,5.034942e-05,0.000009,0.000006,5.608847e-07,8.443988e-08,4.424959e-08,3.241649e-06,5.664256e-04,1.966830e-05,4.358430e-06,1.377685e-04,1.753900e-06,9.672356e-07,0.000002,2.599161e-04,0.000001,...,0.000004,1.128838e-07,0.000002,2.651635e-07,1.488061e-06,1.090116e-05,0.000015,0.000004,8.845261e-08,5.673692e-07,2.185301e-06,7.444148e-07,2.017738e-04,3.374573e-04,1.702022e-03,2.183509e-07,3.351120e-08,2.855817e-06,4.701814e-07,2.122393e-07,2.414436e-07,1.602291e-02,2.339808e-05,2.259146e-06,5.595793e-03
778,3903,0.000128,0.000006,2.418103e-06,2.474411e-04,7.793277e-05,3.450090e-05,3.203852e-08,0.000003,1.091073e-06,0.000002,0.000001,2.682169e-07,1.466343e-08,1.076064e-07,2.007611e-07,1.504268e-04,3.400251e-05,3.794539e-06,1.547392e-04,1.618792e-06,2.004600e-06,0.000002,1.284377e-03,0.000007,...,0.000003,7.070109e-08,0.000002,2.374649e-08,1.038312e-05,3.036143e-05,0.000024,0.000007,3.591797e-08,4.240455e-07,1.551022e-06,1.363590e-07,1.432590e-05,1.126615e-04,8.413609e-05,4.325434e-07,7.853837e-09,1.182301e-05,1.135056e-06,1.329223e-06,6.008780e-07,3.952625e-03,5.511012e-06,6.515145e-07,3.573783e-05
779,3904,0.000417,0.000003,1.099465e-05,9.337877e-05,1.634578e-05,8.988440e-05,1.229814e-06,0.000017,2.575660e-05,0.000022,0.000035,7.194992e-06,3.232979e-07,5.068840e-07,7.551430e-06,1.616150e-04,8.896222e-07,4.917642e-07,3.038611e-06,8.085279e-07,4.941950e-06,0.000040,1.765404e-04,0.000032,...,0.000110,4.938819e-07,0.000003,5.427270e-07,2.785574e-06,5.802608e-06,0.000009,0.000012,8.832764e-07,4.378901e-06,1.285613e-06,8.524936e-06,4.783742e-04,2.380906e-03,1.889450e-04,1.027952e-05,3.110814e-07,1.133624e-05,1.826907e-05,1.181306e-05,3.889178e-07,7.185910e-05,1.804710e-05,1.360694e-06,7.549015e-04
780,3905,0.000004,0.000872,9.349877e-07,2.897043e-09,1.360930e-08,2.737833e-08,1.765427e-02,0.000002,1.666289e-08,0.000002,0.000002,1.614539e-05,3.251577e-09,2.272466e-04,7.495165e-08,4.059368e-07,1.741021e-05,4.617530e-07,1.403474e-09,3.055238e-08,4.119734e-07,0.000003,1.097873e-09,0.000007,...,0.000006,9.801980e-01,0.000001,3.178972e-07,1.237006e-05,4.006409e-08,0.000007,0.000009,2.805889e-05,3.204079e-04,2.950669e-07,4.707098e-04,5.880752e-07,8.497389e-07,5.535009e-10,4.641344e-07,5.805668e-09,7.169170e-09,2.958758e-05,7.718703e-08,1.964780e-06,6.251173e-12,1.060830e-07,1.251617e-05,4.916669e-09


In [30]:
# Write the filled submission to disk without the DataFrame index column.
submission.to_csv('final_submission.csv',index=False)