In [1]:
import csv
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import imblearn
import warnings
warnings.filterwarnings('ignore')

In [2]:
def upsample_label(temp_x, temp_y):
    """Give every row of ``temp_x`` a label by time-aligning it with ``temp_y``.

    A sample with timestamp ``t`` receives the label of the first entry of
    ``temp_y`` whose timestamp is ``>= t``. Samples that are later than the
    last label timestamp receive the last label.

    The lookup is done independently for every sample, so a sample whose
    timestamp jumps over several label intervals still lands on the right
    label (a sequential scan that advances one label per sample would lag
    behind in that case).

    Parameters
    ----------
    temp_x : pandas.DataFrame
        Samples; must contain a ``'time'`` column.
    temp_y : pandas.DataFrame
        Labels; must contain ``'time'`` and ``'label'`` columns. ``'time'`` is
        assumed to be sorted in ascending order (required by
        ``np.searchsorted``).

    Returns
    -------
    list
        One label per row of ``temp_x``, in row order.

    Raises
    ------
    ValueError
        If ``temp_x`` has rows but ``temp_y`` has none.
    """
    x_times = temp_x['time'].values
    y_times = temp_y['time'].values
    y_labels = temp_y['label'].values

    if len(x_times) == 0:
        return []
    if len(y_labels) == 0:
        raise ValueError("temp_y contains no labels to assign to the samples in temp_x")

    # Index of the first label timestamp that is >= each sample timestamp.
    label_idx = np.searchsorted(y_times, x_times, side='left')
    # Samples past the final label timestamp fall back to the last label.
    label_idx = np.minimum(label_idx, len(y_labels) - 1)
    return list(y_labels[label_idx])

In [3]:
filepath = 'TrainingData/'
column_list = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z','subject', 'time', 'label']

# Every recording is stored as four files sharing one prefix:
#   <prefix>__x.csv, <prefix>__x_time.csv, <prefix>__y.csv, <prefix>__y_time.csv
# Only the '__x.csv' files are globbed; the three companions are derived from
# the prefix. The list is sorted so the row order of final_df is reproducible.
#
# NOTE(review): pd.read_csv is called with its default header handling, so the
# first line of each CSV is consumed as a header. The columns are renamed
# wholesale below, which suggests the files may be headerless - TODO confirm
# and pass header=None if so.
subject_frames = []
for i in sorted(glob.glob(os.path.join(filepath, '*__x.csv'))):
    # os.path.basename copes with both '/' and '\\' separators, unlike a
    # hard-coded split on one of them.
    file_name = os.path.basename(i).rsplit('__', 1)[0]
    prefix = os.path.join(filepath, file_name)

    # Passing paths (not open handles) lets pandas close each file itself.
    x = pd.read_csv(prefix + '__x.csv')
    x['subject'] = file_name

    x_time = pd.read_csv(prefix + '__x_time.csv')
    x['time'] = x_time.iloc[:, 0]

    y = pd.read_csv(prefix + '__y.csv')
    y['subject'] = file_name

    y_time = pd.read_csv(prefix + '__y_time.csv')
    y['time'] = y_time.iloc[:, 0]

    y.columns = ['label', 'subject', 'time']
    # Labels are recorded on a coarser time grid than the samples; spread them
    # over the sample timestamps.
    label_list = upsample_label(x, y)
    x['label'] = label_list
    x.columns = column_list
    subject_frames.append(x)

# Concatenate once instead of growing the frame inside the loop (which copies
# all accumulated rows on every iteration).
if subject_frames:
    final_df = pd.concat(subject_frames, ignore_index=True)
else:
    final_df = pd.DataFrame([], columns = column_list)

In [4]:
from scipy import stats

def create_windows(X, y, time_steps=1, step=1):
    """Cut ``X``/``y`` into fixed-length sliding windows.

    Window ``k`` covers rows ``k * step`` to ``k * step + time_steps - 1``.
    Its target is the most frequent label inside the window; ties are resolved
    in favour of the smallest label value.

    The majority vote is computed with ``np.unique`` rather than
    ``scipy.stats.mode(...)[0][0]``: the double indexing only works on SciPy
    versions where ``mode`` keeps the reduced axis, and fails once it returns
    a scalar for 1-D input.

    Parameters
    ----------
    X : pandas.DataFrame or array-like, shape (n_samples, n_features)
        Per-sample features.
    y : pandas.Series or array-like, shape (n_samples,)
        Per-sample labels, aligned with ``X``.
    time_steps : int, default 1
        Number of consecutive samples per window.
    step : int, default 1
        Offset between the starts of consecutive windows.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)`` where ``windows`` has shape
        ``(n_windows, time_steps, n_features)`` and ``targets`` has shape
        ``(n_windows, 1)``. When the input is shorter than ``time_steps`` no
        window is produced and ``targets`` has shape ``(0, 1)``.

    Raises
    ------
    ValueError
        If ``time_steps`` or ``step`` is smaller than 1.
    """
    if time_steps < 1 or step < 1:
        raise ValueError("time_steps and step must both be >= 1")

    # Convert once up front; slicing plain arrays is much cheaper than
    # repeated .iloc calls on pandas objects.
    features = np.asarray(X)
    labels = np.asarray(y)

    Xs, ys = [], []
    for start in range(0, len(features) - time_steps + 1, step):
        stop = start + time_steps
        Xs.append(features[start:stop])
        # np.unique returns sorted values, and argmax picks the first maximum,
        # so ties go to the smallest label.
        values, counts = np.unique(labels[start:stop], return_counts=True)
        ys.append(values[np.argmax(counts)])

    return np.array(Xs), np.array(ys).reshape(-1, 1)

In [5]:
# Sliding-window configuration: every window holds TIME_STEPS consecutive
# samples and consecutive windows start STEP samples apart.
TIME_STEPS, STEP = 40, 1

# Features are the six acc_*/gyro_* columns; the target is the per-sample label.
# NOTE(review): the windows are drawn over the concatenated frame of all
# recordings, so a window can straddle the boundary between two recordings.
X = final_df.loc[:, ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']]
y = final_df.loc[:, 'label']

X_data, y_data = create_windows(X, y, TIME_STEPS, STEP)

In [6]:
# Sanity check: per-sample shapes next to the shapes of the windowed arrays.
print(X.shape, X_data.shape)
print(y.shape, y_data.shape)

(1341617, 6) (1341578, 40, 6)
(1341617,) (1341578, 1)


In [7]:
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE  # no longer used in this cell; import kept in case later cells rely on the name

# Resample a small (label, window index) table instead of the 3-D window
# array, then use the surviving indices to select the windows.
df_label = pd.DataFrame(y_data, columns=['label'])
df_label['index_'] = df_label.index

## Library for performing undersampling
# 'not minority' shrinks every class except the smallest one down to the
# size of the smallest class, so all classes end up with the same count.
rus = RandomUnderSampler(sampling_strategy='not minority', random_state=1)
df_balanced, balanced_labels = rus.fit_resample(df_label, df_label['label'])

# A SMOTE(sampling_strategy='not majority') pass used to follow here. With all
# classes already equal in size it had nothing to synthesise, and had it ever
# produced rows it would have interpolated the 'index_' column into values
# that are not valid window positions. It is therefore left out.

# Older imbalanced-learn versions return plain arrays; normalise to a frame.
df_balanced = pd.DataFrame(df_balanced, columns=['label', 'index_'])

# Indices must be integers to be usable for NumPy fancy indexing.
balanced_idx = df_balanced['index_'].values.astype('int64')

# NOTE: this overwrites X_data / y_data, so re-running the cell without
# re-running the windowing cell resamples the already reduced arrays.
X_data = X_data[balanced_idx]
y_data = y_data[balanced_idx]

In [8]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# 70 % of the windows go to training; the remaining 30 % are split again,
# 90 % validation and 10 % test.
# NOTE(review): the windows were created with a stride of STEP samples, so
# neighbouring windows overlap; a random split can place near-duplicate
# windows in train and test. Consider splitting by recording - TODO confirm.
X_train, X_val, y_train, y_val = train_test_split(X_data, y_data, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size=0.1, random_state=42)

## Converting label to OneHot Encoding
# The encoder is fitted on the training labels only. The dense-output keyword
# was renamed between scikit-learn releases (sparse -> sparse_output), so the
# default sparse result is requested and densified explicitly, which works
# with either naming.
enc = OneHotEncoder(handle_unknown='ignore')
enc = enc.fit(y_train)
y_train = enc.transform(y_train).toarray()
y_val = enc.transform(y_val).toarray()
y_test = enc.transform(y_test).toarray()

In [9]:
# Convert the training and validation windows to float32 arrays before they
# are handed to the network.
X_train, X_val = (np.asarray(split).astype('float32') for split in (X_train, X_val))

In [18]:
# Convert the test windows to a float32 array, matching X_train / X_val.
X_test = np.asarray(X_test).astype('float32')

In [10]:
# Sanity check: feature and one-hot label shapes of the validation and
# training splits.
print(X_val.shape, y_val.shape)
print(X_train.shape, y_train.shape)

(59382, 40, 6) (59382, 4)
(153952, 40, 6) (153952, 4)


In [11]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, Dropout, BatchNormalization, MaxPooling1D, UpSampling1D, Dense, Flatten
from tensorflow.keras.optimizers import Adam, SGD

In [23]:
dropout_rate = 0.5
input_layer = Input([40, 6])

# The network is six identical stages (Conv1D -> Dropout -> resampling):
# three stages that halve the time axis with max pooling, followed by three
# mirrored stages that double it again with upsampling.
# Each entry is (number of filters, factory for the resampling layer).
stage_specs = [
    (32, lambda: MaxPooling1D(pool_size =2)),
    (64, lambda: MaxPooling1D(pool_size =2)),
    (128, lambda: MaxPooling1D(pool_size =2)),
    (128, lambda: UpSampling1D(size =2)),
    (64, lambda: UpSampling1D(size =2)),
    (32, lambda: UpSampling1D(size =2)),
]

features = input_layer
for n_filters, make_resampler in stage_specs:
    features = Conv1D(filters=n_filters, kernel_size=2, padding="same", activation="relu")(features)
    features = Dropout(dropout_rate)(features)
    features = make_resampler()(features)

# Flatten the time/channel grid and classify into the four classes.
fl1 = Flatten()(features)

output_layer = Dense(4, activation="softmax")(fl1)

model = Model(inputs=input_layer, outputs=output_layer)

In [24]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 40, 6)]           0         
                                                                 
 conv1d_12 (Conv1D)          (None, 40, 32)            416       
                                                                 
 dropout_12 (Dropout)        (None, 40, 32)            0         
                                                                 
 max_pooling1d_6 (MaxPooling  (None, 20, 32)           0         
 1D)                                                             
                                                                 
 conv1d_13 (Conv1D)          (None, 20, 64)            4160      
                                                                 
 dropout_13 (Dropout)        (None, 20, 64)            0         
                                                           

In [25]:
# Adam with learning rate 0.001 and categorical cross-entropy, which matches
# the one-hot encoded targets. No `metrics` argument is passed here.
model.compile(optimizer=Adam(learning_rate=0.001),  loss='categorical_crossentropy')

In [None]:
# Train for up to 200 epochs in batches of 64, passing (X_val, y_val) as
# validation data; the value returned by fit() is kept in `cnn`.
# NOTE(review): the recorded output stops at epoch 23 and the cell has no
# execution count, so this run looks interrupted - TODO confirm. There is no
# early stopping or checkpointing configured.
cnn = model.fit(X_train, y_train, epochs=200, verbose=1, batch_size=64, validation_data = (X_val, y_val))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200

In [19]:
# model.evaluate() reports the loss first: a bare scalar when the model has
# no extra metrics, otherwise a list that starts with the loss. Storing that
# value under the name `accuracy` would be misleading, so the loss gets its
# own name and the accuracy is computed explicitly from the predictions.
eval_result = model.evaluate(X_test, y_test, verbose=1)
test_loss = float(np.atleast_1d(eval_result)[0])

# Fraction of test windows whose most probable class equals the one-hot target.
accuracy = float(np.mean(
    np.argmax(model.predict(X_test, verbose=0), axis=1) == np.argmax(y_test, axis=1)
))



In [20]:
# Turn the predicted class probabilities into class ids (column positions of
# the one-hot encoding).
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis = 1)

# Collapse the one-hot targets to class ids as well. The guard keeps the cell
# re-runnable: once y_test is 1-D, calling argmax with axis=1 again would
# raise an axis error.
if np.ndim(y_test) > 1:
    y_test = np.argmax(y_test, axis = 1)

In [21]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the test split; both arguments are
# expected to hold class ids at this point (y_test, y_pred as produced by the
# prediction cell).
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.92      0.89      0.90      1663
           1       0.99      0.99      0.99      1661
           2       0.99      0.98      0.98      1581
           3       0.91      0.94      0.92      1693

    accuracy                           0.95      6598
   macro avg       0.95      0.95      0.95      6598
weighted avg       0.95      0.95      0.95      6598

