In [269]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from scipy import stats


In [270]:
from keras.models import Model
from keras.layers import LSTM, Dropout, Dense, Conv1D, ConvLSTM2D, Input
from keras.optimizers import Adam, SGD,RMSprop
from numpy import save
from numpy import load
import sklearn
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [3]:
import pymongo
from pymongo import MongoClient
import pprint

import pandas as pd
import numpy as np
import subprocess

#bson to json
from bson.objectid import ObjectId
from bson.json_util import loads, dumps

import codecs
import sys
import json
import dns
import os

# Read Data

In [331]:
def read_data(motion_path='Torso_Motion.txt', label_path='Label.txt'):
    """Load the sensor features and the activity labels and join them on time.

    Parameters
    ----------
    motion_path : str or path-like, default 'Torso_Motion.txt'
        Space-delimited file with 23 values per row (timestamp followed by
        22 sensor readings).
    label_path : str or path-like, default 'Label.txt'
        Space-delimited file with 8 values per row (timestamp followed by
        7 label columns).

    Returns
    -------
    pandas.DataFrame
        Inner join of the sensor columns with 'Coarse_label' and 'Fine_label'
        on the string column 'Timestep'; rows containing NaN are removed.
    """
    motion_columns = ['Timestep', 'AccX', 'AccY', 'AccZ',
                      'Gyroscope_X', 'Gyroscope_Y', 'Gyroscope_Z',
                      'Mag_meter_X', 'Mag_meter_Y', 'Mag_meter_Z',
                      'Orientation_w', 'Orientation_x', 'Orientation_y', 'Orientation_z',
                      'Gravity_X', 'Gravity_Y', 'Gravity_Z',
                      'Linear_accX', 'Linear_accY', 'Linear_accZ',
                      'Pressure', 'Altitude', 'Temperature']
    label_columns = ['Timestep', 'Coarse_label', 'Fine_label',
                     'Road', 'Traffic', 'Tunnel', 'Social', 'Food']

    # 'Timestep' is kept as text: float32 cannot represent 13-digit timestamps
    # exactly, and the column is the join key below.
    motion_dtypes = dict.fromkeys(motion_columns, 'float32')
    motion_dtypes['Timestep'] = str

    # Column names are supplied by hand, so the files are treated as header-less
    # (NOTE(review): confirm the exports really have no header line). Without
    # header=None pandas would consume the first sample as column names.
    features = pd.read_csv(motion_path, delimiter=' ', header=None,
                           names=motion_columns, dtype=motion_dtypes)
    # Keep only the part before the decimal point so the key matches the label file.
    features['Timestep'] = features['Timestep'].str.split('.').str[0]

    label_dtypes = dict.fromkeys(label_columns, 'float64')
    label_dtypes['Timestep'] = str
    labels = pd.read_csv(label_path, delimiter=' ', header=None,
                         names=label_columns, dtype=label_dtypes)

    targets = labels[['Timestep', 'Coarse_label', 'Fine_label']]
    merged = pd.merge(features, targets, on='Timestep')

    return merged.dropna()

In [7]:
#data.head()

# Data Preprocessing

In [11]:
def filter_data(data):
    """Drop every row whose 'Coarse_label' is not strictly positive.

    The input frame is left untouched; the returned frame is re-indexed
    from 0 so positional slicing downstream lines up with the index.
    """
    has_activity = data['Coarse_label'].gt(0)  # label 0 rows are discarded
    return data.loc[has_activity].reset_index(drop=True)
    

In [10]:
# Number of consecutive samples that make up one window fed to the model.
LEN_OF_SEQUENCE = 200
# Offset, in samples, between the two windows taken at each position (50 / 200 = 25%).
OVERLAP = 50 # 25% overlap
# Number of sensor channels per timestep (19 columns are stacked in make_numpy_matrix).
FEATURE_DIMENSION = 19

In [12]:
def make_numpy_matrix(data, seq_len=None, overlap=None):
    """Cut the labelled recording into fixed-length windows for the LSTM.

    Starting at ``overlap`` and stepping by ``seq_len``, two windows are
    emitted for every position ``i``: ``[i, i + seq_len)`` followed by the same
    window shifted back by ``overlap`` samples. Each window is labelled with
    the most frequent 'Coarse_label' it contains (the smallest label wins a tie).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the 19 sensor columns listed below and 'Coarse_label'.
    seq_len : int, optional
        Window length in samples; defaults to the module constant LEN_OF_SEQUENCE.
    overlap : int, optional
        Backward shift of the second window; defaults to the module constant OVERLAP.

    Returns
    -------
    X : numpy.ndarray, float32, shape (n_windows, seq_len, 19)
        Axis 1 is time, axis 2 is the sensor channel, in the order of
        ``feature_columns``.
    y : numpy.ndarray, float32, shape (n_windows,)
        Majority label of each window.
    """
    seq_len = LEN_OF_SEQUENCE if seq_len is None else seq_len
    overlap = OVERLAP if overlap is None else overlap

    feature_columns = ['AccX', 'AccY', 'AccZ',
                       'Gyroscope_X', 'Gyroscope_Y', 'Gyroscope_Z',
                       'Mag_meter_X', 'Mag_meter_Y', 'Mag_meter_Z',
                       'Linear_accX', 'Linear_accY', 'Linear_accZ',
                       'Gravity_X', 'Gravity_Y', 'Gravity_Z',
                       'Orientation_x', 'Orientation_y', 'Orientation_z', 'Orientation_w']

    # Materialise the signal once as (n_samples, n_features). Slicing rows then
    # yields (seq_len, n_features) windows directly. Building (n_features, seq_len)
    # blocks and calling reshape((-1, seq_len, n_features)) would NOT transpose
    # them; it would interleave time and channel values.
    signal = data[feature_columns].to_numpy(dtype='float32')
    coarse = data['Coarse_label'].to_numpy()

    windows = []
    targets = []
    for i in range(overlap, len(data) - seq_len, seq_len):
        for start in (i, i - overlap):
            stop = start + seq_len
            windows.append(signal[start:stop])
            # Majority label via numpy: np.unique sorts the values, so argmax
            # picks the smallest label among ties, and the result does not
            # depend on the return layout of scipy.stats.mode.
            values, counts = np.unique(coarse[start:stop], return_counts=True)
            targets.append(values[np.argmax(counts)])

    if not windows:
        # Recording shorter than one window: return correctly shaped empty arrays.
        return (np.empty((0, seq_len, len(feature_columns)), dtype='float32'),
                np.empty((0,), dtype='float32'))

    X = np.stack(windows).astype('float32', copy=False)
    y = np.asarray(targets, dtype='float32')

    return X, y



In [347]:
# End-to-end preprocessing: load and merge the raw files, drop rows whose
# coarse label is not positive, then cut the recording into model-ready windows.
data = read_data()
filtered_data = filter_data(data)
X,y = make_numpy_matrix(filtered_data)

In [326]:
y.shape

(18254,)

In [327]:
df = pd.DataFrame(y, columns = ['label'])

In [291]:
df['label'].value_counts() ## Check distribution of data

6.0    9130
1.0    4246
4.0    3736
2.0    3617
3.0     105
Name: label, dtype: int64

## Save the processed data to .npy files

In [372]:
cd C:\Users\Nithish\Desktop\User 1 Bag

C:\Users\Nithish\Desktop\User 1 Bag


In [349]:
save('1_Torso_27_6_X', X)

In [350]:
save('1_Torso_27_6_y',y)

In [373]:
# Load two previously saved window/label array pairs and append the second to
# the first along axis 0.
# NOTE(review): `concatenate` is defined in a later cell of this notebook; that
# cell has to be run first, otherwise this cell raises NameError on a fresh kernel.
dataX = load('1_Bag_27_6_X.npy')
datay = load('1_Bag_27_6_y.npy')
X = load('1_Bag_26_6_X.npy')
y = load('1_Bag_26_6_y.npy')
dataX, datay = concatenate(dataX, datay,X,y )

In [374]:
X = load('1_Bag_22_6_X.npy')
y = load('1_Bag_22_6_y.npy')

In [375]:
dataX, datay = concatenate(dataX, datay,X,y )

In [380]:
dataX.shape

(60882, 200, 19)

In [379]:
datay.shape

(60882,)

In [381]:
save('Bag_User1_fullX', dataX)
save('Bag_User1_fully', datay)

In [208]:
def concatenate(X,y, new_dataX, new_datay):
    """Append a new batch of windows and labels to an existing dataset.

    Both pairs are joined along axis 0 (the sample axis); the inputs are not
    modified. Returns the tuple ``(features, labels)``.
    """
    merged_features, merged_labels = (
        np.concatenate(pair, axis=0)
        for pair in ((X, new_dataX), (y, new_datay))
    )
    return merged_features, merged_labels

In [33]:
filtered_data

Unnamed: 0,Timestep,AccX,AccY,AccZ,Gyroscope_X,Gyroscope_Y,Gyroscope_Z,Mag_meter_X,Mag_meter_Y,Mag_meter_Z,...,Gravity_Y,Gravity_Z,Linear_accX,Linear_accY,Linear_accZ,Pressure,Altitude,Temperature,Coarse_label,Fine_label
0,1498470339500,1.826706,-8.716733,1.270882,0.126351,-1.043744,-0.467572,0.161344,36.838909,7.990065,...,-8.949545,3.474233,-4.070962,-3.268496,-6.722267,1007.998291,0.0,0.0,2.0,5.0
1,1498470339510,1.664390,-9.048232,0.479703,-0.484148,-1.850490,-0.500799,0.714364,37.355968,7.321272,...,-9.001525,3.338862,-4.479416,-3.886921,-6.419862,1007.993713,0.0,0.0,2.0,5.0
2,1498470339520,0.616486,-9.157213,0.741797,-0.926514,-2.151571,-0.480267,1.034413,37.841473,6.840587,...,-9.023118,3.219236,-4.695837,-4.195946,-6.186209,1007.989136,0.0,0.0,2.0,5.0
3,1498470339530,0.327102,-9.599220,0.925615,-1.220342,-1.960565,-0.387256,1.355843,38.153896,6.557631,...,-9.108921,2.957080,-5.076182,-4.540824,-6.315186,1007.984558,0.0,0.0,2.0,5.0
4,1498470339540,0.235793,-10.012119,0.419633,-1.136532,-1.267445,-0.193769,1.656198,38.445248,6.007956,...,-9.147361,2.782432,-5.416662,-4.145084,-7.195375,1007.979919,0.0,0.0,2.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2083501,1498508664860,-0.750136,-8.336208,-14.167012,-0.863798,-0.316319,-0.302846,2.984020,-4.382990,45.715466,...,-3.503667,-9.021282,0.079796,-0.587120,0.029067,1005.144409,0.0,0.0,4.0,8.0
2083502,1498508664870,0.064377,-6.847972,-12.598029,-0.989914,-0.379662,-0.365411,3.000000,-4.206099,45.574898,...,-3.429348,-9.060199,-0.136523,-1.144958,-1.680338,1005.133667,0.0,0.0,4.0,8.0
2083503,1498508664880,1.017720,-5.981620,-10.889175,-1.011755,-0.333938,-0.250309,2.647491,-4.187500,45.680004,...,-3.335444,-9.097939,-1.006741,-1.162032,-1.081365,1005.122986,0.0,0.0,4.0,8.0
2083504,1498508664890,3.302852,-6.285632,-9.333902,-0.994788,-0.246604,-0.180778,2.345878,-3.908378,45.966621,...,-3.259725,-9.125040,-0.058206,-1.405008,-0.646992,1005.112244,0.0,0.0,4.0,8.0


In [115]:
cd C:\\Users\\Nithish\\Desktop\\Research Internship

C:\Users\Nithish\Desktop\Research Internship


In [117]:
X = load('preview_26_X.npy')

In [120]:
y = load('preview_26_y.npy')

In [121]:
X.shape

(20834, 200, 19)

In [125]:
X_2d = X.reshape((20834,-1))

In [139]:
X_2d = X_2d.reshape((20834,200,19))

In [141]:
X_2d.shape

(20834, 200, 19)

In [294]:
def feature_scaling(X):
    """Min-max scale every feature channel of a dataset into [-1, 1].

    The last axis of ``X`` is treated as the feature axis; all leading axes
    (e.g. windows and timesteps) are pooled as observations, so each channel
    gets a single minimum and maximum. The shape is taken from ``X`` itself,
    so any window length or channel count works, not just (n, 200, 19).

    Parameters
    ----------
    X : array-like with at least 2 dimensions, features on the last axis.

    Returns
    -------
    numpy.ndarray
        Scaled copy of ``X`` with the same shape (the shape is also printed).

    NOTE(review): the scaler is fitted on whatever array is passed in. If this
    is called before the train/test split, test statistics leak into training.
    """
    X = np.asarray(X)
    n_features = X.shape[-1]

    # Flatten to (observations, features) because the scaler works on 2-D input.
    flat = X.reshape((-1, n_features))

    scaler = MinMaxScaler(feature_range=(-1, 1))
    flat_scaled = scaler.fit_transform(flat)

    # Restore the original layout.
    X_scaled = flat_scaled.reshape(X.shape)

    print(X_scaled.shape)

    return X_scaled

In [None]:
# Hold out 10% of the windows for testing; random_state pins the split so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [None]:
def one_hot_target(y, classes=None):
    """One-hot encode a vector of class labels.

    Parameters
    ----------
    y : array-like of labels.
    classes : sequence, optional
        Full, ordered list of possible labels. When given, the output always
        has ``len(classes)`` columns in that order, even for labels that do not
        occur in ``y``; a label missing from ``classes`` yields an all-zero row.
        When omitted, one column is created per distinct value found in ``y``
        (sorted), so the width depends on the data.

    Returns
    -------
    numpy.ndarray of shape (len(y), n_columns).
    """
    if classes is None:
        encoded = pd.get_dummies(y)
    else:
        # A categorical with explicit categories keeps a column for every class,
        # so the target width can match a fixed-size output layer.
        fixed = pd.Series(pd.Categorical(y, categories=list(classes)))
        encoded = pd.get_dummies(fixed)

    return encoded.values

In [233]:
#np.random.seed(32)
def shuffle_along_axis(a, axis, seed=32):
    """Return a copy of ``a`` with its slices along ``axis`` in random order.

    A single permutation is drawn and applied to whole slices, so for a dataset
    of shape (samples, timesteps, features) shuffled along axis 0 every sample
    stays intact. Drawing an independent permutation for every position of the
    other axes would instead mix values from different samples.

    Parameters
    ----------
    a : array-like
    axis : int
        Axis whose slices are reordered.
    seed : int, default 32
        Seed of the private generator. Using the same seed on two arrays with
        equal length along ``axis`` (e.g. X and y) reorders them identically.

    Returns
    -------
    numpy.ndarray with the same shape as ``a``.
    """
    a = np.asarray(a)

    # Private generator: the global numpy random state is left untouched.
    rng = np.random.RandomState(seed)

    order = rng.rand(a.shape[axis]).argsort()

    return np.take(a, order, axis=axis)


In [300]:
x = np.array([[-10,0.2],[10,0.5],[7,0.1]])
#feature_scaling(x)
scaler  = MinMaxScaler(feature_range=(-1,1))
scaler.fit_transform(x)

array([[-1. , -0.5],
       [ 1. ,  1. ],
       [ 0.7, -1. ]])

In [122]:
y.shape

(20834,)

In [113]:
from sklearn.preprocessing import MinMaxScaler

In [114]:
scaler = MinMaxScaler(feature_range=(-1, 1))

In [127]:
# NOTE(review): the shape printed for this result is (20834, 3800), i.e. X_2d was
# the per-window flattening here, so every (timestep, channel) column gets its own
# min/max. feature_scaling() instead pools all timesteps per channel - confirm
# which of the two scalings is intended.
X_scaled = scaler.fit_transform(X_2d)

In [129]:
X_scaled.shape

(20834, 3800)

In [130]:
X_scaled_3d = X_scaled.reshape((20834,200,19))

In [131]:
X_scaled_3d.shape

(20834, 200, 19)

In [142]:
y_one_hot = pd.get_dummies(y)

In [148]:
y_one_hot

Unnamed: 0,1.0,2.0,3.0,4.0,6.0
0,0,1,0,0,0
1,0,1,0,0,0
2,0,1,0,0,0
3,0,1,0,0,0
4,0,1,0,0,0
...,...,...,...,...,...
20829,0,0,0,1,0
20830,0,0,0,1,0
20831,0,0,0,1,0
20832,0,0,0,1,0


In [243]:
a = [2,3,4,5,6,1]

In [247]:
b = [8,2,7,9,2,1]

In [248]:
b_ = shuffle_along_axis(np.array(b),0)

In [240]:
data_shuffled= shuffle_along_axis(data,0)

In [241]:
data_shuffled.shape

(41668, 200, 19)

In [238]:
# NOTE(review): `X_scaled_` is not assigned in any visible cell, and this rebinds
# `data` (the merged DataFrame from read_data()) to a numpy array - on a fresh
# kernel this cell fails, and re-running earlier cells afterwards sees the wrong type.
data = np.concatenate((X, X_scaled_), axis = 0)

In [239]:
data.shape

(41668, 200, 19)

In [237]:
X_scaled_.shape

(20834, 200, 19)

In [151]:
y_= (y_one_hot.values)

In [158]:
y[135]

2.0

In [159]:
y_[135]

array([0, 1, 0, 0, 0], dtype=uint8)

In [161]:
y_.shape

(20834, 5)

In [169]:
seed = 10
# np.random.shuffle works in place and returns None, so applying it to each row
# would scramble every one-hot vector and collect a list of None. Draw one seeded
# permutation of the row indices instead and reorder whole rows with it.
# Apply the same `shuffle_order` to the feature array to keep X and y aligned.
shuffle_order = np.random.RandomState(seed).permutation(len(y_))
y_shuffle = y_[shuffle_order]

In [171]:
len(y_shuffle)

20834

# Model

In [286]:
# Stacked LSTM classifier: each input is one window of 200 timesteps x 19 channels.
input_ = Input(shape = (200,19))
# The first two LSTM layers return the full sequence so the next LSTM can consume it.
x = LSTM(200, return_sequences=True)(input_)
x = Dropout(0.3)(x)
x = LSTM(200, return_sequences= True)(x)
# The last LSTM returns only its final output, giving one 100-d vector per window.
x = LSTM(100)(x)
x = Dropout(0.2)(x)
x = Dense(units = 100, activation='relu')(x)
# NOTE(review): 8 softmax outputs, but the one-hot targets displayed earlier in the
# notebook have 5 columns - confirm the target width matches before training.
output = Dense(8,  activation='softmax')(x)

In [287]:
Stacked_LSTM_model = Model(input_, output)

In [283]:
Stacked_LSTM_model.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         (None, 200, 19)           0         
_________________________________________________________________
lstm_22 (LSTM)               (None, 200, 200)          176000    
_________________________________________________________________
dropout_9 (Dropout)          (None, 200, 200)          0         
_________________________________________________________________
lstm_23 (LSTM)               (None, 200, 200)          320800    
_________________________________________________________________
lstm_24 (LSTM)               (None, 100)               120400    
_________________________________________________________________
dropout_10 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 100)               1010

In [268]:
from keras.utils import plot_model