In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from keras.layers import Conv2D, MaxPooling2D, Conv1D, Flatten
from keras.layers.convolutional import Conv3D
from keras.layers.convolutional_recurrent import ConvLSTM2D
from keras.layers.normalization import BatchNormalization

# Importing libraries
from keras.models import Sequential
from keras.layers import LSTM, TimeDistributed
from keras.layers.core import Dense, Dropout

ModuleNotFoundError: ignored

In [None]:
import numpy as np
import pandas as pd

# Collect the feature names: the second whitespace-separated token of each line in features.txt
features = []
with open('UCI_HAR_Dataset/features.txt') as features_file:
    for row in features_file:
        features.append(row.split()[1])
print('No of Features: {}'.format(len(features)))

In [None]:
# Load the training features, subject ids and activity codes into one DataFrame.
# Every path uses the 'UCI_HAR_Dataset' spelling so that it matches the features.txt
# and csv_files paths used elsewhere in this notebook on case-sensitive filesystems.
# sep=r'\s+' is the documented equivalent of delim_whitespace=True.
X_train = pd.read_csv('UCI_HAR_Dataset/train/X_train.txt', sep=r'\s+', header=None, names=features)

# add subject column to the dataframe (single-column file: take column 0 as a Series)
X_train['subject'] = pd.read_csv('UCI_HAR_Dataset/train/subject_train.txt', header=None)[0]

# activity codes as a Series named 'Activity', plus their readable names
y_train = pd.read_csv('UCI_HAR_Dataset/train/y_train.txt', names=['Activity'])['Activity']
y_train_labels = y_train.map({1: 'WALKING', 2:'WALKING_UPSTAIRS',3:'WALKING_DOWNSTAIRS',\
                       4:'SITTING', 5:'STANDING',6:'LAYING'})

# put all columns in a single dataframe
# NOTE: `train` is the same object as X_train (no copy), so the label columns
# added below also appear on X_train.
train = X_train
train['Activity'] = y_train
train['ActivityName'] = y_train_labels
train.sample()

In [None]:
# Display the (rows, columns) shape of the combined training frame.
train.shape

In [None]:
# Load the test features, subject ids and activity codes into one DataFrame.
# Every path uses the 'UCI_HAR_Dataset' spelling so that it matches the features.txt
# and csv_files paths used elsewhere in this notebook on case-sensitive filesystems.
# sep=r'\s+' is the documented equivalent of delim_whitespace=True.
X_test = pd.read_csv('UCI_HAR_Dataset/test/X_test.txt', sep=r'\s+', header=None, names=features)

# add subject column to the dataframe (single-column file: take column 0 as a Series)
X_test['subject'] = pd.read_csv('UCI_HAR_Dataset/test/subject_test.txt', header=None)[0]

# get y labels from the txt file, plus their readable names
y_test = pd.read_csv('UCI_HAR_Dataset/test/y_test.txt', names=['Activity'])['Activity']
y_test_labels = y_test.map({1: 'WALKING', 2:'WALKING_UPSTAIRS',3:'WALKING_DOWNSTAIRS',\
                       4:'SITTING', 5:'STANDING',6:'LAYING'})


# put all columns in a single dataframe
# NOTE: `test` is the same object as X_test (no copy), so the label columns
# added below also appear on X_test.
test = X_test
test['Activity'] = y_test
test['ActivityName'] = y_test_labels
test.sample()

In [None]:
# Display the (rows, columns) shape of the combined test frame.
test.shape

In [None]:
# Count fully duplicated rows in each split and report them.
train_duplicates = train.duplicated().sum()
test_duplicates = test.duplicated().sum()
print('No of duplicates in train: {}'.format(train_duplicates))
print('No of duplicates in test : {}'.format(test_duplicates))

In [None]:
# Count missing cells across every column of each split and report them.
train_missing = train.isnull().values.sum()
test_missing = test.isnull().values.sum()
print('We have {} NaN/Null values in train'.format(train_missing))
print('We have {} NaN/Null values in test'.format(test_missing))

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot appearance: seaborn white grid and the DejaVu Sans font family.
sns.set_style('whitegrid')
plt.rcParams.update({'font.family': 'Dejavu Sans'})

In [None]:
# Number of training rows per subject, split by activity name.
plt.figure(figsize=(16, 8))
plt.title('Data provided by each user', fontsize=20)
sns.countplot(
    data=train,
    x='subject',
    hue='ActivityName',
)
plt.show()

In [None]:
# Number of training rows per activity name.
plt.title('No of Datapoints per Activity', fontsize=15)
# Pass the column by keyword: a positional first argument is taken as `x` by older
# seaborn releases but as `data` by newer ones, so the keyword form is the portable one.
sns.countplot(x='ActivityName', data=train)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Strip '(', ')', '-' and ',' from every column name.
# regex=True is stated explicitly: the pattern is a character class, and pandas 2
# treats str.replace patterns as literal text by default, which would remove nothing.
columns = train.columns.str.replace(r'[()\-,]', '', regex=True)

# NOTE: the cleaned training names are assigned to both frames, which relies on
# train and test having the same columns in the same order.
train.columns = columns
test.columns = columns

test.columns

In [None]:
import os

# Persist both cleaned frames as CSV. to_csv does not create missing directories,
# so make sure the target folder exists first.
os.makedirs('UCI_HAR_Dataset/csv_files', exist_ok=True)
train.to_csv('UCI_HAR_Dataset/csv_files/train.csv', index=False)
test.to_csv('UCI_HAR_Dataset/csv_files/test.csv', index=False)

In [None]:
# Density of tBodyAccMagmean per activity, with arrows marking the two visible groups.
sns.set_palette("Set1", desat=0.80)
facetgrid = sns.FacetGrid(train, hue='ActivityName', size=6,aspect=2)
facetgrid.map(sns.distplot,'tBodyAccMagmean', hist=False)
facetgrid.add_legend()

# (text, arrow tip, text position) for each annotation; coordinates are in data units
callouts = [
    ("Stationary Activities", (-0.956, 17), (-0.9, 23)),
    ("Moving Activities", (0, 3), (0.2, 9)),
]
for caption, arrow_tip, text_at in callouts:
    plt.annotate(caption, xy=arrow_tip, xytext=text_at, size=20,
                 va='center', ha='left',
                 arrowprops=dict(arrowstyle="simple",connectionstyle="arc3,rad=0.1"))
plt.show()

In [None]:
# One sub-frame per activity code (1..6), used for the density plots below.
df1, df2, df3, df4, df5, df6 = (train[train['Activity'] == code] for code in range(1, 7))

plt.figure(figsize=(14,7))

# Left panel: the three stationary activities, with the axes zoomed in.
plt.subplot(2,2,1)
plt.title('Stationary Activities(Zoomed in)')
for frame, colour, caption in ((df4, 'r', 'Sitting'), (df5, 'm', 'Standing'), (df6, 'c', 'Laying')):
    sns.distplot(frame['tBodyAccMagmean'], color=colour, hist=False, label=caption)
plt.axis([-1.01, -0.5, 0, 35])
plt.legend(loc='center')

# Right panel: the three walking activities.
plt.subplot(2,2,2)
plt.title('Moving Activities')
for frame, colour, caption in ((df1, 'red', 'Walking'), (df2, 'blue', 'Walking Up'), (df3, 'green', 'Walking down')):
    sns.distplot(frame['tBodyAccMagmean'], color=colour, hist=False, label=caption)
plt.legend(loc='center right')


plt.tight_layout()
plt.show()

In [None]:
# Box plot of tBodyAccMagmean per activity with two dashed horizontal guide lines.
plt.figure(figsize=(7,7))
sns.boxplot(data=train, x='ActivityName', y='tBodyAccMagmean', showfliers=False, saturation=1)
plt.ylabel('Acceleration Magnitude mean')
# (y level, horizontal extent in axes fractions, colour) for each guide line
for level, extent, colour in ((-0.7, dict(xmin=0.1, xmax=0.9), 'g'), (-0.05, dict(xmin=0.4), 'm')):
    plt.axhline(y=level, dashes=(5,5), c=colour, **extent)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Box plot of angleXgravityMean per activity with a dashed guide line at y=0.08.
sns.boxplot(data=train, x='ActivityName', y='angleXgravityMean')
guide_line = dict(y=0.08, xmin=0.1, xmax=0.9, c='m', dashes=(5,3))
plt.axhline(**guide_line)
plt.title('Angle between X-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation = 40)
plt.show()

In [None]:
# Box plot of angleYgravityMean per activity (outliers hidden) with a dashed guide line at y=-0.22.
sns.boxplot(data=train, x='ActivityName', y='angleYgravityMean', showfliers=False)
plt.title('Angle between Y-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation = 40)
guide_line = dict(y=-0.22, xmin=0.1, xmax=0.8, dashes=(5,3), c='m')
plt.axhline(**guide_line)
plt.show()

In [None]:
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# performs t-sne with different perplexity values and their respective plots..

def perform_tsne(X_data, y_data, perplexities, n_iter=1000, img_name_prefix='t-sne'):
    """Run t-SNE once per perplexity value, then plot and save each embedding.

    Parameters
    ----------
    X_data : feature matrix handed to ``TSNE.fit_transform``.
    y_data : one label per row of ``X_data``; used as the plot hue.
    perplexities : iterable of perplexity values, one t-SNE run per value.
    n_iter : maximum number of optimisation iterations, forwarded to ``TSNE``.
    img_name_prefix : prefix of the PNG file written for each run.

    Returns
    -------
    None. Each plot is shown and saved in the current working directory as
    ``<img_name_prefix>_perp_<perplexity>_iter_<n_iter>.png``.
    """
    for perplexity in perplexities:
        # perform t-sne
        print('\nperforming tsne with perplexity {} and with {} iterations at max'.format(perplexity, n_iter))
        # n_iter is now passed through; before, it was only printed and used in the
        # file name, so TSNE always ran with its own default iteration count.
        # NOTE(review): newer scikit-learn releases name this argument `max_iter` —
        # confirm against the installed version.
        X_reduced = TSNE(verbose=2, perplexity=perplexity, n_iter=n_iter).fit_transform(X_data)
        print('Done..')

        # prepare the data for seaborn (first two embedding columns plus the label)
        print('Creating plot for this t-sne visualization..')
        df = pd.DataFrame({'x':X_reduced[:,0], 'y':X_reduced[:,1] ,'label':y_data})

        # scatter plot only (fit_reg=False), one marker style per label
        sns.lmplot(data=df, x='x', y='y', hue='label', fit_reg=False, size=8,\
                   palette="Set1",markers=['^','v','s','o', '1','2'])
        plt.title("perplexity : {} and max_iter : {}".format(perplexity, n_iter))
        img_name = img_name_prefix + '_perp_{}_iter_{}.png'.format(perplexity, n_iter)
        print('saving this plot as image in present working directory...')
        # save before show(): the figure may no longer be current afterwards
        plt.savefig(img_name)
        plt.show()
        print('Done')

In [None]:
# t-SNE input: every column except the subject id and the two label columns.
non_feature_columns = ['subject', 'Activity','ActivityName']
X_pre_tsne = train.drop(columns=non_feature_columns)
y_pre_tsne = train['ActivityName']
perform_tsne(
    X_data=X_pre_tsne,
    y_data=y_pre_tsne,
    perplexities=[2,5,10,20,50],
)

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Reload the frames exported earlier. The directory name is spelled 'UCI_HAR_Dataset',
# exactly as in the to_csv calls, so the files are found on case-sensitive filesystems.
train = pd.read_csv('UCI_HAR_Dataset/csv_files/train.csv')
test = pd.read_csv('UCI_HAR_Dataset/csv_files/test.csv')
print(train.shape, test.shape)

In [None]:
# Display the first three rows of the reloaded training frame.
train.head(3)

In [None]:
# Split the training frame into features (all non-label, non-subject columns)
# and the activity-name target.
X_train = train.drop(columns=['subject', 'Activity', 'ActivityName'])
y_train = train['ActivityName']

In [None]:
# Split the test frame into features (all non-label, non-subject columns)
# and the activity-name target.
X_test = test.drop(columns=['subject', 'Activity', 'ActivityName'])
y_test = test['ActivityName']

In [None]:
# Class labels for the 6-class problem, keyed by zero-based class index
ACTIVITIES = dict(enumerate([
    'WALKING',
    'WALKING_UPSTAIRS',
    'WALKING_DOWNSTAIRS',
    'SITTING',
    'STANDING',
    'LAYING',
]))

# Utility function to build the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    """Cross-tabulate true against predicted activity names.

    Both arguments are 2-D score/one-hot arrays with one row per sample; the
    column index of each row's maximum is looked up in ACTIVITIES.

    Returns a DataFrame whose rows are labelled 'True' and columns 'Pred'.
    """
    def _names(scores):
        # column index of the row maximum -> activity name
        return pd.Series([ACTIVITIES[idx] for idx in np.argmax(scores, axis=1)])

    true_names = _names(Y_true)
    pred_names = _names(Y_pred)
    return pd.crosstab(true_names, pred_names, rownames=['True'], colnames=['Pred'])

In [None]:
# Data directory
# NOTE(review): the loaders visible in this notebook hard-code 'UCI_HAR_Dataset/...'
# in their paths rather than reading this constant — confirm whether it is still needed.
DATADIR = 'UCI_HAR_Dataset'

In [None]:
# Names of the raw inertial signal files, in loading order.
# Three signal groups (body acceleration, body gyroscope, total acceleration),
# each with an x, y and z component; the position in this list becomes the
# index on the last axis of the arrays built from it.
SIGNALS = [
    f"{group}_{axis}"
    for group in ("body_acc", "body_gyro", "total_acc")
    for axis in "xyz"
]

In [None]:
# Utility function to read a whitespace-delimited text file without a header row
def _read_csv(filename):
    """Read `filename` into a DataFrame with integer column labels."""
    # sep=r'\s+' is the documented equivalent of delim_whitespace=True
    return pd.read_csv(filename, sep=r'\s+', header=None)

def load_signals(subset):
    """Load every raw inertial signal of one subset into a single 3-D array.

    Parameters
    ----------
    subset : sub-directory and file suffix to load ('train' or 'test' in this notebook).

    Returns
    -------
    numpy array with axes (sample, timestep, signal); the last axis follows
    the order of SIGNALS.
    """
    signals_data = []

    for signal in SIGNALS:
        filename = f'UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same ndarray
        signals_data.append(
            _read_csv(filename).values
        )

    # The stacked list has axes (signal, sample, timestep). Transposing moves the
    # signal axis last, aggregating the signals by combination of sample/timestep.
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))

In [None]:
def load_y(subset):
    """
    The objective that we are trying to predict is an integer, from 1 to 6,
    that represents a human activity. We return a binary representation of
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)

    Parameters: subset -- sub-directory and file suffix ('train' or 'test' in this notebook).
    Returns: 2-D uint8 numpy array, one row per sample and one column per distinct label.
    """
    filename = f'UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    # DataFrame.as_matrix() was removed in pandas 1.0; use .values instead.
    # The cast keeps the 0/1 integer encoding on pandas versions where
    # get_dummies returns booleans.
    return pd.get_dummies(y).values.astype('uint8')

In [None]:
def load_data():
    """
    Assemble the raw-signal dataset from its separate files.

    Signals are loaded first (train, then test), followed by the one-hot
    labels (train, then test).
    Returns: X_train, X_test, y_train, y_test
    """
    signals_train = load_signals('train')
    signals_test = load_signals('test')
    labels_train = load_y('train')
    labels_test = load_y('test')

    return signals_train, signals_test, labels_train, labels_test

In [None]:
# Importing tensorflow
# NumPy's global RNG is seeded before TensorFlow is imported, then TensorFlow's
# RNG is seeded with the same fixed value.
# NOTE(review): tf.set_random_seed looks like the TensorFlow 1.x spelling — confirm
# against the installed version.
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)

In [None]:
# Configuring a session
# Limits TensorFlow to one thread inside an op and one thread across ops.
# NOTE(review): presumably chosen for run-to-run reproducibility together with
# the fixed seeds — confirm.
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

In [None]:
# Import Keras
from keras import backend as K
# Create a session on the default graph with the configuration above and
# register it as the session the Keras backend uses.
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

In [None]:
# Initializing parameters
# Training length and mini-batch size used by the fit calls and the
# KerasClassifier wrapper that follow.
epochs = 30
batch_size = 30
# n_hidden = 32

In [None]:
# Utility function to count the number of classes
def _count_classes(y):
    """Return how many distinct rows `y` contains (each row is compared as a tuple)."""
    distinct_rows = {tuple(row) for row in y}
    return len(distinct_rows)

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification
from keras.models import load_model

In [None]:
# Loading the train and test data
# X_* are the raw-signal arrays and Y_* the one-hot labels returned by load_data().
# NOTE: X_train / X_test are rebound here, replacing the feature frames of the same
# name created from the CSV files earlier.
X_train, X_test, Y_train, Y_test = load_data()

In [None]:
# Signal subset for the second model: positions 1-6 on the last axis, which follow
# SIGNALS order (body_acc_y, body_acc_z, body_gyro_x, body_gyro_y, body_gyro_z, total_acc_x).
# NOTE(review): this skips body_acc_x and takes only the x component of total_acc —
# confirm the selection is intentional.
signal_positions_1 = [1,2,3,4,5,6]
X_train_1 = X_train[..., signal_positions_1]
X_test_1 = X_test[..., signal_positions_1]

In [None]:
# Signal subset for the third model: positions 1-4 on the last axis, which follow
# SIGNALS order (body_acc_y, body_acc_z, body_gyro_x, body_gyro_y).
# NOTE(review): confirm this selection is intentional.
signal_positions_2 = [1,2,3,4]
X_train_2 = X_train[..., signal_positions_2]
X_test_2 = X_test[..., signal_positions_2]

In [None]:
# X_train_2 = X_train[:,:,[5,6,7,8]]
# X_test_2 = X_test[:,:,[5,6,7,8]]

In [None]:
# # selecting axis specific data
# X_train_1 = X_train[:,:,[3,4,5,6,7,8]]
# X_train_2 = X_train[:,:,[3,4,6,7]]

In [None]:
# # selecting axis specific data
# X_test_1 = X_test[:,:,[3,4,5,6,7,8]]
# X_test_2 = X_test[:,:,[3,4,6,7]]

In [None]:
# Dimensions of the raw-signal arrays: second axis = timesteps, third axis = signals.
timesteps, input_dim = X_train.shape[1], X_train.shape[2]
# number of distinct one-hot label rows
n_classes = _count_classes(Y_train)
for value in (timesteps, input_dim, X_train.shape[0]):
    print(value)
# fixed class count used by get_model's output layer
num_classes = 6

In [None]:
# Test and Train data reshaping for the 1st dataset (all 9 signals)
# input image dimensions

img_rows, img_cols = 128, 9
# An activity is governed by a sequence of windows: seq_len consecutive windows are
# grouped so the CNN sees each window and the LSTM sees the sequence of CNN outputs.
seq_len = 8

# The sequence counts are derived from the data instead of being hard-coded
# (7352 = 919*8 for train; 2947 -> 2944 = 368*8 for test), so the cell works for
# any sample count. Only flat (3-D) arrays are trimmed, which keeps the cell
# safe to re-run after the reshape has already happened.
if X_train.ndim == 3:
    usable_train = len(X_train) - len(X_train) % seq_len
    X_train = X_train[:usable_train]
    Y_train = Y_train[:usable_train]

X_train = X_train.reshape(-1, seq_len, img_rows, img_cols, 1)
Y_train = Y_train.reshape(-1, seq_len, Y_train.shape[-1])

# drop the trailing samples that do not fill a whole sequence
if X_test.ndim == 3:
    usable_test = len(X_test) - len(X_test) % seq_len
    X_test = X_test[:usable_test]
    Y_test = Y_test[:usable_test]

X_test = X_test.reshape(-1, seq_len, img_rows, img_cols, 1)
Y_test = Y_test.reshape(-1, seq_len, Y_test.shape[-1])

# Input shape for model-1: (sequence length, rows, cols, channels)
input_shape_1 = ( X_train.shape[1], X_train.shape[2], X_train.shape[3], X_train.shape[4])
print(input_shape_1)

In [None]:
# Display the sequence-shaped train and test array shapes for model-1.
X_train.shape, X_test.shape

In [None]:
# Test and Train data reshaping for the 2nd dataset (6 selected signals)
# input image dimensions

img_rows, img_cols = 128, 6
# An activity is governed by a sequence of windows: seq_len consecutive windows are
# grouped so the CNN sees each window and the LSTM sees the sequence of CNN outputs.
seq_len = 8

# The sequence counts are derived from the data instead of being hard-coded
# (7352 = 919*8 for train; 2947 -> 2944 = 368*8 for test). Only flat (3-D) arrays
# are trimmed, which keeps the cell safe to re-run.
if X_train_1.ndim == 3:
    X_train_1 = X_train_1[:len(X_train_1) - len(X_train_1) % seq_len]
X_train_1 = X_train_1.reshape(-1, seq_len, img_rows, img_cols, 1)

# drop the trailing samples that do not fill a whole sequence
if X_test_1.ndim == 3:
    X_test_1 = X_test_1[:len(X_test_1) - len(X_test_1) % seq_len]
X_test_1 = X_test_1.reshape(-1, seq_len, img_rows, img_cols, 1)

# Input shape for model-2: (sequence length, rows, cols, channels)
input_shape_2 = (X_train_1.shape[1], X_train_1.shape[2], X_train_1.shape[3], X_train_1.shape[4])
print(input_shape_2)

In [None]:
# Display the sequence-shaped train and test array shapes for model-2.
X_train_1.shape, X_test_1.shape

In [None]:
# Test and Train data reshaping for the 3rd dataset (4 selected signals)
# input image dimensions

img_rows, img_cols = 128, 4
# An activity is governed by a sequence of windows: seq_len consecutive windows are
# grouped so the CNN sees each window and the LSTM sees the sequence of CNN outputs.
seq_len = 8

# The sequence counts are derived from the data instead of being hard-coded
# (7352 = 919*8 for train; 2947 -> 2944 = 368*8 for test). Only flat (3-D) arrays
# are trimmed, which keeps the cell safe to re-run.
if X_train_2.ndim == 3:
    X_train_2 = X_train_2[:len(X_train_2) - len(X_train_2) % seq_len]
X_train_2 = X_train_2.reshape(-1, seq_len, img_rows, img_cols, 1)

# drop the trailing samples that do not fill a whole sequence
if X_test_2.ndim == 3:
    X_test_2 = X_test_2[:len(X_test_2) - len(X_test_2) % seq_len]
X_test_2 = X_test_2.reshape(-1, seq_len, img_rows, img_cols, 1)

# Input shape for model-3: (sequence length, rows, cols, channels)
input_shape_3 = (X_train_2.shape[1], X_train_2.shape[2], X_train_2.shape[3], X_train_2.shape[4])
print(input_shape_3)

In [None]:
# Display the sequence-shaped train and test array shapes for model-3.
X_train_2.shape, X_test_2.shape

In [None]:
def get_model(shape_cnn, shape_lstm):
    """Build and compile the time-distributed CNN + LSTM classifier.

    Parameters
    ----------
    shape_cnn : per-sample input shape, (sequence length, rows, cols, channels).
    shape_lstm : kept for backward compatibility with existing callers; unused,
        because the LSTM's input shape is inferred from the flattened CNN output.

    Returns
    -------
    Compiled Sequential model emitting one softmax vector of `num_classes`
    entries per sequence step.
    """
    model = Sequential()
    # input_shape belongs on the TimeDistributed wrapper (the first layer of the
    # model). On the wrapped Conv2D it was ignored and had the wrong rank:
    # shape_cnn includes the sequence axis, which Conv2D never sees.
    model.add(TimeDistributed(Conv2D(128, kernel_size=(5,1), activation='relu'), input_shape=shape_cnn))
    model.add(TimeDistributed(Conv2D(64, (5, 1), activation='relu')))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Conv2D(32, (5, 1), activation='relu')))
    # one flat feature vector per sequence step
    model.add(TimeDistributed(Flatten()))
    # model.add(TimeDistributed(Dense(32, activation='relu')))
    # return_sequences=True keeps one output per step, matching the per-step labels
    model.add(LSTM(units=64, return_sequences=True))
    model.add(Dense(num_classes, activation='softmax'))
    # compiling the model
    # model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
# Build the CNN-LSTM for the 9-signal data and train it on the sequence-shaped arrays.
lstm_shape_1 = tuple(input_shape_1[1:3])
model_1 = get_model(input_shape_1, lstm_shape_1)
model_1.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

In [None]:
# Save the trained first model to an HDF5 file in the working directory.
model_1.save('CNN_LSTM_Model_1.h5')

In [None]:
# Evaluate model_1 on the sequence-shaped test arrays; the result holds the loss
# followed by the compiled metric (accuracy).
score = model_1.evaluate(X_test, Y_test)

In [None]:
# Display the evaluation result of model_1.
score

In [None]:
# Build the CNN-LSTM for the 6-signal subset, train it, then save it to HDF5.
lstm_shape_2 = tuple(input_shape_2[1:3])
model_2 = get_model(input_shape_2, lstm_shape_2)
model_2.fit(
    X_train_1,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)
model_2.save('CNN_LSTM_Model_2.h5')

In [None]:
# Evaluate model_2 on its 6-signal test array; overwrites the previous `score`.
score = model_2.evaluate(X_test_1, Y_test)

In [None]:
# Display the evaluation result of model_2.
score

In [None]:
# Build the CNN-LSTM for the 4-signal subset, train it, then save it to HDF5.
lstm_shape_3 = tuple(input_shape_3[1:3])
model_3 = get_model(input_shape_3, lstm_shape_3)
model_3.fit(
    X_train_2,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)
model_3.save('CNN_LSTM_Model_3.h5')

In [None]:
# Evaluate model_3 on its 4-signal test array; overwrites the previous `score`.
score = model_3.evaluate(X_test_2, Y_test)

In [None]:
# Display the evaluation result of model_3.
score

In [None]:
# Flatten the per-sequence labels and predictions back to one row per window.
# The row count is inferred (-1) and the class count is read from Y_test, so the
# cell no longer depends on the hard-coded 2944 x 6 shape.
n_outputs = Y_test.shape[-1]
y_test = Y_test.reshape(-1, n_outputs)

# predict() returns the same class probabilities for these softmax models;
# predict_proba() is not available on newer Keras releases.
y1 = model_1.predict(X_test)
y1 = y1.reshape(-1, n_outputs)

y2 = model_2.predict(X_test_1)
y2 = y2.reshape(-1, n_outputs)

y3 = model_3.predict(X_test_2)
y3 = y3.reshape(-1, n_outputs)

In [None]:
# Fuse the three models: for every sample, take the class predicted by whichever
# model reports the highest top probability (the earliest model wins exact ties).
# When all three models agree, that is simply the class they share.
# NOTE(review): this is confidence-based selection, not a majority vote — two
# agreeing models lose to a third, more confident one. Confirm that is intended.
stacked_scores = np.stack([y1, y2, y3])            # (model, sample, class)
top_confidence = stacked_scores.max(axis=2)        # best probability per model and sample
top_class = stacked_scores.argmax(axis=2)          # class holding that probability
most_confident = top_confidence.argmax(axis=0)     # index of the winning model per sample
fused_class = top_class[most_confident, np.arange(len(y1))]

# one-hot rows, kept as a plain list of lists of ints
y_pred = np.eye(6, dtype=int)[fused_class].tolist()

In [None]:
# Confusion Matrix
# Convert the list of one-hot rows to an array so confusion_matrix() can take the
# argmax along axis 1, then print true-vs-predicted counts for the fused predictions.
y_pred = np.array(y_pred)
print(confusion_matrix(y_test, y_pred))

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Accuracy of the fused predictions against the flattened test labels.
# NOTE(review): both arguments are one-hot row arrays at this point, so a sample
# presumably counts as correct only when its whole row matches — confirm.
accuracy_score(y_test, y_pred)

In [None]:
# Create function returning a compiled network
def create_network(n_hidden, drop):
    """Build and compile a two-layer stacked LSTM classifier.

    Parameters
    ----------
    n_hidden : number of units in each LSTM layer.
    drop : dropout rate, used inside both LSTM layers and in the Dropout layer
        before the output.

    Returns
    -------
    Compiled Sequential model. The input shape (timesteps, input_dim) and the
    output width n_classes are read from the notebook's globals.
    """
    # Initializing the sequential model
    model = Sequential()

    # First LSTM returns the full sequence so the second LSTM can consume it;
    # the second returns only its final state.
    model.add(LSTM(units=n_hidden, dropout=drop, return_sequences=True, input_shape=(timesteps, input_dim)))
    model.add(LSTM(units=n_hidden, dropout=drop, return_sequences=False))

    # Adding a dropout layer
    model.add(Dropout(drop))
    # Dense output layer with softmax activation: the classes are mutually exclusive
    # and the loss is categorical cross-entropy, matching get_model's output layer
    # (this layer previously used sigmoid).
    model.add(Dense(n_classes, activation='softmax'))

    # Compile model
    model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
    # model.summary()

    return model

In [None]:
# Wrap Keras model so it can be used by scikit-learn
# create_network is the model factory; epochs and batch_size take the values these
# globals hold when this cell runs.
neural_network = KerasClassifier(build_fn=create_network, verbose=0, epochs=epochs, batch_size=batch_size)

In [None]:
# Create grid search
# `hyperparameters` was referenced here without ever being defined, so the cell
# raised NameError. The keys must match create_network's parameters.
# NOTE(review): the candidate values are placeholders (they include the 264 / 0.7
# used by the final LSTM model in this notebook) — replace with the intended grid.
hyperparameters = {
    'n_hidden': [32, 64, 264],
    'drop': [0.5, 0.7],
}
grid = GridSearchCV(estimator=neural_network, param_grid=hyperparameters, verbose=10, cv=2)

# Fit grid search
# X_train / Y_train are sequence-shaped for the CNN-LSTM models at this point;
# the LSTM network expects one (timesteps, input_dim) window per sample, so the
# sequence axis is folded back into the sample axis first.
X_train_windows = X_train.reshape(-1, timesteps, input_dim)
Y_train_windows = Y_train.reshape(-1, n_classes)
grid_result = grid.fit(X_train_windows, Y_train_windows)
grid_result.best_params_

In [None]:
# Initializing parameters
# Rebinds the earlier epochs=30 / batch_size=30 for the stand-alone LSTM model
# trained in the following cells.
epochs = 20
batch_size = 50
# n_hidden = 32

In [None]:
# Stand-alone two-layer LSTM classifier on the raw signal windows.
n_hidden = 264
drop= 0.7
# Initializing the sequential model
model = Sequential()
# First LSTM returns the full sequence so the second LSTM can consume it
model.add(LSTM(units = n_hidden, return_sequences=True, input_shape=(timesteps, input_dim)))
model.add(Dropout(drop))
model.add(LSTM(units =  n_hidden, return_sequences=False))
# Adding a dropout layer
model.add(Dropout(drop))
# Dense output layer with softmax activation: the classes are mutually exclusive
# and the model is compiled with categorical cross-entropy, matching get_model's
# output layer (this layer previously used sigmoid).
model.add(Dense(n_classes, activation='softmax'))
model.summary()

In [None]:
# Compiling the model
# Categorical cross-entropy loss, RMSprop optimizer, accuracy reported as the metric.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [None]:
# Training the model
# X_train / Y_train are sequence-shaped for the CNN-LSTM models at this point, but
# this network was declared with input_shape=(timesteps, input_dim). Fold the
# sequence axis back into the sample axis so every row is one signal window.
model.fit(
    X_train.reshape(-1, timesteps, input_dim),
    Y_train.reshape(-1, n_classes),
    batch_size=batch_size,
    epochs=epochs,
)

In [None]:
# Confusion Matrix
# The stand-alone LSTM is bound to `model`; the name `model1` used here before is
# not defined anywhere in the notebook. The test arrays are sequence-shaped at
# this point, so they are flattened to one window / one label row per sample, as
# both the network and confusion_matrix() expect.
X_test_windows = X_test.reshape(-1, timesteps, input_dim)
Y_test_windows = Y_test.reshape(-1, n_classes)
print(confusion_matrix(Y_test_windows, model.predict(X_test_windows)))

In [None]:
# Evaluate the stand-alone LSTM (`model`; `model1` was never defined) on the test
# data, flattened to one (timesteps, input_dim) window and one label row per sample.
score = model.evaluate(
    X_test.reshape(-1, timesteps, input_dim),
    Y_test.reshape(-1, n_classes),
)

In [None]:
# Display the evaluation result of the stand-alone LSTM.
score

In [None]:
from prettytable import PrettyTable    
# Summary table of test accuracy per architecture.
# NOTE(review): the accuracies are typed in by hand rather than taken from the
# evaluation results above — confirm they match the latest run.
x = PrettyTable()
x.field_names = ["Architecture", "Test Accuracy"]
for architecture, accuracy in (("DNN Fusion", "0.95"), ("LSTM", "0.90")):
    x.add_row([architecture, accuracy])

print(x)