# Project A15

### Inputs
###### Source: Smartphone 3
###### Signal: accelerometer and gyroscope
###### Validation: 2, 5 and 10 fold subject-wise

### Result Presentation
Compare error based on number of folds in cross validation

In [None]:
# COMMENT: This code is only for single sample code trials. It will be iterated over all the samples at the later stage.

In [1]:
# IMPORT LIBRARIES

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import csv
import os
from pandas import read_csv
import scipy.signal as ss

In [2]:
from IPython.display import clear_output

In [3]:
# Extracting motion sequence

def extract_seq(time, half_width, avg_freq_a):
    time_max = time[int(np.shape(time)[0])-1]
    time_mean = time[int(np.shape(time)[0])-1]/2
    time_max = time_mean + half_width
    time_min = time_mean - half_width
    pos = np.array([0,0])
    pos[0] = int(avg_freq_a*time_min)
    pos[1] = int(avg_freq_a*time_max)
    return pos #returns the indices of the starting point and end point of the extracted sequence


In [4]:
def label(inputs):    
    
    inputs = inputs.lower()
    
    file_split = ((inputs).replace("_"," ")).split() #split the file names for identification
    
    subject = ''.join(filter(lambda j: j.isdigit(), file_split[0])) #extract subject number from name
    
    sample = ''.join(filter(lambda j: j.isdigit(), file_split[1]))
    
    #gait
    if ((inputs.find('nor') or inputs.find('rma') or inputs.find('mal')) != -1):
        gait = 0
        
    elif ((inputs.find('imp') or inputs.find('pai') or inputs.find('red')) != -1):
        gait = 1
        
    else:
        gait = -1 #returns -1 to detect erronous file names
    
    metadata = [subject, gait] #returns an array of subject number and gait (normal = 0, impaired = 1)
    
    #print(file_split)
    #print(subject)
    #print(gait)
    
    
    return metadata

In [7]:
def export_db(files):

    # The function extracts and resamples the imported data to match the database format
    
    sensor = ['\\Accelerometer.csv','\\Gyroscope.csv']
    
    # Read CSV into idividual arrays for accelerometer and gyroscope
    
    path_main_acc = "E:\MME_CAME\STUDY\CIE\PROJECT A\Smartphone3\\" + files + sensor[0]
    df_acc = read_csv(path_main_acc)
    path_main_gyro = "E:\MME_CAME\STUDY\CIE\PROJECT A\Smartphone3\\" + files + sensor[1]
    df_gyro = read_csv(path_main_gyro)
    
    # Calculate start time and end time for extracting data
    
    acc_time = df_acc[df_acc.columns[0]]
    gyro_time = df_gyro[df_gyro.columns[0]]
    
    acc_time_max  = acc_time[int(np.shape(acc_time)[0])-1]
    acc_time_mean = acc_time[int(np.shape(acc_time)[0])-1]/2
    acc_time_max = acc_time_mean + 10
    acc_time_min = acc_time_mean - 10
    
    acc_min  = np.where(round(acc_time,1)  == round(acc_time_min,1))
    acc_max  = np.where(round(acc_time,1)  == round(acc_time_max,1))
    gyro_min = np.where(round(gyro_time,1) == round(acc_time_min,1))
    gyro_max = np.where(round(gyro_time,1) == round(acc_time_max,1))
    
    acc_min  = int(np.mean(acc_min))
    acc_max  = int(np.mean(acc_max))
    gyro_min = int(np.mean(gyro_min))
    gyro_max = int(np.mean(gyro_max))
    
    
    freq = 500 #resampling freqency
    time_window = 20 #Sample time window
    df_samples = time_window*freq
    df = np.zeros((6,df_samples))
    
    for i in range(6):
        
        if (i<3):
            data = np.array(df_acc[df_acc.columns[i+1]])
            data = data[acc_min:acc_max]
            data = ss.resample(data, df_samples)
        
        else:
            data = np.array(df_gyro[df_gyro.columns[i-2]])
            data = data[gyro_min:gyro_max]
            data = ss.resample(data, df_samples)
        
        df[i,:] = data
    
    return df #Resampled data for the given files arranged in 6Xn format where rows are axes of Accelerometer and Gyroscope respectively
    



In [8]:
# DATA PREPROCESSING

# Import folder list
files=[]
files = [f for f in sorted(os.listdir('Smartphone3'))]
fol_len=int(np.shape(files)[0])

main_array = []
label_data = []

# Processing data for every file

for i in range(fol_len):
    
    clear_output(wait=True)
    
    df = export_db(files[i])
    
    #data filtering using SG filter
    
    for k in range(6):
        seq = df[k,:]
        seq = ss.savgol_filter(seq, 101 , 4)
        df[k,:] = seq
    
    #identification of peaks for sengmentation
    
    resultant_acc = ss.savgol_filter(df[3,:], 1001 , 8)
    peaks,_ = ss.find_peaks(resultant_acc,height=-10)
    peaksofpeaks,_ = ss.find_peaks(resultant_acc[peaks])
  
    samples = 300 # define samples in each sequence
    
    seg_start = np.zeros(np.shape(peaks)[0])
    seg_end = np.zeros(np.shape(peaks)[0])
    
    # Segmentation and resampling of data based on identified peaks
    
    for l in range(np.shape(peaks)[0]-1):
        fragment = np.zeros((6, samples))
        for k in range(6):
            seg_start[l] = peaks[l]
            seg_end[l] = peaks[l+1]
            seg = (df[k,:])[int(seg_start[l]):int(seg_end[l])]
            fragment[k,:] = ss.resample(seg, samples)
        
        #writing label and array
        
        label_data.append(label(files[i]))
        main_array.append(fragment)
        
    status=str(round(i*100/fol_len,2))
    print('Status: ',status,'%')

Status:  99.84 %


In [9]:
# prepare labels for splitting train and test data

label_data = np.array(label_data)
main_array = np.array(main_array)
unique_labels = np.unique((label_data)[:,0])
print(unique_labels)
xxx = int(np.shape(unique_labels)[0])
count = []
for i in range(xxx):
    count.append(np.where(label_data == (np.unique((label_data)[:,0])[i])))


['100' '101' '102' '103' '104' '105' '106' '107' '108' '109' '111' '115'
 '116' '117' '118' '119' '125' '126' '127' '128' '129' '130' '131' '132'
 '133' '134' '135' '136' '137' '138' '139' '25' '27' '29' '30' '31' '32'
 '34' '35' '36' '37' '38' '39' '40' '41' '42' '43' '44' '45' '46' '47'
 '48' '49' '50' '53' '54' '55' '56' '57' '58' '59' '60' '61' '62' '63'
 '64' '65' '66' '67' '68' '69' '70' '71' '72' '73' '74' '75' '76' '77'
 '78' '79' '80' '81' '82' '83' '84' '85' '86' '87' '88' '89' '90' '91'
 '92' '93' '94' '95' '96' '97' '98' '99']


In [21]:
# Splitting train and test data as per folds

X_test = []
X_train = []
Y_test = []
Y_train = []

folds = 10 #define number of folds
fraction = 1/folds

for i in range(0,int(xxx*fraction)):
    indices_sub = np.array(np.where(label_data[:,0] == unique_labels[i]))
    #print(np.shape(indices_sub)[1])
    for j in range(np.shape(indices_sub)[1]):
        X_test.append(main_array[indices_sub[:,j],:].reshape(6,300))
        Y_test.append(label_data[indices_sub[:,j],1])
        
for i in range(int(xxx*0.2),xxx):
    indices_sub = np.array(np.where(label_data[:,0] == unique_labels[i]))
    #print(np.shape(indices_sub)[1])
    for j in range(np.shape(indices_sub)[1]):
        X_train.append(main_array[indices_sub[:,j],:].reshape(6,300))
        Y_train.append(label_data[indices_sub[:,j],1])
        
Y_train = np.transpose(Y_train)[0].astype(int)
Y_test = np.transpose(Y_test)[0].astype(int)

In [22]:
# Check data splitting

folds_check = int((np.shape(Y_train)[0]+np.shape(Y_test)[0])/np.shape(Y_test)[0])
print(folds_check)

(18977, 6, 300)
(2113, 6, 300)
(18977,)
(2113,)
9


array([1, 1, 1, ..., 0, 0, 0])

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
import sklearn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

X_train = np.array(X_train).astype(float)
X_test = np.array(X_test).astype(float)
Y_train = np.array(Y_train).astype(float)
Y_test = np.array(Y_test).astype(float)


#X_train, X_test, y_train, y_test = train_test_split(main_array, np.transpose(label_data)[1], test_size=0.1)


print(np.shape(X_train))
print(np.shape(X_test))
print(np.shape(Y_train))
print(np.shape(Y_test))

(18977, 6, 300)
(2113, 6, 300)
(18977,)
(2113,)


In [None]:
# define model with tensorflow keras 
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(6,300)),
    tf.keras.layers.Dense(128,activation ='sigmoid'),
    tf.keras.layers.Dense(64 ,activation ='sigmoid'),
    tf.keras.layers.Dense(32 ,activation ='sigmoid'),
    tf.keras.layers.Dense(16 ,activation ='sigmoid'),
    tf.keras.layers.Dense(2  , activation = 'softmax')
])


# compile the model, set optimizer, loss and metric functions
model.compile(optimizer = 'adam',
             loss = 'mean_squared_error',
             metrics = ['accuracy'])

# train the neural network for 10 training epochs
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=10, batch_size=50, verbose=2)

In [25]:
# evaluate the accuracy
loss, accuracy = model.evaluate(X_test, Y_test, verbose=0);

print('Folds =', folds)
print('Loss =', loss)
print('accuracy =', accuracy)

Folds = 10
Loss = 0.25
accuracy = 0.4723142385482788


In [None]:
# Show the predictions for the test set
predictions = model.predict(X_test[:5])

print(np.round(predictions[4],6))
print(predictions.shape)
# show the image
for i in range(5):
    #plt.plot(X_test[i])
    # Get the label with the highest prediction value
    print('Label = ', np.argmax(predictions[i]))
    #plt.show()
print(y_test[:5])

### Results

Folds = 2
Loss = 0.25002381205558777
accuracy = 0.5051564574241638

Folds = 5
Loss = 0.25
accuracy = 0.5037577152252197

Folds = 10
Loss = 0.25
accuracy = 0.4723142385482788