# CIE Project A - Group 7
## ---> Data processing

### Imports

In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal as sp
from sklearn import preprocessing ##importing for normalization

from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import StandardScaler

plt.rcParams['figure.dpi'] = 100
plt.rcParams['axes.grid'] = True
plt.rcParams['legend.frameon'] = True
%matplotlib widget

# import tensorflow as tf
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import confusion_matrix

#### Get the root directory and change to it

In [2]:
rootdir = os.getcwd()
os.chdir(rootdir)

### Variable Assignment

In [3]:
gyrFile = 'Gyroscope'
accFile = 'Accelerometer'

normal = 'Normal'
upstairs = 'Upstairs'
downstairs = 'Downstairs'

smartphone = [1, 3]

s1 = 'Smartphone1'
s3 = 'Smartphone3'

s = [s1, s3]

gaitpos = [normal, upstairs, downstairs]

### Data Parsing

#### Define variables for raw data

In [4]:
count = 0

# Naming convention - gyr(oscope)_s(martphone_location)) and Same for acceleraoemeter
# All the data is stored in a list of dataframes. So, gyr{gaitpos}_s{martphone_location}[0, 1, 2, .....] gives the data as sorted by name in the file directory.
# gyrNormal_s1[0] gives the gyroscope data of the subject180_Normal sample 01 in the normal gait position.
# gyrNormal_s1[1] gives the gyroscope data of the subject180_Normal sample 02 in the normal gait position.
# 
# gyrNormal_s[2] gives the gyroscope data of the subject181_Normal sample 01 in the normal gait position.
# 
# and so on. 

# Create arrays for Raw data
for x in gaitpos:
    for y in smartphone:
        # variables of raw the data
        globals()['gyr%s_s%s' % (x, y)] = []
        globals()['acc%s_s%s' % (x, y)] = []
##---------------------------------------------------------------------------------------------------------------------

#### Parsing gyroscope and accelerometer data at all gait positions (Smartphone 1 - Rear Right) & (Smartphone 3 - Front Right)

In [5]:
for subdir, dirs, files in os.walk(rootdir):
    for foldername in s:
        for gait in gaitpos:    
        
            gyr = eval(f'gyr{gait}_s{foldername[10]}')
            acc = eval(f'acc{gait}_s{foldername[10]}')
            
            if foldername.lower() in subdir.lower():
                if gait.lower() in subdir.lower():
    
                    for file in files:

                        if gyrFile.lower() in file.lower():
                            count = count + 1
                            # print(f'#{count} -- {os.path.join(subdir, file)}')

                            with open(os.path.join(subdir, file), 'r') as f: # open in read-only mode
                                gyrData = pd.read_csv(f)
                                # Assemble the data into a tensor
                                gyr.append(gyrData)

                        if accFile in file:
                            count = count + 1
                            # print(f'#{count} -- {os.path.join(subdir, file)}')

                            with open(os.path.join(subdir, file), 'r') as f: # open in read-only mode
                                accData = pd.read_csv(f)
                                # Assemble the data into a tensor
                                acc.append(accData)

#### Check if the data is collected correctly after parsing

In [6]:
# getting the data shape for reference
for j in gaitpos:
    for i in smartphone:
            gyrl = eval(f'gyr{j}_s{i}')
            accl = eval(f'acc{j}_s{i}')

            print(f'Shape of gyr{j}_s{i} is ------ {np.asarray((gyrl), dtype = list).shape}')
            print(f'Shape of acc{j}_s{i} is ------ {np.asarray((accl), dtype = list).shape} \n')

Shape of gyrNormal_s1 is ------ (8,)
Shape of accNormal_s1 is ------ (8,) 

Shape of gyrNormal_s3 is ------ (8,)
Shape of accNormal_s3 is ------ (8,) 

Shape of gyrUpstairs_s1 is ------ (8,)
Shape of accUpstairs_s1 is ------ (8,) 

Shape of gyrUpstairs_s3 is ------ (12,)
Shape of accUpstairs_s3 is ------ (12,) 

Shape of gyrDownstairs_s1 is ------ (8,)
Shape of accDownstairs_s1 is ------ (8,) 

Shape of gyrDownstairs_s3 is ------ (11,)
Shape of accDownstairs_s3 is ------ (11,) 



### Data Validation and Preprocessing

#### -- No need to run -- Frequency check of all the data before filtering

In [7]:
freqGyr = []
freqAcc = []

# Loop through the variables and check if all the data is collected
for j in gaitpos:
    for i in smartphone:
        # Calculating gyroscope frequency before preprocessing
        gyrl = eval(f'gyr{j}_s{i}')
        ng = len(gyrl)

        # loop to calculate the frequency over all the data
        for s in range(0, ng):
            time = np.asarray(gyrl[s].iloc[:,0])

            freqG = np.round(len(time)/(time[-1]))
            freqGyr.append(freqG)
        
        # Calculating accelerometer frequency before preprocessing
        accl = eval(f'acc{j}_s{i}')
        na = len(accl)

        # loop to calculate the frequency over all the data
        for s in range(0, na):
            time = np.asarray(accl[s].iloc[:,0])

            freqA = np.round(len(time)/(time[-1]))
            freqAcc.append(freqA)

for i in range(0, len(freqGyr)):
    if freqGyr[i] < 10:
        print(f'Position of the frequency issue is at freqGyr:{i} and the frequency is {freqGyr[i]}')
for j in range(0, len(freqAcc)):
    if freqAcc[j] < 10:
        print(f'Position of the frequency issue is at freqAcc:{j} and the frequency is {freqAcc[j]}')

### Filter Data

#### Lowpass filter (Butterworth) function

In [8]:
#Filter the data to remove noise

# inputs for the function is the data, the sampling frequency, the cutoff frequency.
def dataFilter(dataInput, fs=200, fc=5, order = 2):
    Filtrd = np.zeros(dataInput.shape)
    w = fc / (fs * 0.5)
    b, a = sp.butter(fc, w, 'low')
    
    #Filter the X, Y and Z axis of the input data
    for i in range(0, 3):
        Filtrd[:,i] = sp.filtfilt(b, a, dataInput.iloc[:,i+1])

    return Filtrd

##### -- Different filter check -- Chebyshev filter

In [9]:
'''
# chebyshev filter for the accelerometer data
def chebyFilter(dataInput, fs=200, fc=5, order = 2):
    Filtrd = np.zeros(dataInput.shape)
    w = fc / (fs * 0.5)
    b, a = sp.cheby2(order, 20, w, 'low')
    
    #Filter the X, Y and Z axis of the input data
    for i in range(0, 3):
        Filtrd[:,i] = sp.filtfilt(b,a, dataInput.iloc[:,i+1])
    
    return Filtrd
'''

"\n# chebyshev filter for the accelerometer data\ndef chebyFilter(dataInput, fs=200, fc=5, order = 2):\n    Filtrd = np.zeros(dataInput.shape)\n    w = fc / (fs * 0.5)\n    b, a = sp.cheby2(order, 20, w, 'low')\n    \n    #Filter the X, Y and Z axis of the input data\n    for i in range(0, 3):\n        Filtrd[:,i] = sp.filtfilt(b,a, dataInput.iloc[:,i+1])\n    \n    return Filtrd\n"

##### -- Test -- the dataFilter function and its dependency on the frequency input

In [10]:
'''gyrF = []
accF = []
ng = len(gyrNormal_s1)
na = len(accNormal_s1)

for i in range(0, ng):
    timeg = np.asarray(gyrNormal_s1[i].iloc[:,0])
    freqG = np.round(len(timeg)/(timeg[-1]))
    gyrF.append(freqG)

for j in range(0, na):
    timea = np.asarray(accNormal_s1[j].iloc[:,0])
    freqA = np.round(len(timea)/(timea[-1]))
    accF.append(freqA)

gyrFilt = []
accFilt = []

for rg in range(0, ng):
    gf = dataFilter(gyrNormal_s1[rg], fs = 100)
    gyrFilt.append(gf)

for ra in range(0, na):
    af = dataFilter(accNormal_s1[ra], fs = 100)
    accFilt.append(af)

t1 = np.asarray(gyrNormal_s1[0].iloc[:,0])
f1 = np.round(len(t1)/(t1[-1]))

xyz = dataFilter(gyrNormal_s1[0], fs = f1)

plt.close('all')
plt.plot(gyrNormal_s1[0].iloc[:,1], 'r')
plt.plot(xyz[:,0], 'b')
plt.show()'''

"gyrF = []\naccF = []\nng = len(gyrNormal_s1)\nna = len(accNormal_s1)\n\nfor i in range(0, ng):\n    timeg = np.asarray(gyrNormal_s1[i].iloc[:,0])\n    freqG = np.round(len(timeg)/(timeg[-1]))\n    gyrF.append(freqG)\n\nfor j in range(0, na):\n    timea = np.asarray(accNormal_s1[j].iloc[:,0])\n    freqA = np.round(len(timea)/(timea[-1]))\n    accF.append(freqA)\n\ngyrFilt = []\naccFilt = []\n\nfor rg in range(0, ng):\n    gf = dataFilter(gyrNormal_s1[rg], fs = 100)\n    gyrFilt.append(gf)\n\nfor ra in range(0, na):\n    af = dataFilter(accNormal_s1[ra], fs = 100)\n    accFilt.append(af)\n\nt1 = np.asarray(gyrNormal_s1[0].iloc[:,0])\nf1 = np.round(len(t1)/(t1[-1]))\n\nxyz = dataFilter(gyrNormal_s1[0], fs = f1)\n\nplt.close('all')\nplt.plot(gyrNormal_s1[0].iloc[:,1], 'r')\nplt.plot(xyz[:,0], 'b')\nplt.show()"

#### Filtering all the data

In [11]:
# Create arrays for Filtered data
for x in gaitpos:
    for y in smartphone:
        # variables of Filtered data
        globals()['gyrFiltered%s_s%s' % (x, y)] = []
        globals()['accFiltered%s_s%s' % (x, y)] = []
##---------------------------------------------------------------------------------------------------------------------

for j in gaitpos:
    for i in smartphone:
            # variables of raw the data
            gyrl = eval(f'gyr{j}_s{i}')
            accl = eval(f'acc{j}_s{i}')
            
            # variables of filtered the data
            gyrF = eval(f'gyrFiltered{j}_s{i}')
            accF = eval(f'accFiltered{j}_s{i}')

            # getting the shape of the gyroscope data (first integer)
            ng = len(gyrl)

            # Getting the shape of the accelerometer data (first integer)
            na = len(accl)


            # Filter all the data
            for rg in range(0, ng):
                timeg = np.asarray(gyrl[rg].iloc[:,0])
                fG = np.round(len(timeg)/(timeg[-1]))

                gf = dataFilter(gyrl[rg], fs = fG)
                gyrF.append(gf)

            for ra in range(0, na):
                timea = np.asarray(accl[ra].iloc[:,0])
                fA = np.round(len(timea)/(timea[-1]))

                af = dataFilter(accl[ra], fs = fA)
                accF.append(af)

#### Check if all the data is collected correctly after filtering

In [12]:
# getting the data shape for reference
for j in gaitpos:
    for i in smartphone:
            gyrl = eval(f'gyrFiltered{j}_s{i}')
            accl = eval(f'accFiltered{j}_s{i}')

            print(f'Shape of gyrFiltered{j}_s{i} is ------ {np.asarray((gyrl), dtype = list).shape}')
            print(f'Shape of accFiltered{j}_s{i} is ------ {np.asarray((accl), dtype = list).shape} \n')

Shape of gyrFilteredNormal_s1 is ------ (8,)
Shape of accFilteredNormal_s1 is ------ (8,) 

Shape of gyrFilteredNormal_s3 is ------ (8,)
Shape of accFilteredNormal_s3 is ------ (8,) 

Shape of gyrFilteredUpstairs_s1 is ------ (8,)
Shape of accFilteredUpstairs_s1 is ------ (8,) 

Shape of gyrFilteredUpstairs_s3 is ------ (12,)
Shape of accFilteredUpstairs_s3 is ------ (12,) 

Shape of gyrFilteredDownstairs_s1 is ------ (8,)
Shape of accFilteredDownstairs_s1 is ------ (8,) 

Shape of gyrFilteredDownstairs_s3 is ------ (11,)
Shape of accFilteredDownstairs_s3 is ------ (11,) 



#### Delete extra -null- column on the right

In [13]:
for j in gaitpos:
    for i in smartphone:
        gyrl = eval(f'gyrFiltered{j}_s{i}')
        accl = eval(f'accFiltered{j}_s{i}')
        for k in range(0, len(gyrl)):
            d = np.delete(gyrl[k], 3, axis = 1)
            e = np.delete(accl[k], 3, axis = 1)
            gyrl[k] = d
            accl[k] = e

### Cut data

#### Cut data function

In [14]:
def cutData(gyrFilt, accFilt, freq):
    acc_abs = np.linalg.norm(accFilt, axis=1)
    
    ht = np.round(np.mean(acc_abs[int(0.3*len(acc_abs)):int(0.7*len(acc_abs))])).astype(int)

    peaks, _ = sp.find_peaks(acc_abs, height = -10, distance=freq*0.5)
    diff_peaks = np.diff(peaks)
    gap1 = np.argmax(diff_peaks[:int(0.4*(len(diff_peaks)))])
    gap2 = np.argmax(diff_peaks[-int(0.4*len(diff_peaks)):])
    
    gap2 = int(np.shape(diff_peaks)[0] - int(0.4*len(diff_peaks)) + gap2)

    gyr_cut = gyrFilt[peaks[gap1+1]:peaks[gap2], :]
    acc_cut = accFilt[peaks[gap1+1]:peaks[gap2], :]

    return gyr_cut, acc_cut

#### -- check -- Cut data function

In [15]:
'''
gyrNormalCtted, accNormalCtted = [], []

for x in range(0, len(gyrNormal_s3)):
    num = x
    
    nq = len(accNormal_s3[num])
    tq = np.asarray(gyrNormal_s3[num].iloc[:,0])
    freq = np.round(len(tq)/(tq[-1]))
    # print(f'The frequency of the data is {freq}')

    acc_abs = np.linalg.norm(accFilteredNormal_s3[num], axis=1)

    # ht = np.round(np.mean(acc_abs[int(0.2*len(acc_abs)):int(0.7*len(acc_abs))])).astype(int)
    # ht = np.round(np.mean(acc_abs)).astype(int)
    # print(f'The average height of the data is {ht}')
    peaks, _ = sp.find_peaks(acc_abs, height = 0, distance=freq*0.5)
    # print(f'The number of peaks is {(peaks)}')

    diff_peaks = np.diff(peaks)
    # print(f'The difference of peaks is {(diff_peaks)}')

    gap1 = np.argmax(diff_peaks[:20])
    # print(f'The first gap is {gap1}')

    gap2 = np.argmax(diff_peaks[-10:])
    gap2 = int(np.shape(diff_peaks)[0] - 10 + gap2)
    # print(f'The second gap is {gap2}')

    gyr_cut = gyrFilteredNormal_s3[num][peaks[gap1+1]:peaks[gap2], :]
    acc_cut = accFilteredNormal_s3[num][peaks[gap1+1]:peaks[gap2], :]

    gyrNormalCtted.append(gyr_cut)
    accNormalCtted.append(acc_cut)
'''

"\ngyrNormalCtted, accNormalCtted = [], []\n\nfor x in range(0, len(gyrNormal_s3)):\n    num = x\n    \n    nq = len(accNormal_s3[num])\n    tq = np.asarray(gyrNormal_s3[num].iloc[:,0])\n    freq = np.round(len(tq)/(tq[-1]))\n    # print(f'The frequency of the data is {freq}')\n\n    acc_abs = np.linalg.norm(accFilteredNormal_s3[num], axis=1)\n\n    # ht = np.round(np.mean(acc_abs[int(0.2*len(acc_abs)):int(0.7*len(acc_abs))])).astype(int)\n    # ht = np.round(np.mean(acc_abs)).astype(int)\n    # print(f'The average height of the data is {ht}')\n    peaks, _ = sp.find_peaks(acc_abs, height = 0, distance=freq*0.5)\n    # print(f'The number of peaks is {(peaks)}')\n\n    diff_peaks = np.diff(peaks)\n    # print(f'The difference of peaks is {(diff_peaks)}')\n\n    gap1 = np.argmax(diff_peaks[:20])\n    # print(f'The first gap is {gap1}')\n\n    gap2 = np.argmax(diff_peaks[-10:])\n    gap2 = int(np.shape(diff_peaks)[0] - 10 + gap2)\n    # print(f'The second gap is {gap2}')\n\n    gyr_cut = 

#### -- Test -- Cut Data line by line

In [16]:
'''
num = 7

nq = len(accNormal_s3[num])
tq = np.asarray(gyrNormal_s3[num].iloc[:,0])
freq = np.round(len(tq)/(tq[-1]))
print(f'The frequency of the data is {freq}')

acc_abs = np.linalg.norm(accFilteredNormal_s3[num], axis=1)

ht = np.round(np.mean(acc_abs[int(0.2*len(acc_abs)):int(0.7*len(acc_abs))])).astype(int)
# ht = np.round(np.mean(acc_abs)).astype(int)
print(f'The average height of the data is {ht}')
peaks, _ = sp.find_peaks(acc_abs, height = 0, distance=freq*0.5)
print(f'The number of peaks is {(peaks)}')

diff_peaks = np.diff(peaks)
print(f'The difference of peaks is {(diff_peaks)}')

gap1 = np.argmax(diff_peaks[:int(0.1*(len(diff_peaks)))])
print(f'The first gap is {gap1}')

gap2 = np.argmax(diff_peaks[-int(0.1*len(diff_peaks)):])
gap2 = int(np.shape(diff_peaks)[0] - int(0.2*len(diff_peaks)) + gap2)
print(f'The second gap is {gap2}')

gyr_cut = gyrFilteredNormal_s3[num][peaks[gap1+1]:peaks[gap2], :]
acc_cut = accFilteredNormal_s3[num][peaks[gap1+1]:peaks[gap2], :]

plt.close()
# plt.plot(gyrFilteredNormal_s1[1][:,0], label='Raw Data', c = 'b')

plt.plot(acc_abs, label = 'Accelerometer Absolute Data', c = 'grey')
plt.plot(np.linspace(0, len(gyrNormal_s3[num])), np.full_like(np.linspace(0, len(gyrNormal_s3[num])), ht), label = 'Average Height', c = 'r')

plt.plot(peaks, acc_abs[peaks], 'x', label = 'Peaks', c = 'y')
plt.plot(np.linspace(0, len(gyrNormal_s3[num])), np.zeros_like(np.linspace(0, len(gyrNormal_s3[num]))), linestyle = "--", linewidth = 0.2, dashes=(5, 15), color="black")

plt.plot(peaks[gap1+1], acc_abs[peaks[gap1+1]], 'o', label = 'Gap 1', c = 'g')
plt.plot(peaks[gap2], acc_abs[peaks[gap2]], 'o', label = 'Gap 2', c = 'black')

plt.plot(gyr_cut[:,0], label = 'Gyr Cut', c='r')

plt.legend()
plt.grid()
plt.show()
'''

'\nnum = 7\n\nnq = len(accNormal_s3[num])\ntq = np.asarray(gyrNormal_s3[num].iloc[:,0])\nfreq = np.round(len(tq)/(tq[-1]))\nprint(f\'The frequency of the data is {freq}\')\n\nacc_abs = np.linalg.norm(accFilteredNormal_s3[num], axis=1)\n\nht = np.round(np.mean(acc_abs[int(0.2*len(acc_abs)):int(0.7*len(acc_abs))])).astype(int)\n# ht = np.round(np.mean(acc_abs)).astype(int)\nprint(f\'The average height of the data is {ht}\')\npeaks, _ = sp.find_peaks(acc_abs, height = 0, distance=freq*0.5)\nprint(f\'The number of peaks is {(peaks)}\')\n\ndiff_peaks = np.diff(peaks)\nprint(f\'The difference of peaks is {(diff_peaks)}\')\n\ngap1 = np.argmax(diff_peaks[:int(0.1*(len(diff_peaks)))])\nprint(f\'The first gap is {gap1}\')\n\ngap2 = np.argmax(diff_peaks[-int(0.1*len(diff_peaks)):])\ngap2 = int(np.shape(diff_peaks)[0] - int(0.2*len(diff_peaks)) + gap2)\nprint(f\'The second gap is {gap2}\')\n\ngyr_cut = gyrFilteredNormal_s3[num][peaks[gap1+1]:peaks[gap2], :]\nacc_cut = accFilteredNormal_s3[num][pea

#### Cut all the data

In [17]:
# Create arrays for Cut data
for x in gaitpos:
    for y in smartphone:
        # variables of Cut data
        globals()['gyrCut%s_s%s' % (x, y)] = []
        globals()['accCut%s_s%s' % (x, y)] = []
##---------------------------------------------------------------------------------------------------------------------

for j in gaitpos:
    for i in smartphone:
            # variables of raw the data
            gyrl = eval(f'gyr{j}_s{i}')
            accl = eval(f'acc{j}_s{i}')
            
            # variables of filtered the data
            gyrF = eval(f'gyrFiltered{j}_s{i}')
            accF = eval(f'accFiltered{j}_s{i}')

            # variables of cutted the data
            gyrC = eval(f'gyrCut{j}_s{i}')
            accC = eval(f'accCut{j}_s{i}')

            # getting the shape of the gyroscope and accelerometer data
            n = len(gyrF)

            # Filter all the data
            for r in range(0, n):
                
                tCut = np.asarray(gyrl[r].iloc[:,0])
                freqCut = np.round(len(tCut)/(tCut[-1]))
                # print(f'accFiltered{j}_s{i} - {[r]}')
                gf, aF = cutData(gyrF[r], accF[r], freqCut)

                gyrC.append(gf)
                accC.append(aF)

#### check if all the data is collected correctly after cutting

In [18]:
# getting the data shape for reference
for j in gaitpos:
    for i in smartphone:
            gyr = eval(f'gyrCut{j}_s{i}')
            acc = eval(f'accCut{j}_s{i}')

            print(f'Shape of gyrCut{j}_s{i} is ------ {np.asarray((gyr), dtype = list).shape}')
            print(f'Shape of accCut{j}_s{i} is ------ {np.asarray((acc), dtype = list).shape} \n')

Shape of gyrCutNormal_s1 is ------ (8,)
Shape of accCutNormal_s1 is ------ (8,) 

Shape of gyrCutNormal_s3 is ------ (8,)
Shape of accCutNormal_s3 is ------ (8,) 

Shape of gyrCutUpstairs_s1 is ------ (8,)
Shape of accCutUpstairs_s1 is ------ (8,) 

Shape of gyrCutUpstairs_s3 is ------ (12,)
Shape of accCutUpstairs_s3 is ------ (12,) 

Shape of gyrCutDownstairs_s1 is ------ (8,)
Shape of accCutDownstairs_s1 is ------ (8,) 

Shape of gyrCutDownstairs_s3 is ------ (11,)
Shape of accCutDownstairs_s3 is ------ (11,) 



#### Plot raw data, filtered data, and cut data

In [19]:
# pos = 'Normal', 'Upstairs', 'Downstairs'
pos = 'Normal'
# loc = 1, 3
loc = 3
# no = 0-7
no = 5
# axis = 0, 1, 2
axis = 0

plt.close()
fig, axs = plt.subplots(3, 1, sharex=True, sharey=True)

axs[0].plot(eval(f'gyr{pos}_s{loc}')[no].iloc[:,(axis+1)], label='Raw Data', c = 'c')
axs[1].plot(eval(f'gyrFiltered{pos}_s{loc}')[no][:,axis], label='Filtered Data', c = 'b')
axs[2].plot(eval(f'gyrCut{pos}_s{loc}')[no][:,axis], label = 'Gyr Cutted Data', c = 'r')
# plt.plot(eval(f'gyrResample{pos}_s{loc}')[no][:,axis], label = 'Gyr Resampled Data', c = 'g')

axs[0].legend()
axs[1].legend()
axs[2].legend()
plt.show()

#### -- Test -- Save all cut data to plots

In [20]:
'''
for j in gaitpos:
    for i in smartphone:
        # variables of raw the data
        gyrl = eval(f'gyr{j}_s{i}')
        accl = eval(f'acc{j}_s{i}')
        # variables of filtered the data
        gyrF = eval(f'gyrFiltered{j}_s{i}')
        accF = eval(f'accFiltered{j}_s{i}')
        # variables of cutted the data
        gyrC = eval(f'gyrCut{j}_s{i}')
        accC = eval(f'accCut{j}_s{i}')

        for rg in range(0, len(gyrF)):
            plt.close()
            plt.plot(gyrl[rg].iloc[:,1], label='Raw Gyroscope Data', c = 'c')
            # plt.plot(accl[rg].iloc[:,1], label='Raw Accelerometer Data', c = 'm')

            plt.plot(gyrF[rg][:,0], label='Filtered Gyroscope Data', c = 'b')
            # plt.plot(accF[rg][:,0], label='Filtered Accelerometer Data', c = 'g')

            plt.plot(gyrC[rg][:,0], label='Gyroscope Cut Data', c = 'y')
            # plt.plot(accC[rg][:,0], label = 'Accelerometer Cut Data', c = 'r')

            plt.legend()
            plt.grid()
            plt.ioff()
            # plt.savefig(rootdir+f'/2 Accelerometer_Pictures/acc{j}_s{i}_{rg}.png')
            plt.savefig(rootdir+f'/1 Gyroscope_Pictures/gyr{j}_s{i}_{rg}.png')
'''

"\nfor j in gaitpos:\n    for i in smartphone:\n        # variables of raw the data\n        gyrl = eval(f'gyr{j}_s{i}')\n        accl = eval(f'acc{j}_s{i}')\n        # variables of filtered the data\n        gyrF = eval(f'gyrFiltered{j}_s{i}')\n        accF = eval(f'accFiltered{j}_s{i}')\n        # variables of cutted the data\n        gyrC = eval(f'gyrCut{j}_s{i}')\n        accC = eval(f'accCut{j}_s{i}')\n\n        for rg in range(0, len(gyrF)):\n            plt.close()\n            plt.plot(gyrl[rg].iloc[:,1], label='Raw Gyroscope Data', c = 'c')\n            # plt.plot(accl[rg].iloc[:,1], label='Raw Accelerometer Data', c = 'm')\n\n            plt.plot(gyrF[rg][:,0], label='Filtered Gyroscope Data', c = 'b')\n            # plt.plot(accF[rg][:,0], label='Filtered Accelerometer Data', c = 'g')\n\n            plt.plot(gyrC[rg][:,0], label='Gyroscope Cut Data', c = 'y')\n            # plt.plot(accC[rg][:,0], label = 'Accelerometer Cut Data', c = 'r')\n\n            plt.legend()\n      

### Cut data of each single motion

#### 2nd Cut data function

In [21]:
def cut2Data(gyrCut, accCut, freq):
    # axis to consider for the cut data again (0, 1, 2)
    axis = 1

    peaks, _ = sp.find_peaks(gyrCut[:,axis], height = 0, distance=freq*0.5)
    
    gyr_2cut = []
    acc_2cut = []
    
    for i in range(0, len(peaks)-1):
           
        gyr = gyrCut[peaks[i]:peaks[i+1], :]
        acc = accCut[peaks[i]:peaks[i+1], :]

        gyr_2cut.append(gyr)
        acc_2cut.append(acc)

    return gyr_2cut, acc_2cut

#### -- Check -- cut data algorithm

In [22]:
'''
axis = 1
tCut = np.asarray(gyrNormal_s3[0].iloc[:,0])
freq = np.round(len(tCut)/(tCut[-1]))
gyr2cut = []
acc2cut = []

peaks, _ = sp.find_peaks(gyrCutNormal_s3[0][:,axis], height = -10, distance=freq*0.5)
for i in range(0, len(peaks)-1):

    gyr = gyrCutNormal_s3[0][peaks[i]:peaks[i+1], :]
    acc = accCutNormal_s3[0][peaks[i]:peaks[i+1], :]

    gyr2cut.append(gyr)
    acc2cut.append(acc)
'''

'\naxis = 1\ntCut = np.asarray(gyrNormal_s3[0].iloc[:,0])\nfreq = np.round(len(tCut)/(tCut[-1]))\ngyr2cut = []\nacc2cut = []\n\npeaks, _ = sp.find_peaks(gyrCutNormal_s3[0][:,axis], height = -10, distance=freq*0.5)\nfor i in range(0, len(peaks)-1):\n\n    gyr = gyrCutNormal_s3[0][peaks[i]:peaks[i+1], :]\n    acc = accCutNormal_s3[0][peaks[i]:peaks[i+1], :]\n\n    gyr2cut.append(gyr)\n    acc2cut.append(acc)\n'

In [23]:
'''
plt.close()
fig, axs = plt.subplots(2, 1, sharex=True)

axs[0].plot(gyrCutNormal_s3[0][:,1], label = 'Cut Gyroscope Data', c = 'c')
axs[0].plot(peaks, gyrCutNormal_s3[0][:,1][peaks], 'x', label = 'Peaks', c = 'r')
axs[1].plot(gyr2cut[0][:,1], label = '2nd Cut Gyroscope Data', c = 'b')

plt.legend()
plt.show()
'''

"\nplt.close()\nfig, axs = plt.subplots(2, 1, sharex=True)\n\naxs[0].plot(gyrCutNormal_s3[0][:,1], label = 'Cut Gyroscope Data', c = 'c')\naxs[0].plot(peaks, gyrCutNormal_s3[0][:,1][peaks], 'x', label = 'Peaks', c = 'r')\naxs[1].plot(gyr2cut[0][:,1], label = '2nd Cut Gyroscope Data', c = 'b')\n\nplt.legend()\nplt.show()\n"

#### 2nd Cut all the data

In [24]:
# Create arrays for 2nd Cut data
for x in gaitpos:
    for y in smartphone:
        # variables of raw the data
        globals()['gyr2Cut%s_s%s' % (x, y)] = []
        globals()['acc2Cut%s_s%s' % (x, y)] = []
##---------------------------------------------------------------------------------------------------------------------

for j in gaitpos:
    for i in smartphone:
        # variables of raw the data
        gyrl = eval(f'gyr{j}_s{i}')
        accl = eval(f'acc{j}_s{i}')
        
        gyrc = eval(f'gyrCut{j}_s{i}')
        accc = eval(f'accCut{j}_s{i}')
        
        gyr2c = eval(f'gyr2Cut{j}_s{i}')
        acc2c = eval(f'acc2Cut{j}_s{i}')

        # Filter all the data
        for r in range(0, len(gyrc)):
            
            tCut = np.asarray(gyrl[r].iloc[:,0])
            freqCut = np.round(len(tCut)/(tCut[-1]))
            
            gf, aF = cut2Data(gyrc[r], accc[r], freqCut)

            gyr2c.append(gf)
            acc2c.append(aF)

#### Plot to check the 2nd cut data

In [25]:
# print(peaks)
tCut = np.asarray(gyrNormal_s3[1].iloc[:,0])
freqCut = np.round(len(tCut)/(tCut[-1]))
peaks, _ = sp.find_peaks(gyrCutNormal_s3[1][:,1], height = 0, distance=freqCut*0.6)

plt.close()
fig, axs = plt.subplots(2, 1, sharex = True)

axs[0].plot(gyrCutNormal_s3[1][:,1], label = 'Cut Gyroscope Data', c = 'c')
axs[0].plot(peaks, gyrCutNormal_s3[1][:,1][peaks], 'x', label = 'Peaks', c = 'r')
axs[1].plot(gyr2CutNormal_s3[1][0][:,1], label = '2nd Cut Gyroscope Data', c = 'b')

# axs[0].legend()
axs[1].legend()
plt.show()

### Resample data

#### Resample all the data

In [26]:
# Create arrays for resampled data
for x in gaitpos:
    for y in smartphone:
        # variables of raw the data
        globals()['gyrResample%s_s%s' % (x, y)] = []
        globals()['accResample%s_s%s' % (x, y)] = []
##---------------------------------------------------------------------------------------------------------------------

size = 50

for j in gaitpos:
    for i in smartphone:
            gyrl = eval(f'gyr2Cut{j}_s{i}')
            accl = eval(f'acc2Cut{j}_s{i}')

            gyrrs = eval(f'gyrResample{j}_s{i}')
            accrs = eval(f'accResample{j}_s{i}')

            for rg in range(0, len(gyrl)):
                gr = []; ar = []
                for rp in range(0, len(gyrl[rg])):

                    g = sp.resample(gyrl[rg][rp], size, axis = 0)
                    a = sp.resample(accl[rg][rp], size, axis = 0)
                    
                    gr.append(g)
                    ar.append(a)

                gyrrs.append(gr)
                accrs.append(ar)

#### -- Sample Code -- Test code for resampling

In [27]:
'''
q = []
for j in range(0, len(gyr2CutNormal_s3)):
    p = []
    for i in range(0, len(gyr2CutNormal_s3[j])):
        x = sp.resample(gyr2CutNormal_s3[j][i], 50, axis = 0)
        p.append(x)
    q.append(p)
'''

'\nq = []\nfor j in range(0, len(gyr2CutNormal_s3)):\n    p = []\n    for i in range(0, len(gyr2CutNormal_s3[j])):\n        x = sp.resample(gyr2CutNormal_s3[j][i], 50, axis = 0)\n        p.append(x)\n    q.append(p)\n'

#### Check if all the data is collected properly after resampling

In [28]:
# getting the data shape for reference
for j in gaitpos:
    for i in smartphone:
            gyr = eval(f'gyrResample{j}_s{i}')
            acc = eval(f'accResample{j}_s{i}')

            print(f'Shape of gyrResample{j}_s{i} is ------ {np.asarray((gyr), dtype = list).shape}')
            print(f'Shape of accResample{j}_s{i} is ------ {np.asarray((acc), dtype = list).shape} \n')

Shape of gyrResampleNormal_s1 is ------ (8,)
Shape of accResampleNormal_s1 is ------ (8,) 

Shape of gyrResampleNormal_s3 is ------ (8,)
Shape of accResampleNormal_s3 is ------ (8,) 

Shape of gyrResampleUpstairs_s1 is ------ (8,)
Shape of accResampleUpstairs_s1 is ------ (8,) 

Shape of gyrResampleUpstairs_s3 is ------ (12,)
Shape of accResampleUpstairs_s3 is ------ (12,) 

Shape of gyrResampleDownstairs_s1 is ------ (8,)
Shape of accResampleDownstairs_s1 is ------ (8,) 

Shape of gyrResampleDownstairs_s3 is ------ (11,)
Shape of accResampleDownstairs_s3 is ------ (11,) 



#### Plot to compare the cut data, and resampled data

In [29]:
pos = 'Normal'  # pos = 'Normal', 'Upstairs', 'Downstairs'
loc = 3         # loc = 1, 3
no = 1          # no = 0-7
axis = 1        # axis = 0, 1, 2

# print(peaks)
tCut = np.asarray(eval(f'gyr{pos}_s{loc}')[no].iloc[:,0])
freqCut = np.round(len(tCut)/(tCut[-1]))
peaks, _ = sp.find_peaks(eval(f'gyrCut{pos}_s{loc}')[no][:,axis], height = 0, distance=freqCut*0.5)

plt.close()
fig, axs = plt.subplots(2,1, sharex=True)
fig.suptitle('Comparision of Resampled Data')

axs[0].plot(eval(f'gyrCut{pos}_s{loc}')[no][:,axis], label = 'Gyr Cutted Data', c = 'b')
axs[0].plot(peaks, eval(f'gyrCut{pos}_s{loc}')[no][:,axis][peaks], 'x', label = 'Peaks', c = 'r')
axs[1].plot(eval(f'gyrResample{pos}_s{loc}')[no][0][:,axis], label = 'Gyr Resampled Data', c = 'g')

axs[0].legend()
axs[1].legend()
plt.show()

### Combine data of all subjects

#### Create Variables for the combined data

In [30]:
# Create arrays for resampled data
for x in gaitpos:
    for y in smartphone:
        # variables of raw the data
        globals()['%s_s%s' % (x, y)] = []
##---------------------------------------------------------------------------------------------------------------------

#### Combining gyr and acc data

In [31]:
for j in gaitpos:
    for i in smartphone:
        gyr = eval(f'gyrResample{j}_s{i}')
        acc = eval(f'accResample{j}_s{i}')

        r = eval(f'{j}_s{i}')

        for y in range(0, len(gyr)):    
            o = []
            for x in range(0, len(gyr[y])):
                v = np.concatenate((gyr[y][x],acc[y][x]), axis = 1)
                o.append(v)
            r.append(o)

### PCA

#### run PCA on all the data

In [32]:
## Run PCA on the data
for j in gaitpos:
    for i in smartphone:
        phone = eval(f'{j}_s{i}')
        
        for x in range(0, len(phone)):
            for y in range(0, len(phone[x])):
                p = pd.DataFrame(phone[x][y])
                scale = StandardScaler().fit_transform(p)

                pca = PCA(n_components = 6)
                pca.fit(scale)
                phone[x][y] = pca.transform(scale)

#### combine all the data

In [33]:
var = np.round(pca.explained_variance_ratio_, decimals=3)*100
labels = [f'PC{x}' for x in range(1, len(var)+1)]


pcaDf = pd.DataFrame(columns = labels)
for j in gaitpos:
    for i in smartphone:
        phone = eval(f'{j}_s{i}')
        for x in range(0, len(phone)):
            xqc = pd.DataFrame(columns=labels)
            for y in range(0, len(phone[x])):
                pc = pd.DataFrame(phone[x][y], columns = labels)
                xqc = pd.concat([xqc, pc], ignore_index = True, axis = 0)
            pcaDf = pd.concat([pcaDf,xqc], ignore_index=True, axis = 0)

#### Check the scree plot

In [34]:
plt.close()
plt.bar(x = range(1, len(var)+1), height = var, tick_label = labels)
plt.ylabel('Percentage of Explained Variance')
plt.xlabel('Principal Component')
plt.title('SCREE PLOT')
plt.show()

#### -- Check -- which axis data is clustering from the scatter plot

In [35]:
'''
loadingScores = pd.Series(phone[0][0][:,0], index = np.linspace(0,49,50))
sortedLoadingScores = loadingScores.abs().sort_values(ascending=False)

topData = sortedLoadingScores[0:10].index.values

print('Top two principal components are -')
print(loadingScores[topData])
'''

"\nloadingScores = pd.Series(phone[0][0][:,0], index = np.linspace(0,49,50))\nsortedLoadingScores = loadingScores.abs().sort_values(ascending=False)\n\ntopData = sortedLoadingScores[0:10].index.values\n\nprint('Top two principal components are -')\nprint(loadingScores[topData])\n"

#### Scatter plot

In [36]:
plt.close()
plt.scatter(pcaDf.PC1, pcaDf.PC2, cmap = 'viridis', alpha = 0.3)

plt.title('Principal Component Analysis')
plt.xlabel(f'PC1 - {var[0]}%')
plt.ylabel(f'PC2 - {var[1]}%')
plt.show()

# for name in pcaDf.index:
#     plt.annotate(name, (pcaDf.PC1[name], pcaDf.PC2[name]))

#### Normalize the data

In [79]:
# Create arrays for normalized data
for x in gaitpos:
    for y in smartphone:
        # variables of raw the data
        globals()['%s_s%s_n' % (x, y)] = []
        globals()['%s_s%s_n' % (x, y)] = []
        globals()['%s_s%s_gait' % (x, y)] = []
##---------------------------------------------------------------------------------------------------------------------

for j in gaitpos:
    for i in smartphone:
        ss = eval(f'{j}_s{i}')
        ss_n = eval(f'{j}_s{i}_n')
        ss_gait=eval(f'{j}_s{i}_gait')
        for y in range(0, len(ss)):
            o = []
            for x in range(0, len(ss[y])):
                v = ss[y][x]
                v_n = preprocessing.normalize(v, norm = 'l2', axis = 1)
                o.append(v_n)
            ss_gait.append(j)
            ss_n.append(o)
Normal_s3_n

[[array([[-5.99322931e-01, -6.79540179e-01, -3.84765197e-01,
          -1.29102538e-01, -1.14387939e-01,  3.52256577e-02],
         [-5.53239102e-01, -7.37518194e-01, -3.44350774e-01,
          -1.24887633e-01, -1.23874055e-01,  2.17773174e-02],
         [-5.81459349e-01, -7.31825046e-01, -3.07657619e-01,
          -1.38081642e-01, -1.11374932e-01,  1.45945853e-02],
         [-5.92996174e-01, -7.42275312e-01, -2.57259578e-01,
          -1.44452759e-01, -1.01411849e-01,  7.03175688e-03],
         [-6.05215433e-01, -7.52595362e-01, -1.94612952e-01,
          -1.47509412e-01, -8.76363912e-02,  1.06597009e-03],
         [-6.22247598e-01, -7.56341108e-01, -1.20693746e-01,
          -1.47033146e-01, -6.74692120e-02, -4.27013572e-03],
         [-6.19427356e-01, -7.70814111e-01, -3.94853900e-02,
          -1.35094145e-01, -4.76289325e-02, -8.79302735e-03],
         [-6.25328400e-01, -7.69663437e-01,  4.66762566e-02,
          -1.17966063e-01, -1.82331121e-02, -1.24689606e-02],
         [-5.998

#### Combine and Label the normalized data

In [131]:
p = []
gait=[]
for k in gaitpos:
    for l in smartphone:

        if k == 'Normal':    
            for i in range(0, len(Normal_s3_n)):
                for j in range(0, len(Normal_s3_n[i])):
                    p.append(Normal_s3_n[i][j])
                    gait.append(k)
        elif k == 'Upstairs':
            for i in range(0, len(Upstairs_s3_n)):
                for j in range(0, len(Upstairs_s3_n[i])):
                    p.append(Upstairs_s3_n[i][j])
                    gait.append(k)
        elif k == 'Downstairs':
            for i in range(0, len(Downstairs_s3_n)):
                for j in range(0, len(Downstairs_s3_n[i])):
                    p.append(Downstairs_s3_n[i][j])
                    gait.append(k)


In [132]:
Y=gait
X=p


In [39]:
plt.close()
fig, axs = plt.subplots(2,1)
fig.suptitle('Comparision of Resampled Data')
axs[0].plot(Normal_s3_n[0][1][:,0], label = 'X (m/s^2)', c = 'r')
axs[1].plot(Normal_s3[0][1][:,0], label = 'X (m/s^2)', c = 'g')

plt.show()

#### Output from PCA (normalize before input to AI)

## ---> AI Model

In [71]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold


In [141]:
  X_train, X_test, y_train, y_test = train_test_split(X, Y, stratify=Y, shuffle=True, test_size=0.2)
  len(X_train),len(X_test)

(404, 102)

## KFold method 1

In [142]:
num_folds = 10

#Normalize the data
#X_train = X_train / 255
#y_test = y_test / 255

#Define per-fold score conatiners
accuracy_per_fold = []
loss_per_fold = []

In [143]:
# Merge train and tests
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

In [144]:
# Define the K-fold Cross Validator
kfold = KFold(n_splits=num_folds, shuffle=True)

In [149]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Conv2D

In [153]:
# # K-fold Cross Validation model evaluation
# fold_no = 1
# for train, test in kfold.split(inputs, targets):

#   # Define the model architecture
#   model = Sequential()
#   model.add(Conv2D(16, kernel_size=(2, 2), activation='relu', input_shape = len(X_train)))
#   model.add(Dropout(0.1))
  
#   model.add(Conv2D(32, kernel_size=(2, 2), activation='relu'))
#   model.add(Dropout(0.2))

#   model.add(Flatten())
    
#   model.add(Dense(64, activation='relu'))
#   model.add(Dropout(0.5))
#   #model.add(Dense(128, activation='relu'))
#   model.add(Dense(6, activation='softmax'))

#   # Compile the model
#   model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])


#   # Generate a print
#   print('------------------------------------------------------------------------')
#   print(f'Training for fold {fold_no} ...')

#   # Fit data to model
#   history = model.fit(inputs[train], targets[train], batch_size=50, epochs=25, verbose=1)
 
#   # Generate generalization metrics
#   scores = model.evaluate(inputs[test], targets[test], verbose=0)
#   print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
#   accuracy_per_fold.append(scores[1] * 100)
#   loss_per_fold.append(scores[0])

#   # Increase fold number
#   fold_no = fold_no + 1

TypeError: 'int' object is not iterable

## Kfold method 2

In [136]:
# Function to create model, required for KerasClassifier
def create_model():
	# create model
	model = Sequential()
	model.add(Dense(12, input_dim=8, activation='relu'))
	model.add(Dense(8, activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

In [140]:
# # fix random seed for reproducibility
# seed = 7
# np.random.seed(seed)
# # load pima indians dataset
# # dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")

# # create model
# model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10)
# # evaluate using 10-fold cross validation
# kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

# model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10)
# results = cross_val_score(model, X=X_test,Y=y_test, cv=5)

  model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10)
  model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10)


TypeError: cross_val_score() got an unexpected keyword argument 'Y'

## Kfold method 3

In [154]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
cvscores = []
for train, test in kfold.split(X,Y):
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X[train], Y[train], epochs=150, batch_size=10, verbose=0)
    scores = model.evaluate(X[test], Y[test], verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)
    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

TypeError: only integer scalar arrays can be converted to a scalar index

### Create combined dataframes