# CIE Project A - Group 7

## ---> Data processing

### Imports

In [None]:
import os

import pandas as pd
import numpy as np
from scipy import signal as sp
import matplotlib.pyplot as plt
plt.rcParams['figure.dpi'] = 100
%matplotlib widget

# import tensorflow as tf

#### Get the root directory and change to it

In [None]:
# Directory the notebook is running from; it is the starting point of the os.walk data scan below.
rootdir = os.getcwd()
# rootdir was just read from os.getcwd(), so this keeps the working directory where it already is.
os.chdir(rootdir)

### Variable Assignment

In [None]:
# Substrings used to recognise which sensor a data file belongs to.
gyrFile, accFile = 'Gyroscope', 'Accelerometer'

# Gait activities. The same words are matched against directory names and
# are used to build the names of the per-activity data lists.
normal, upstairs, downstairs = 'Normal', 'Upstairs', 'Downstairs'
gaitpos = [normal, upstairs, downstairs]

# Smartphones used in this notebook: numeric ids and the matching folder names.
smartphone = [1, 3]
s1, s3 = 'Smartphone1', 'Smartphone3'
s = [s1, s3]

### Data Parsing

#### Define variables for raw data

In [None]:
# Running total of sensor files read by the parsing cell.
count = 0

# Naming convention: gyr(oscope) / acc(elerometer) + gait position + _s(martphone location).
# Every variable is a list of DataFrames, one per recording, so
# gyr{gaitpos}_s{smartphone_location}[0, 1, 2, ...] gives the recordings in the order they were parsed.
# Example given by the authors (sorted by name in the file directory):
#   gyrNormal_s1[0] -> gyroscope data of subject180_Normal sample 01
#   gyrNormal_s1[1] -> gyroscope data of subject180_Normal sample 02
#   gyrNormal_s1[2] -> gyroscope data of subject181_Normal sample 01
#   and so on.

# Raw gyroscope recordings (smartphone 1, then smartphone 3)
gyrNormal_s1, gyrUpstairs_s1, gyrDownstairs_s1 = [], [], []
gyrNormal_s3, gyrUpstairs_s3, gyrDownstairs_s3 = [], [], []

# Raw accelerometer recordings (smartphone 1, then smartphone 3)
accNormal_s1, accUpstairs_s1, accDownstairs_s1 = [], [], []
accNormal_s3, accUpstairs_s3, accDownstairs_s3 = [], [], []

#### Parsing gyroscope and accelerometer data in all gait positions (Smartphone 1 - Rear Right) & (Smartphone 3 - Front Right)

In [None]:
# Walk the data tree below rootdir and load every gyroscope / accelerometer CSV
# into the list that matches its smartphone folder and gait position.
for subdir, dirs, files in os.walk(rootdir):
    # os.walk returns entries in arbitrary order. Sorting dirs in place makes the
    # traversal, and therefore the index of every recording in the lists,
    # deterministic and ordered by name.
    dirs.sort()
    subdirLower = subdir.lower()

    for foldername in s:
        if foldername.lower() not in subdirLower:
            continue

        for gait in gaitpos:
            if gait.lower() not in subdirLower:
                continue

            # foldername[10] is the trailing digit of 'Smartphone1' / 'Smartphone3'.
            # The target lists are looked up by name only for matching directories.
            gyr = globals()[f'gyr{gait}_s{foldername[10]}']
            acc = globals()[f'acc{gait}_s{foldername[10]}']

            for file in sorted(files):
                fileLower = file.lower()
                filePath = os.path.join(subdir, file)

                if gyrFile.lower() in fileLower:
                    count = count + 1
                    # print(f'#{count} -- {filePath}')

                    with open(filePath, 'r') as f: # open in read-only mode
                        gyrData = pd.read_csv(f)
                        gyr.append(gyrData)

                # Case-insensitive, like the gyroscope check above, so that a file
                # named e.g. 'accelerometer.csv' is not silently skipped.
                if accFile.lower() in fileLower:
                    count = count + 1
                    # print(f'#{count} -- {filePath}')

                    with open(filePath, 'r') as f: # open in read-only mode
                        accData = pd.read_csv(f)
                        acc.append(accData)

#### Check if the data is collected correctly after parsing

In [None]:
# getting the data shape for reference
# Print the shape of every raw data list so missing or extra recordings stand out.
for j in gaitpos:
    for i in smartphone:
        gyrl = eval(f'gyr{j}_s{i}')
        accl = eval(f'acc{j}_s{i}')

        # dtype=list makes numpy build an object array from the list of recordings
        gyrShape = np.asarray((gyrl), dtype = list).shape
        print(f'Shape of gyr{j}_s{i} is ------ {gyrShape}')

        accShape = np.asarray((accl), dtype = list).shape
        print(f'Shape of acc{j}_s{i} is ------ {accShape} \n')

### Data Validation and Preprocessing

##### -- No need to run -- Frequency check of all the data before filtering

In [None]:
# Estimated sampling frequency of every raw recording, in parsing order.
freqGyr = []
freqAcc = []

# Loop through the variables and estimate the frequency of every recording.
# The recordings are iterated directly instead of through an index called `s`:
# `s` is the global list of smartphone folder names used by the parsing cell,
# and overwriting it here would break that cell when it is run again.
for j in gaitpos:
    for i in smartphone:
        # Calculating gyroscope frequency before preprocessing
        gyrl = eval(f'gyr{j}_s{i}')
        ng = len(gyrl)

        for gyrFrame in gyrl:
            time = np.asarray(gyrFrame.iloc[:,0])

            # samples / last value of column 0
            # (assumes column 0 is elapsed time starting near 0 -- TODO confirm)
            freqG = np.round(len(time)/(time[-1]))
            freqGyr.append(freqG)

        # Calculating accelerometer frequency before preprocessing
        accl = eval(f'acc{j}_s{i}')
        na = len(accl)

        for accFrame in accl:
            time = np.asarray(accFrame.iloc[:,0])

            freqA = np.round(len(time)/(time[-1]))
            freqAcc.append(freqA)

# Report every recording whose estimated frequency is below 10.
for i, freqG in enumerate(freqGyr):
    if freqG < 10:
        print(f'Position of the frequency issue is at freqGyr:{i} and the frequency is {freqG}')
for j, freqA in enumerate(freqAcc):
    if freqA < 10:
        print(f'Position of the frequency issue is at freqAcc:{j} and the frequency is {freqA}')

#### Filter Data

##### Lowpass filter (Butterworth) function

In [None]:
#Filter the data to remove noise

# inputs for the function is the data, the sampling frequency, the cutoff frequency.
def dataFilter(dataInput, fs=200, fc=5, order = 2):
    """Low-pass filter the three axis columns of a recording (Butterworth, forward-backward).

    Parameters
    ----------
    dataInput : pandas.DataFrame
        Recording whose columns 1, 2 and 3 hold the X, Y and Z axes.
        Column 0 is not filtered (the callers in this notebook read it as time).
    fs : float
        Sampling frequency of the recording.
    fc : float
        Cutoff frequency, in the same unit as ``fs``.
    order : int
        Order of the Butterworth filter.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``dataInput``. Columns 0, 1 and 2 hold the
        filtered X, Y and Z axes; any further column is left at zero.
    """
    Filtrd = np.zeros(dataInput.shape)

    # Cutoff normalised by the Nyquist frequency, as scipy expects for digital filters
    w = fc / (fs * 0.5)

    # The first argument of butter is the filter ORDER, not the cutoff frequency
    b, a = sp.butter(order, w, 'low')

    # Filter the X, Y and Z axis of the input data (input columns 1..3 -> output columns 0..2)
    for i in range(0, 3):
        Filtrd[:,i] = sp.filtfilt(b, a, dataInput.iloc[:,i+1])

    return Filtrd

##### -- Different filter check -- Chebyshev filter

In [None]:
# Disabled alternative filter: the code below sits inside a string literal, so it is never executed.
'''
# chebyshev filter for the accelerometer data
def chebyFilter(dataInput, fs=200, fc=5, order = 2):
    Filtrd = np.zeros(dataInput.shape)
    w = fc / (fs * 0.5)
    b, a = sp.cheby2(order, 20, w, 'low')
    
    #Filter the X, Y and Z axis of the input data
    for i in range(0, 3):
        Filtrd[:,i] = sp.filtfilt(b,a, dataInput.iloc[:,i+1])
    
    return Filtrd
'''

#### -- Test -- the dataFilter function and its dependency on the frequency input

In [None]:
# Disabled experiment: the code below sits inside a string literal, so it is never executed.
'''gyrF = []
accF = []
ng = len(gyrNormal_s1)
na = len(accNormal_s1)

for i in range(0, ng):
    timeg = np.asarray(gyrNormal_s1[i].iloc[:,0])
    freqG = np.round(len(timeg)/(timeg[-1]))
    gyrF.append(freqG)

for j in range(0, na):
    timea = np.asarray(accNormal_s1[j].iloc[:,0])
    freqA = np.round(len(timea)/(timea[-1]))
    accF.append(freqA)

gyrFilt = []
accFilt = []

for rg in range(0, ng):
    gf = dataFilter(gyrNormal_s1[rg], fs = 100)
    gyrFilt.append(gf)

for ra in range(0, na):
    af = dataFilter(accNormal_s1[ra], fs = 100)
    accFilt.append(af)

t1 = np.asarray(gyrNormal_s1[0].iloc[:,0])
f1 = np.round(len(t1)/(t1[-1]))

xyz = dataFilter(gyrNormal_s1[0], fs = f1)

plt.close('all')
plt.plot(gyrNormal_s1[0].iloc[:,1], 'r')
plt.plot(xyz[:,0], 'b')
plt.show()'''

#### Filtering all the data

In [None]:
# defining arrays for filtered gyroscope data
# Filtered gyroscope data, one list per gait position and smartphone
gyrFilteredNormal_s1, gyrFilteredUpstairs_s1, gyrFilteredDownstairs_s1 = [], [], []
gyrFilteredNormal_s3, gyrFilteredUpstairs_s3, gyrFilteredDownstairs_s3 = [], [], []

# Filtered accelerometer data, one list per gait position and smartphone
accFilteredNormal_s1, accFilteredUpstairs_s1, accFilteredDownstairs_s1 = [], [], []
accFilteredNormal_s3, accFilteredUpstairs_s3, accFilteredDownstairs_s3 = [], [], []

for j in gaitpos:
    for i in smartphone:
        # raw input lists
        gyrl = eval(f'gyr{j}_s{i}')
        accl = eval(f'acc{j}_s{i}')

        # output lists for the filtered arrays
        gyrF = eval(f'gyrFiltered{j}_s{i}')
        accF = eval(f'accFiltered{j}_s{i}')

        # number of gyroscope / accelerometer recordings
        ng = len(gyrl)
        na = len(accl)

        # Each recording is filtered with its own estimated sampling frequency
        # (number of samples divided by the last value of column 0).
        for rg, gyrRaw in enumerate(gyrl):
            timeg = np.asarray(gyrRaw.iloc[:,0])
            fG = np.round(len(timeg)/(timeg[-1]))

            gf = dataFilter(gyrRaw, fs = fG)
            gyrF.append(gf)

        for ra, accRaw in enumerate(accl):
            timea = np.asarray(accRaw.iloc[:,0])
            fA = np.round(len(timea)/(timea[-1]))

            af = dataFilter(accRaw, fs = fA)
            accF.append(af)

#### Check if all the data is collected correctly after filtering

In [None]:
# getting the data shape for reference
# Print the shape of every filtered data list for a quick sanity check.
for j in gaitpos:
    for i in smartphone:
        gyrl = eval(f'gyrFiltered{j}_s{i}')
        accl = eval(f'accFiltered{j}_s{i}')

        # dtype=list makes numpy build an object array from the list of arrays
        gyrShape = np.asarray((gyrl), dtype = list).shape
        print(f'Shape of gyrFiltered{j}_s{i} is ------ {gyrShape}')

        accShape = np.asarray((accl), dtype = list).shape
        print(f'Shape of accFiltered{j}_s{i} is ------ {accShape} \n')

### Cut the data

#### Cut data function

In [None]:
def cutData(gyrFilt, accFilt, freq):
    """Trim a filtered recording to the span between its two largest peak gaps.

    Peaks are detected on the magnitude (row-wise norm) of the accelerometer
    data. The largest gap between consecutive peaks is located in the first
    40 % of the gaps and again in the last 40 %. Both signals are cut from the
    peak that ends the first gap up to the peak that starts the last gap.

    Parameters
    ----------
    gyrFilt : numpy.ndarray
        Filtered gyroscope data, one row per sample.
    accFilt : numpy.ndarray
        Filtered accelerometer data, one row per sample. The cut indices are
        taken from this array and applied to both inputs.
    freq : float
        Sampling frequency; detected peaks are at least ``freq * 0.5`` samples apart.

    Returns
    -------
    tuple of numpy.ndarray
        ``(gyr_cut, acc_cut)``, the row slices of the two inputs.

    Raises
    ------
    ValueError
        If too few peaks are found to build the two search windows.
    """
    acc_abs = np.linalg.norm(accFilt, axis=1)

    # height=-10 never rejects a peak because the norm is non-negative;
    # only the minimum distance between peaks restricts the detection.
    peaks, _ = sp.find_peaks(acc_abs, height = -10, distance=freq*0.5)
    diff_peaks = np.diff(peaks)

    # Number of gaps inspected at each end of the recording
    window = int(0.4*len(diff_peaks))
    if window == 0:
        raise ValueError(
            f'cutData needs at least 4 peaks to locate the cut points, found {len(peaks)}'
        )

    gap1 = np.argmax(diff_peaks[:window])

    # argmax over the tail is relative to the tail; shift it back to an index into diff_peaks
    gap2 = int(len(diff_peaks) - window + np.argmax(diff_peaks[-window:]))

    start = peaks[gap1+1]
    stop = peaks[gap2]

    gyr_cut = gyrFilt[start:stop, :]
    acc_cut = accFilt[start:stop, :]

    return gyr_cut, acc_cut

##### -- check -- Cut data function

In [None]:
# Disabled check of the cutting logic: the code below sits inside a string literal, so it is never executed.
'''
gyrNormalCtted, accNormalCtted = [], []

for x in range(0, len(gyrNormal_s3)):
    num = x
    
    nq = len(accNormal_s3[num])
    tq = np.asarray(gyrNormal_s3[num].iloc[:,0])
    freq = np.round(len(tq)/(tq[-1]))
    # print(f'The frequency of the data is {freq}')

    acc_abs = np.linalg.norm(accFilteredNormal_s3[num], axis=1)

    # ht = np.round(np.mean(acc_abs[int(0.2*len(acc_abs)):int(0.7*len(acc_abs))])).astype(int)
    # ht = np.round(np.mean(acc_abs)).astype(int)
    # print(f'The average height of the data is {ht}')
    peaks, _ = sp.find_peaks(acc_abs, height = 0, distance=freq*0.5)
    # print(f'The number of peaks is {(peaks)}')

    diff_peaks = np.diff(peaks)
    # print(f'The difference of peaks is {(diff_peaks)}')

    gap1 = np.argmax(diff_peaks[:20])
    # print(f'The first gap is {gap1}')

    gap2 = np.argmax(diff_peaks[-10:])
    gap2 = int(np.shape(diff_peaks)[0] - 10 + gap2)
    # print(f'The second gap is {gap2}')

    gyr_cut = gyrFilteredNormal_s3[num][peaks[gap1+1]:peaks[gap2], :]
    acc_cut = accFilteredNormal_s3[num][peaks[gap1+1]:peaks[gap2], :]

    gyrNormalCtted.append(gyr_cut)
    accNormalCtted.append(acc_cut)
'''

##### -- Test -- Cut Data line by line

In [None]:
# Disabled step-by-step test of the cutting logic: the code below sits inside a string literal, so it is never executed.
'''
num = 7

nq = len(accNormal_s3[num])
tq = np.asarray(gyrNormal_s3[num].iloc[:,0])
freq = np.round(len(tq)/(tq[-1]))
print(f'The frequency of the data is {freq}')

acc_abs = np.linalg.norm(accFilteredNormal_s3[num], axis=1)

ht = np.round(np.mean(acc_abs[int(0.2*len(acc_abs)):int(0.7*len(acc_abs))])).astype(int)
# ht = np.round(np.mean(acc_abs)).astype(int)
print(f'The average height of the data is {ht}')
peaks, _ = sp.find_peaks(acc_abs, height = 0, distance=freq*0.5)
print(f'The number of peaks is {(peaks)}')

diff_peaks = np.diff(peaks)
print(f'The difference of peaks is {(diff_peaks)}')

gap1 = np.argmax(diff_peaks[:int(0.1*(len(diff_peaks)))])
print(f'The first gap is {gap1}')

gap2 = np.argmax(diff_peaks[-int(0.1*len(diff_peaks)):])
gap2 = int(np.shape(diff_peaks)[0] - int(0.2*len(diff_peaks)) + gap2)
print(f'The second gap is {gap2}')

gyr_cut = gyrFilteredNormal_s3[num][peaks[gap1+1]:peaks[gap2], :]
acc_cut = accFilteredNormal_s3[num][peaks[gap1+1]:peaks[gap2], :]

plt.close()
# plt.plot(gyrFilteredNormal_s1[1][:,0], label='Raw Data', c = 'b')

plt.plot(acc_abs, label = 'Accelerometer Absolute Data', c = 'grey')
plt.plot(np.linspace(0, len(gyrNormal_s3[num])), np.full_like(np.linspace(0, len(gyrNormal_s3[num])), ht), label = 'Average Height', c = 'r')

plt.plot(peaks, acc_abs[peaks], 'x', label = 'Peaks', c = 'y')
plt.plot(np.linspace(0, len(gyrNormal_s3[num])), np.zeros_like(np.linspace(0, len(gyrNormal_s3[num]))), linestyle = "--", linewidth = 0.2, dashes=(5, 15), color="black")

plt.plot(peaks[gap1+1], acc_abs[peaks[gap1+1]], 'o', label = 'Gap 1', c = 'g')
plt.plot(peaks[gap2], acc_abs[peaks[gap2]], 'o', label = 'Gap 2', c = 'black')

plt.plot(gyr_cut[:,0], label = 'Gyr Cut', c='r')

plt.legend()
plt.grid()
plt.show()
'''

#### Cut all the data

In [None]:
# defining arrays for cut gyroscope data
# Cut gyroscope data, one list per gait position and smartphone
gyrCutNormal_s1, gyrCutUpstairs_s1, gyrCutDownstairs_s1 = [], [], []
gyrCutNormal_s3, gyrCutUpstairs_s3, gyrCutDownstairs_s3 = [], [], []

# Cut accelerometer data, one list per gait position and smartphone
accCutNormal_s1, accCutUpstairs_s1, accCutDownstairs_s1 = [], [], []
accCutNormal_s3, accCutUpstairs_s3, accCutDownstairs_s3 = [], [], []

for j in gaitpos:
    for i in smartphone:
        # raw recordings (only the gyroscope one is read below, for its time column)
        gyrl = eval(f'gyr{j}_s{i}')
        accl = eval(f'acc{j}_s{i}')

        # filtered arrays that get cut
        gyrF = eval(f'gyrFiltered{j}_s{i}')
        accF = eval(f'accFiltered{j}_s{i}')

        # output lists for the cut arrays
        gyrC = eval(f'gyrCut{j}_s{i}')
        accC = eval(f'accCut{j}_s{i}')

        # number of filtered gyroscope recordings; accF is indexed with the same r
        n = len(gyrF)

        # Cut every recording
        for r, gyrFiltered in enumerate(gyrF):
            # sampling frequency estimated from the raw gyroscope time column
            tCut = np.asarray(gyrl[r].iloc[:,0])
            freqCut = np.round(len(tCut)/(tCut[-1]))
            # print(f'accFiltered{j}_s{i} - {[r]}')
            gf, aF = cutData(gyrFiltered, accF[r], freqCut)

            gyrC.append(gf)
            accC.append(aF)

#### Check if all the data is collected correctly after cutting

In [None]:
# getting the data shape for reference
# Print the shape of every cut data list for a quick sanity check.
for j in gaitpos:
    for i in smartphone:
        gyr = eval(f'gyrCut{j}_s{i}')
        acc = eval(f'accCut{j}_s{i}')

        # dtype=list makes numpy build an object array from the list of arrays
        gyrShape = np.asarray((gyr), dtype = list).shape
        print(f'Shape of gyrCut{j}_s{i} is ------ {gyrShape}')

        accShape = np.asarray((acc), dtype = list).shape
        print(f'Shape of accCut{j}_s{i} is ------ {accShape} \n')

#### Plot raw data, filtered data, and cut data

In [None]:
# Recording to inspect: gait position, smartphone id and index inside the list
pos, loc, no = 'Normal', 3, 4

plt.close()

# First axis of the raw recording (DataFrame column 1)
rawAxis = eval(f'gyr{pos}_s{loc}')[no].iloc[:,1]
plt.plot(rawAxis, label='Raw Data', c = 'c')

# Same axis after filtering (array column 0)
filteredAxis = eval(f'gyrFiltered{pos}_s{loc}')[no][:,0]
plt.plot(filteredAxis, label='Filtered Data', c = 'b')

# Same axis after cutting; it is plotted against its own row index
cutAxis = eval(f'gyrCut{pos}_s{loc}')[no][:,0]
plt.plot(cutAxis, label = 'Gyr Cutted Data', c = 'r')

plt.legend()
plt.grid()
plt.show()

#### -- Test -- Save all cut data to plots

In [None]:
# Disabled plot export: the code below sits inside a string literal, so it is never executed.
'''
for j in gaitpos:
    for i in smartphone:
        # variables of raw the data
        gyrl = eval(f'gyr{j}_s{i}')
        accl = eval(f'acc{j}_s{i}')
        # variables of filtered the data
        gyrF = eval(f'gyrFiltered{j}_s{i}')
        accF = eval(f'accFiltered{j}_s{i}')
        # variables of cutted the data
        gyrC = eval(f'gyrCut{j}_s{i}')
        accC = eval(f'accCut{j}_s{i}')

        for rg in range(0, len(gyrF)):
            plt.close()
            plt.plot(gyrl[rg].iloc[:,1], label='Raw Gyroscope Data', c = 'c')
            # plt.plot(accl[rg].iloc[:,1], label='Raw Accelerometer Data', c = 'm')

            plt.plot(gyrF[rg][:,0], label='Filtered Gyroscope Data', c = 'b')
            # plt.plot(accF[rg][:,0], label='Filtered Accelerometer Data', c = 'g')

            plt.plot(gyrC[rg][:,0], label='Gyroscope Cut Data', c = 'y')
            # plt.plot(accC[rg][:,0], label = 'Accelerometer Cut Data', c = 'r')

            plt.legend()
            plt.grid()
            plt.ioff()
            # plt.savefig(rootdir+f'/2 Accelerometer_Pictures/acc{j}_s{i}_{rg}.png')
            plt.savefig(rootdir+f'/1 Gyroscope_Pictures/gyr{j}_s{i}_{rg}.png')
'''

### Plot function

In [None]:
def gyrPlot(x, y, lbl, xlbl='None', ylbl='None', ttl='None', clr='None'):
    """Draw a single labelled line plot on a fresh figure and show it.

    Parameters
    ----------
    x, y : array-like
        Data passed to the plot call.
    lbl : str
        Legend label of the line.
    xlbl, ylbl, ttl : str
        Axis labels and title. The defaults are the literal string 'None',
        which is what gets displayed when they are omitted.
    clr : str
        Line colour.
        NOTE(review): matplotlib appears to read the colour string 'None' as
        "no colour", so a call that omits clr may draw an invisible line -- confirm.
    """
    # Close the current figure so the line is drawn on a new one
    plt.close()
    axes = plt.gca()

    axes.plot(x, y, label=lbl, c=clr)

    axes.set_xlabel(xlbl)
    axes.set_ylabel(ylbl)
    axes.set_title(ttl)

    axes.legend()
    axes.grid()
    plt.show()

## ---> AI Model