This notebook formalizes the data used in the anxiety detection project and stores it in a single HDF5 file.

We have the following three kinds of data collected for the project:
1. treadmill data
2. fNIRS data
3. EMG data

For each kind of data, we have 4 trials: N1, N2, P1, P2

The following code reads each data file, parses it, and stores everything in a single file.
The resulting file contains the following 12 datasets:
1. emg_N1
2. emg_N2
3. emg_P1
4. emg_P2
5. fNIRS_N1
6. fNIRS_N2
7. fNIRS_P1
8. fNIRS_P2
9. TM_N1
10. TM_N2
11. TM_P1
12. TM_P2

In [None]:
# Identifier (as a string) of the recording set to process; intended to be passed to OA_data(...),
# which uses its `number` argument as a component of every input and output file name.
# NOTE(review): the read-back cell near the end hard-codes '302.h5' -- keep the two in sync.
trial_number = '302'

In [15]:
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

# ML libraries
from sklearn.cluster import KMeans
# Import KernelDensity from the public package: the private module path
# `sklearn.neighbors.kde` was deprecated in scikit-learn 0.22 and removed in 0.24,
# while `sklearn.neighbors.KernelDensity` is available in old and new releases alike.
from sklearn.neighbors import KernelDensity


# Computation & Signal Processing
from scipy import signal
import numpy as np
import pandas as pd
import pylab as pl
import pickle

#biosppy package for ecg signal analysis
from biosppy import storage
from biosppy.signals import ecg

from utils import * #import data import, clean up and sampling functions. 
# import time
import time
import os
import glob
import time

from ECG_feature_extractor_1000 import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


OA data module to store the data used for OA project

In [3]:
class OA_data:
    """Gather the EMG, fNIRS and treadmill recordings of one subject and save them to one HDF5 file.

    Typical use::

        data = OA_data('302')
        data.fit_emg(); data.fit_fNIRS(); data.fit_TMdata()
        data.save()          # writes '302.h5' in the current working directory

    Attributes created by the ``fit_*`` methods (absent until the matching
    loader has succeeded):
        df_emg1 .. df_emg4      -- EMG frames, stored under keys emg_N1, emg_N2, emg_P1, emg_P2
        df_fNIRS_N1/N2/P1/P2    -- fNIRS rows whose ``trial_i`` is 1/2/3/4
        df_tm1 .. df_tm4        -- treadmill frames, stored under keys TM_N1, TM_N2, TM_P1, TM_P2
    """

    # Trial labels, in the order used for attribute numbering and HDF keys.
    TRIALS = ('N1', 'N2', 'P1', 'P2')
    # Trial tags as they appear in EMG file names (the N trials are tagged NW1/NW2 there).
    EMG_FILE_TAGS = ('NW1', 'NW2', 'P1', 'P2')
    # Step of the synthetic fNIRS 'time' column: row i gets time i * 0.5.
    # NOTE(review): presumably seconds at a 2 Hz sampling rate -- confirm.
    FNIRS_SAMPLE_PERIOD = 0.5
    # Number of fNIRS channels; each contributes one HbO and one HbR column.
    FNIRS_CHANNELS = 16

    def __init__(self, number):
        """Store the subject identifier.

        Args:
            number: identifier string (e.g. '302'); concatenated into every
                input file name and into the output file name.
        """
        self.data_array = []  # unused by this class; kept for backward compatibility
        self.number = number

    def fit_emg(self, data_dir = 'csv/'):
        """Load the four EMG files into ``df_emg1`` .. ``df_emg4``.

        For each trial the files matching ``data_dir + '<number>_<tag>*'`` are
        globbed and the lexicographically last match is loaded with
        ``load_data`` (a helper defined outside this class). Loading is
        best-effort: a missing or unreadable file is reported with ``print``
        and the corresponding attribute is simply not set.

        Args:
            data_dir: prefix prepended verbatim to the file pattern, so it
                must end with a path separator.
        """
        for idx, tag in enumerate(self.EMG_FILE_TAGS, start=1):
            trial_name = self.number + "_" + tag
            # glob returns matches in arbitrary order; sort so the pick is reproducible.
            matches = sorted(glob.glob(data_dir + trial_name + "*"))
            if not matches:
                print("EMG file for " + trial_name + " does not exist")
                continue
            try:
                frame = load_data(matches[-1], data_dir = '')
            except Exception as err:
                # The file exists but could not be read: say so instead of
                # claiming it is missing, and keep going with the other trials.
                print("EMG file " + matches[-1] + " could not be loaded: " + repr(err))
                continue
            setattr(self, 'df_emg%d' % idx, frame)

    def fit_fNIRS(self, data_dir = 'fNIRS_Walking data'):
        """Load the fNIRS export and split it by trial into ``df_fNIRS_N1/N2/P1/P2``.

        Reads ``data_dir + '/OA_FNIRS_2019_WALK_<number>_oxydata.txt'`` as a
        tab-separated table whose header names are '1', '2', ...; renames the
        columns, prepends a synthetic ``time`` column and keeps one frame per
        ``trial_i`` value 1..4.

        Args:
            data_dir: directory containing the export (no trailing separator).

        Raises:
            Whatever ``pandas.read_csv`` raises if the file is missing or
            malformed; ``KeyError`` if an expected column is absent.
        """
        file_name = '/OA_FNIRS_2019_WALK_' + self.number + '_oxydata.txt'
        raw = pd.read_csv(data_dir + file_name, sep='\t')

        column_names = {'1': 'subject_ID', '2': 'cohort', '3': 'block', '4': 'trial_i'}
        signal_columns = []
        # Columns '5'..'36' are labelled as 16 channels of alternating HbO/HbR.
        # The previous hand-written mapping listed key '7' twice, so 'HbO2' was
        # never produced, the labels of columns '8'..'21' were shifted by one
        # and column '22' kept its raw name.
        # NOTE(review): assumes the export really holds interleaved HbO/HbR
        # pairs in columns 5-36 -- confirm against the fNIRS export format;
        # files saved with the old labels should be regenerated.
        for channel in range(1, self.FNIRS_CHANNELS + 1):
            for offset, species in ((3, 'HbO'), (4, 'HbR')):
                label = '%s%d' % (species, channel)
                column_names[str(2 * channel + offset)] = label
                signal_columns.append(label)

        frame = raw.rename(columns=column_names)
        frame['time'] = [row * self.FNIRS_SAMPLE_PERIOD for row in range(len(frame))]
        frame = frame[['time', 'subject_ID', 'cohort', 'block', 'trial_i'] + signal_columns]

        # trial_i codes 1..4 correspond to N1, N2, P1, P2 in that order.
        for code, trial in enumerate(self.TRIALS, start=1):
            setattr(self, 'df_fNIRS_' + trial, frame[frame['trial_i'] == code])

    def fit_TMdata(self, data_dir = 'Treadmill_data'):
        """Load the four treadmill files into ``df_tm1`` .. ``df_tm4``.

        Each file ``'/OA_<number>_<trial>_RAWDATA.csv'`` is read through
        ``load_data(file_name, data_dir, header=1)`` (helper defined outside
        this class) and its ``Time`` column is renamed to ``time``. Unlike
        ``fit_emg`` this is not best-effort: errors from ``load_data``
        propagate to the caller.

        Args:
            data_dir: directory passed through to ``load_data``.
        """
        for idx, trial in enumerate(self.TRIALS, start=1):
            file_name = '/OA_' + self.number + "_" + trial + '_RAWDATA.csv'
            frame = load_data(file_name, data_dir, header=1)
            setattr(self, 'df_tm%d' % idx, frame.rename(columns={'Time': 'time'}))

    def _dataset_attrs(self):
        """Return (hdf_key, attribute_name) pairs in the order save() writes them."""
        pairs = []
        for idx, trial in enumerate(self.TRIALS, start=1):
            pairs.append(('emg_' + trial, 'df_emg%d' % idx))
        for trial in self.TRIALS:
            pairs.append(('fNIRS_' + trial, 'df_fNIRS_' + trial))
        for idx, trial in enumerate(self.TRIALS, start=1):
            pairs.append(('TM_' + trial, 'df_tm%d' % idx))
        return pairs

    def save(self):
        """Write all twelve frames to ``'<number>.h5'`` in table format.

        Raises:
            AttributeError: if any dataset has not been loaded. The check runs
                before the file is opened, so no partial file is produced.
        """
        pairs = self._dataset_attrs()
        missing = [key for key, attr in pairs if not hasattr(self, attr)]
        if missing:
            raise AttributeError("Cannot save " + self.number + ".h5; datasets not loaded: "
                                 + ", ".join(missing))
        # The context manager closes the store even if a put() fails.
        with pd.HDFStore(self.number + '.h5') as hdf:
            for key, attr in pairs:
                hdf.put(key, getattr(self, attr), format='table', data_columns=True)

We first create the data module that will hold and save the data files.

Pass the trial number to initialize the module.

In [6]:
# Build the data module from the identifier set in the configuration cell, so that
# changing `trial_number` there is enough to process a different recording set.
abc = OA_data(trial_number)

For next step, we fit the data module using the corresponding data files

The default directory of each data file is shown below:

1. EMG: csv/
2. fNIRS: fNIRS_Walking data
3. TreadMill: Treadmill_data

If the data files are not located in the default directory, pass the directory name as an argument when loading the data.

In [7]:
# Load each modality from its default directory; pass data_dir=... to any call to override it.
abc.fit_emg()      # default data_dir='csv/'
abc.fit_fNIRS()    # default data_dir='fNIRS_Walking data'
abc.fit_TMdata()   # default data_dir='Treadmill_data'

In [8]:
abc.df_emg1

Unnamed: 0,X[s],R RECTUS FEMORIS: EMG 1,X[s].1,R RECTUS FEMORIS: Acc 1.X,X[s].2,R RECTUS FEMORIS: Acc 1.Y,X[s].3,R RECTUS FEMORIS: Acc 1.Z,X[s].4,L RECTUS FEMORIS: EMG 2,...,X[s].56,Trigno Trigger sensor 15: Trig 15,X[s].57,L PECTORALIS MAJOR: EKG 16,X[s].58,L PECTORALIS MAJOR: Acc 16.X,X[s].59,L PECTORALIS MAJOR: Acc 16.Y,X[s].60,L PECTORALIS MAJOR: Acc 16.Z
0,0.000000,0.0,0.00000,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,0.0,...,0.000000,0.072555,0.000000,0.0,0.00000,0.000000,0.00000,0.000000,0.00000,0.000000
1,0.000519,0.0,0.00675,0.000000,0.00675,0.000000,0.00675,0.00000,0.000519,0.0,...,0.000519,0.072555,0.000519,0.0,0.00675,0.000000,0.00675,0.000000,0.00675,0.000000
2,0.001038,0.0,0.01350,0.000000,0.01350,0.000000,0.01350,0.00000,0.001038,0.0,...,0.001038,0.072555,0.001038,0.0,0.01350,0.000000,0.01350,0.000000,0.01350,0.000000
3,0.001558,0.0,0.02025,0.000000,0.02025,0.000000,0.02025,0.00000,0.001558,0.0,...,0.001558,0.072555,0.001558,0.0,0.02025,0.000000,0.02025,0.000000,0.02025,0.000000
4,0.002077,0.0,0.02700,-0.898329,0.02700,0.060426,0.02700,-0.14905,0.002077,0.0,...,0.002077,0.072555,0.002077,0.0,0.02700,0.060426,0.02700,-0.636484,0.02700,0.306157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578157,,0.0,,0.000000,,0.000000,,0.00000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
578158,,0.0,,0.000000,,0.000000,,0.00000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
578159,,0.0,,0.000000,,0.000000,,0.00000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
578160,,0.0,,0.000000,,0.000000,,0.00000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000


Final step

Save the data module using save() function

CAUTION!
- Make sure a file with the same name as the one you are about to save does not already exist. If it does, delete it before saving.

This step takes significant amount of time. (over 5 min)

In [9]:
abc.save()

Following code shows how to retrieve the data back from saved file

In [11]:
#Only importing Pandas is enough for reading the file
#First Argument: file name
#Second Argument: data to retrieve
OA_302_emg_N1 = pd.read_hdf('302.h5','emg_N1')

In [14]:
OA_302_emg_N1

Unnamed: 0,X[s],R RECTUS FEMORIS: EMG 1,X[s].1,R RECTUS FEMORIS: Acc 1.X,X[s].2,R RECTUS FEMORIS: Acc 1.Y,X[s].3,R RECTUS FEMORIS: Acc 1.Z,X[s].4,L RECTUS FEMORIS: EMG 2,...,X[s].56,Trigno Trigger sensor 15: Trig 15,X[s].57,L PECTORALIS MAJOR: EKG 16,X[s].58,L PECTORALIS MAJOR: Acc 16.X,X[s].59,L PECTORALIS MAJOR: Acc 16.Y,X[s].60,L PECTORALIS MAJOR: Acc 16.Z
0,0.000000,0.0,0.00000,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,0.0,...,0.000000,0.072555,0.000000,0.0,0.00000,0.000000,0.00000,0.000000,0.00000,0.000000
1,0.000519,0.0,0.00675,0.000000,0.00675,0.000000,0.00675,0.00000,0.000519,0.0,...,0.000519,0.072555,0.000519,0.0,0.00675,0.000000,0.00675,0.000000,0.00675,0.000000
2,0.001038,0.0,0.01350,0.000000,0.01350,0.000000,0.01350,0.00000,0.001038,0.0,...,0.001038,0.072555,0.001038,0.0,0.01350,0.000000,0.01350,0.000000,0.01350,0.000000
3,0.001558,0.0,0.02025,0.000000,0.02025,0.000000,0.02025,0.00000,0.001558,0.0,...,0.001558,0.072555,0.001558,0.0,0.02025,0.000000,0.02025,0.000000,0.02025,0.000000
4,0.002077,0.0,0.02700,-0.898329,0.02700,0.060426,0.02700,-0.14905,0.002077,0.0,...,0.002077,0.072555,0.002077,0.0,0.02700,0.060426,0.02700,-0.636484,0.02700,0.306157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578157,,0.0,,0.000000,,0.000000,,0.00000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
578158,,0.0,,0.000000,,0.000000,,0.00000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
578159,,0.0,,0.000000,,0.000000,,0.00000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
578160,,0.0,,0.000000,,0.000000,,0.00000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000


In [33]:
# Scratch check: list the EMG CSV files whose names start with '201_P1' and show the last match.
data_dir = 'csv/'
x=glob.glob(data_dir + '201_P1' + "*")
t = x[-1]  # glob does not guarantee any ordering, so "last" is not necessarily the newest file
t

'csv/201_P1_Rep_1.23.csv'

In [34]:
p=pd.read_csv(t)

In [35]:
p

Unnamed: 0,X[s],R RECTUS FEMORIS: EMG 1,X[s].1,R RECTUS FEMORIS: Acc 1.X,X[s].2,R RECTUS FEMORIS: Acc 1.Y,X[s].3,R RECTUS FEMORIS: Acc 1.Z,X[s].4,L RECTUS FEMORIS: EMG 2,...,X[s].56,Trigno Trigger sensor 15: Trig 15,X[s].57,L PECTORALIS MAJOR: EKG 16,X[s].58,L PECTORALIS MAJOR: Acc 16.X,X[s].59,L PECTORALIS MAJOR: Acc 16.Y,X[s].60,L PECTORALIS MAJOR: Acc 16.Z
0,0.000000,0.0,0.00000,0.000000,0.00000,0.00000,0.00000,0.000000,0.000000,0.0,...,0.000000,0.072555,0.000000,0.0,0.00000,0.000000,0.00000,0.000000,0.00000,0.000000
1,0.000519,0.0,0.00675,0.000000,0.00675,0.00000,0.00675,0.000000,0.000519,0.0,...,0.000519,0.072555,0.000519,0.0,0.00675,0.000000,0.00675,0.000000,0.00675,0.000000
2,0.001038,0.0,0.01350,0.000000,0.01350,0.00000,0.01350,0.000000,0.001038,0.0,...,0.001038,0.072555,0.001038,0.0,0.01350,0.000000,0.01350,0.000000,0.01350,0.000000
3,0.001558,0.0,0.02025,0.000000,0.02025,0.00000,0.02025,0.000000,0.001558,0.0,...,0.001558,0.072555,0.001558,0.0,0.02025,0.000000,0.02025,0.000000,0.02025,0.000000
4,0.002077,0.0,0.02700,-0.958755,0.02700,0.12488,0.02700,0.016114,0.002077,0.0,...,0.002077,0.072555,0.002077,0.0,0.02700,0.153079,0.02700,-0.789563,0.02700,0.048341
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
378061,,0.0,,0.000000,,0.00000,,0.000000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
378062,,0.0,,0.000000,,0.00000,,0.000000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
378063,,0.0,,0.000000,,0.00000,,0.000000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
378064,,0.0,,0.000000,,0.00000,,0.000000,,0.0,...,,0.000000,,0.0,,0.000000,,0.000000,,0.000000
