In [108]:
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join

In [115]:
# Participant ids processed by every cell below.
# The numbering is not contiguous: p02, p18 and p20 are not in the list.
participants = [
    'p00', 'p01', 'p03', 'p04', 'p05',
    'p06', 'p07', 'p08', 'p09', 'p10',
    'p11', 'p12', 'p13', 'p14', 'p15',
    'p16', 'p17', 'p19', 'p21', 'p22',
    'p23', 'p24',
]

#**Read raw data and tags information from E4**

In [116]:
# Load every participant's raw E4 export into per-signal dicts keyed by
# participant id. Files are read without a header row, so the first two rows
# of each frame are still the start time / sampling frequency metadata.
tags = {}
acc_raw, bvp_raw, eda_raw, hr_raw, temp_raw = {}, {}, {}, {}, {}

# File-name suffix -> dict that receives the parsed frame for that signal.
raw_by_suffix = {
    'ACC.csv': acc_raw,
    'BVP.csv': bvp_raw,
    'HR.csv': hr_raw,
    'EDA.csv': eda_raw,
    'TEMP.csv': temp_raw,
}

for pid in participants:
    # NOTE(review): Windows-style path rooted at the current drive; confirm
    # this matches the machine the notebook is run on.
    directory = '\\raw_data\\Participants_data\\'+pid+'\\'+pid+'_e4\\'

    for fname in listdir(directory):
        path = join(directory, fname)
        if not isfile(path):
            continue

        if fname.endswith('tags.csv'):
            # Only the first column of the tags file is kept.
            tags[pid] = pd.read_csv(path, header=None).iloc[:, 0]
            continue

        for suffix, store in raw_by_suffix.items():
            if fname.endswith(suffix):
                # Read the file that actually matched the suffix instead of a
                # hard-coded name, so prefixed exports (e.g. 'p00_ACC.csv')
                # load rather than raising FileNotFoundError.
                store[pid] = pd.read_csv(path, header=None)
                break


#**The .csv file from E4 contains three important pieces of information:**
***1)* the first row is the start time**
***2)* the second row is the sampling frequency**
***3)* the data starts from the third row**

In [117]:
def _split_e4_frame(raw, columns):
    """Separate the two metadata rows of a raw E4 frame from its samples.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame read with ``header=None``; row 0 / column 0 is used as the start
        time and row 1 / column 0 as the sampling frequency.
    columns : list of str
        Column names to give the sample rows (one per column of ``raw``).

    Returns
    -------
    dict
        ``{'start_time': ..., 'sampling_freq': ..., 'data': DataFrame}`` where
        ``data`` holds rows 2 onward with their original index labels.
    """
    # Explicit copy: later cells add columns to this frame, and assigning into
    # a slice of `raw` is chained assignment (SettingWithCopyWarning).
    samples = raw.iloc[2:, :].copy()
    samples.columns = columns
    return {
        'start_time': raw.iloc[0, 0],
        'sampling_freq': raw.iloc[1, 0],
        'data': samples,
    }


acc, bvp, eda, hr, temp = {}, {}, {}, {}, {}
for pid in participants:
    acc[pid] = _split_e4_frame(acc_raw[pid], ['x', 'y', 'z'])
    bvp[pid] = _split_e4_frame(bvp_raw[pid], ['bvp'])
    eda[pid] = _split_e4_frame(eda_raw[pid], ['eda'])
    hr[pid] = _split_e4_frame(hr_raw[pid], ['bpm'])
    temp[pid] = _split_e4_frame(temp_raw[pid], ['temp'])

#**Taking data from ACC, BVP, EDA, HR, TEMP**

In [118]:
# Signal name -> {participant id -> {'start_time', 'sampling_freq', 'data'}}.
# The keys are also used as the output file names when the labelled data is saved.
all_data = {
    'ACC': acc,
    'BVP': bvp,
    'EDA': eda,
    'HR': hr,
    'TEMP': temp,
}

#**Since the tagged data is different for different groups, the conditions also differ**

In [None]:
# Label given to each consecutive interval of a participant's recording.
# Entry 0 applies up to the first tag, entry k to the span between tag k-1 and
# tag k, and the last entry to everything after the last tag used; a sequence
# of N labels therefore consumes the first N-1 tags of that participant.
# NOTE(review): the meaning of the codes 0-4 is not defined in this notebook.
LABEL_SEQUENCES = {
    'p00': [4, 3, 4, 0, 4, 1, 4, 2, 4],
    'p01': [4, 3, 4, 0, 4, 1, 4, 2],
    'p03': [4, 3, 4, 0, 4, 1, 4, 2],
    'p04': [4, 3, 4, 2, 4, 1, 4, 0],
    'p05': [4, 3, 4, 2, 4, 1, 4, 0],
    'p06': [4, 3, 4, 2, 4, 1, 4, 0],
    'p07': [4, 3, 4, 2, 4, 1, 4, 0],
    'p08': [4, 0, 4, 1, 4, 2, 4, 3],
    'p09': [4, 0, 1, 4, 2, 4, 3],
    'p10': [4, 0, 4, 1, 4, 4, 2, 4, 3],
    'p11': [4, 0, 4, 1, 4, 4, 2, 4, 3],
    'p12': [4, 2, 4, 1, 4, 4, 0, 4, 3],
    'p13': [4, 2, 4, 1, 4, 0, 4, 3],
    'p14': [4, 2, 4, 1, 4, 0, 4, 3, 4],
    'p15': [4, 2, 4, 1, 4, 0, 4, 3],
    'p16': [4, 3, 4, 1, 4, 0, 4, 2],
    'p17': [4, 3, 4, 1, 4, 0, 4, 2],
    'p19': [4, 3, 4, 1, 4, 0, 4, 2],
    'p21': [4, 1, 4, 0, 4, 2, 4, 3],
    'p22': [4, 1, 4, 0, 4, 2, 4, 3],
    'p23': [4, 1, 4, 0, 4, 2, 4, 3],
    'p24': [4, 3, 4, 1, 4, 0, 4, 2],
}


def _label_intervals(timestamps, tag_times, labels):
    """Assign one label per sample from the tag-delimited interval it falls in.

    Parameters
    ----------
    timestamps : numpy.ndarray
        Time of every sample, in the same unit as ``tag_times``.
    tag_times : sequence of numbers
        Interval boundaries; only the first ``len(labels) - 1`` are used.
    labels : sequence of int
        Label for each interval, in order.

    Returns
    -------
    numpy.ndarray
        One label per timestamp. Intervals are closed on both sides and the
        first matching interval wins, so a sample exactly on a boundary gets
        the earlier interval's label. Samples matching no interval get 0
        (the ``np.select`` default).

    Raises
    ------
    ValueError
        If fewer than two labels are given or there are too few tags.
    """
    n_bounds = len(labels) - 1
    if n_bounds < 1:
        raise ValueError('at least two labels are required')
    bounds = np.asarray(tag_times)[:n_bounds]
    if len(bounds) < n_bounds:
        raise ValueError(
            'expected at least %d tags, found %d' % (n_bounds, len(bounds)))

    conditions = [timestamps <= bounds[0]]
    conditions.extend(
        (timestamps >= lower) & (timestamps <= upper)
        for lower, upper in zip(bounds[:-1], bounds[1:])
    )
    conditions.append(timestamps >= bounds[-1])
    return np.select(conditions, list(labels))


for pid in participants:
    # KeyError here means the participant has no label sequence defined;
    # failing loudly avoids silently reusing another participant's labels.
    labels = LABEL_SEQUENCES[pid]

    for signal, recordings in all_data.items():
        entry = recordings[pid]
        frame = entry['data']

        # Sample k is taken k / sampling_freq seconds after the start time.
        # (np.linspace with its default endpoint=True would stretch the step
        # to n / ((n - 1) * sampling_freq) and drift over the recording.)
        timestamps = (entry['start_time']
                      + np.arange(len(frame)) / entry['sampling_freq'])

        # Both columns are written to the frame stored in `all_data`.
        frame['time_series'] = timestamps
        frame['label'] = _label_intervals(timestamps, tags[pid], labels)

        # time series isn't required in the saved file
        labelled = frame.drop(columns=['time_series'])

        # save as csv
        labelled.to_csv('\\labelled_data\\'+pid+'\\'+signal+'.csv', index=False)