In [108]:
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join

In [115]:
# Participant identifiers 'p00' .. 'p04', generated from their numeric index.
users = ['p{:02d}'.format(index) for index in range(5)]

#**Read raw data and tags information from E4**

In [116]:
# Load the raw per-sensor CSV files and the tag file for every participant.
# Every file is read with header=None, so whatever rows precede the samples
# stay in the frame as ordinary rows. Results are keyed by participant id.
tags = {}
acc_raw, bvp_raw, eda_raw, hr_raw, temp_raw = {}, {}, {}, {}, {}

# File-name suffix -> dictionary that receives the raw frame for that sensor.
raw_by_suffix = {
    'ACC.csv': acc_raw,
    'BVP.csv': bvp_raw,
    'HR.csv': hr_raw,
    'EDA.csv': eda_raw,
    'TEMP.csv': temp_raw,
}

# `user_id` instead of `id`, so the builtin `id` is not shadowed.
for user_id in users:
    # NOTE(review): backslash-separated, root-relative path; this looks
    # Windows-specific -- confirm before running on another platform.
    directory = '\\raw_data\\User_data\\'+user_id+'\\'+user_id+'_e4\\'

    for file_name in listdir(directory):
        file_path = join(directory, file_name)
        if not isfile(file_path):
            continue

        if file_name.endswith('tags.csv'):
            # Only the first column of the tag file is kept.
            tags[user_id] = pd.read_csv(file_path, header=None).iloc[:, 0]
            continue

        for suffix, target in raw_by_suffix.items():
            if file_name.endswith(suffix):
                # Read the file that actually matched, not a re-typed
                # constant name that may not exist in the directory.
                target[user_id] = pd.read_csv(file_path, header=None)
                break


#**Each .csv file from the E4 contains three important pieces of information:**
***1)* the first row is the start time**
***2)* the second row is the sampling frequency**
***3)* the data starts from the third row**

In [117]:
def _split_e4_frame(raw, columns):
    """Split one raw sensor frame into its metadata and its samples.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame read with ``header=None``; cell (0, 0) is taken as the start
        time, cell (1, 0) as the sampling frequency, and every row from the
        third onwards as sample data.
    columns : list of str
        Column names to assign to the sample rows; must match the number of
        columns in ``raw``.

    Returns
    -------
    dict
        ``{'start_time': ..., 'sampling_freq': ..., 'data': DataFrame}``.
    """
    # .copy() gives the samples their own frame, so columns added to it later
    # do not trigger SettingWithCopyWarning or touch the raw frame.
    data = raw.iloc[2:, :].copy()
    data.columns = columns
    return {'start_time': raw.iloc[0, 0], 'sampling_freq': raw.iloc[1, 0],
            'data': data}


acc, bvp, eda, hr, temp = {}, {}, {}, {}, {}
# `user_id` instead of `id`, so the builtin `id` is not shadowed.
for user_id in users:
    acc[user_id] = _split_e4_frame(acc_raw[user_id], ['x', 'y', 'z'])
    bvp[user_id] = _split_e4_frame(bvp_raw[user_id], ['bvp'])
    eda[user_id] = _split_e4_frame(eda_raw[user_id], ['eda'])
    hr[user_id] = _split_e4_frame(hr_raw[user_id], ['bpm'])
    temp[user_id] = _split_e4_frame(temp_raw[user_id], ['temp'])

#**Taking data from ACC, BVP, EDA, HR, TEMP**

In [118]:
# Sensor name -> per-participant dictionary, in the order ACC, BVP, EDA, HR, TEMP.
all_data = dict(ACC=acc, BVP=bvp, EDA=eda, HR=hr, TEMP=temp)

#**Add labels**

In [None]:
# Labels for the eight consecutive segments delimited by the first seven tag
# timestamps of each participant.
# NOTE(review): the meaning of the codes 0-4 is not defined in this notebook;
# confirm against the study protocol.
segment_labels = {
    'p01': [4, 3, 4, 0, 4, 1, 4, 2],
    'p03': [4, 3, 4, 0, 4, 1, 4, 2],
    'p00': [4, 3, 4, 2, 4, 1, 4, 0],
    'p02': [4, 3, 4, 2, 4, 1, 4, 0],
    'p04': [4, 3, 4, 2, 4, 1, 4, 0],
}

# `user_id` instead of `id`, so the builtin `id` is not shadowed.
for user_id in users:
    # Fail loudly for an unknown participant instead of silently reusing the
    # labels left over from the previous loop iteration.
    if user_id not in segment_labels:
        raise KeyError('no label sequence defined for user ' + repr(user_id))
    values = segment_labels[user_id]

    user_tags = tags[user_id]
    if len(user_tags) < 7:
        raise ValueError('user ' + repr(user_id) + ' has fewer than 7 tags')
    # Positional access to the first seven tag values.
    bounds = user_tags.iloc[:7].tolist()

    for sensor, per_user in all_data.items():
        record = per_user[user_id]
        sample_rate = record['sampling_freq']
        start = record['start_time']
        n_samples = len(record['data'])

        # Sample k is placed at start + k / sample_rate. The previous
        # np.linspace(start, start + n / sample_rate, num=n) included the end
        # point, which stretched the spacing to n / ((n - 1) * sample_rate).
        time_series = start + np.arange(n_samples) / sample_rate

        # Same first-match rules as before: a sample exactly on a tag belongs
        # to the earlier segment because np.select takes the first true
        # condition. Samples matching no condition get np.select's default, 0.
        conditions = (
            [time_series <= bounds[0]]
            + [(time_series >= low) & (time_series <= high)
               for low, high in zip(bounds, bounds[1:])]
            + [time_series >= bounds[-1]]
        )
        labels = np.select(conditions, values)

        # Build a new frame rather than adding columns in place to what may
        # be a slice of the raw frame; store it back so the in-memory data
        # carries 'time_series' and 'label' just as it did before.
        data = record['data'].assign(time_series=time_series, label=labels)
        record['data'] = data

        # do not drop time series if it is required for further processing
        data = data.drop(['time_series'], axis=1)

        # save as csv
        # NOTE(review): the target directory is assumed to exist already.
        data.to_csv('\\labelled_data\\'+user_id+'\\'+str(sensor)+'.csv', index=False)