In [216]:
import csv
import os
from glob import glob

import pandas as pd

In [217]:
# Folder layout used when loading and saving: DATA_PATH / SUBJECT_PATH / <sensor folder> / <file>.
# Every component keeps its trailing slash.
DATA_PATH = "./Experiment/Raw/"
SUBJECT_PATH = "P04/"

# Per-sensor sub-folders inside the subject folder.
POLAR_PATH = "PolarH10/"
E4_PATH = "e4/"
GALAXY_PATH = "GalaxyWatch/"

In [218]:
# Nominal sampling rates in Hz. They are used below to turn sample counts into
# durations and to turn a time offset in seconds into a row position.
polarh10_sample_rate = 130
e4_sample_rate = 64
galaxy_sample_rate = 25

In [219]:
# Parallel lists: position i in each list refers to the same sensor.
sensor_list = [
    'Polar H10',
    'E4',
    'Galaxy Watch',
]
sensor_path_list = [
    POLAR_PATH,
    E4_PATH,
    GALAXY_PATH,
]
sample_rate_list = [
    polarh10_sample_rate,
    e4_sample_rate,
    galaxy_sample_rate,
]

In [220]:
# Load the subject's event log. glob() returns a list of matching paths and the
# first match is read.
event_csv_matches = glob(os.path.join(DATA_PATH, SUBJECT_PATH, "event.csv"))
event_df = pd.read_csv(event_csv_matches[0])

In [221]:
def _read_sensor_csv(sensor_path, file_name):
    """Read the first file matching DATA_PATH/SUBJECT_PATH/<sensor_path>/<file_name>."""
    matches = glob(os.path.join(DATA_PATH, SUBJECT_PATH, sensor_path, file_name))
    return pd.read_csv(matches[0])


# One raw signal file per sensor.
polarh10_df = _read_sensor_csv(POLAR_PATH, "ECG.csv")
e4_df = _read_sensor_csv(E4_PATH, "BVP.csv")
galaxy_df = _read_sensor_csv(GALAXY_PATH, "PPG.csv")

In [222]:
# All sensors get a string "Timestamp" column with one-second resolution in this
# format, so the columns can be compared directly across sensors.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# Polar H10: phoneTimestamp / 1e3 is read as epoch seconds; no hour offset is applied.
polarh10_df["Timestamp"] = pd.to_datetime(
    polarh10_df["phoneTimestamp"]/1e3, unit='s'
).dt.strftime(TIMESTAMP_FORMAT)

# E4: timestamp / 1e6 is read as epoch seconds, then shifted by +9 hours.
# NOTE(review): the shift is presumably a UTC -> local-time correction; confirm.
e4_df["Timestamp"] = (
    pd.to_datetime(e4_df["timestamp"]/1e6, unit='s') + pd.Timedelta(hours=9)
).dt.strftime(TIMESTAMP_FORMAT)

# Galaxy Watch: timestamp / 1e3 is read as epoch seconds, then shifted by +9 hours.
galaxy_df["Timestamp"] = (
    pd.to_datetime(galaxy_df["timestamp"]/1e3, unit='s') + pd.Timedelta(hours=9)
).dt.strftime(TIMESTAMP_FORMAT)

# Expose each sensor's signal under a common "Data" column.
for frame, signal_column in ((polarh10_df, "ecg"), (e4_df, "value"), (galaxy_df, "ppg")):
    frame["Data"] = frame[signal_column]

In [223]:
# Raw frames in the same order as sensor_list / sample_rate_list.
df_list = [
    polarh10_df,
    e4_df,
    galaxy_df,
]

In [224]:
# Summarise each raw recording. The duration in seconds is derived from the
# sample count and the nominal sampling rate, not from the timestamps.
for name, frame, rate in zip(sensor_list, df_list, sample_rate_list):
    timestamps = frame['Timestamp']
    n_samples = len(frame)
    print(f"============ {name} [original information] =============")
    print(f"Start time: {timestamps[0]}")
    print(f"End time: {timestamps.iloc[-1]}")
    print(f"Data Length: {n_samples}")
    print(f"Data Length (in seconds): {n_samples/rate}(s)")
    print(f"Sampling Rate: {rate}Hz\n")

Start time: 2024-03-19 16:07:16
End time: 2024-03-19 17:12:49
Data Length: 513482
Data Length (in seconds): 3949.8615384615387(s)
Sampling Rate: 130Hz

Start time: 2024-03-19 16:09:51
End time: 2024-03-19 17:11:52
Data Length: 238150
Data Length (in seconds): 3721.09375(s)
Sampling Rate: 64Hz

Start time: 2024-03-19 16:10:08
End time: 2024-03-19 17:12:35
Data Length: 93300
Data Length (in seconds): 3732.0(s)
Sampling Rate: 25Hz



In [225]:
# First timestamp of each recording. The common window starts at the latest of
# them. Timestamps are "%Y-%m-%d %H:%M:%S" strings, so max() compares them as text.
start_time_polarh10, start_time_e4, start_time_galaxy = (
    frame["Timestamp"][0] for frame in (polarh10_df, e4_df, galaxy_df)
)
start_time = max(start_time_polarh10, start_time_e4, start_time_galaxy)

In [226]:
# Last timestamp of each recording. The common window ends at the earliest of
# them. Timestamps are "%Y-%m-%d %H:%M:%S" strings, so min() compares them as text.
end_time_polarh10, end_time_e4, end_time_galaxy = (
    frame["Timestamp"].iloc[-1] for frame in (polarh10_df, e4_df, galaxy_df)
)
end_time = min(end_time_polarh10, end_time_e4, end_time_galaxy)

In [227]:
# start_time is the latest of the three sensors' first timestamps.
print("Standard Start Time: ", start_time)

Standard Start Time:  2024-03-19 16:10:08


In [228]:
# end_time is the earliest of the three sensors' last timestamps.
print("Standard End Time: ", end_time)

Standard End Time:  2024-03-19 17:11:52


In [229]:
def get_clean_df(df, start_time, end_time):
    """Crop a sensor frame to a time window and keep only the labelled-data columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least a 'Timestamp' and a 'Data' column.
    start_time, end_time
        Window bounds, compared against df['Timestamp']; both ends are inclusive.

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0-based index and the columns 'Timestamp',
        'Data' and 'Status', where 'Status' is 0 for every row. The input
        frame is not modified.
    """
    in_window = df['Timestamp'].between(start_time, end_time)
    windowed = df.loc[in_window].reset_index(drop=True)

    columns = {
        'Timestamp': windowed['Timestamp'],
        'Data': windowed['Data'],
        'Status': 0,  # placeholder label, filled in later from the event log
    }
    return pd.DataFrame(columns)

In [230]:
# Crop every sensor to the common [start_time, end_time] window.
polarh10_clean_df, e4_clean_df, galaxy_clean_df = (
    get_clean_df(frame, start_time, end_time)
    for frame in (polarh10_df, e4_df, galaxy_df)
)

In [231]:
# Cropped frames in the same order as sensor_list / sample_rate_list.
clean_df_list = [
    polarh10_clean_df,
    e4_clean_df,
    galaxy_clean_df,
]

In [232]:
# Summarise each cropped recording. The duration in seconds is derived from the
# sample count and the nominal sampling rate, not from the timestamps.
for name, frame, rate in zip(sensor_list, clean_df_list, sample_rate_list):
    timestamps = frame['Timestamp']
    n_samples = len(frame)
    print(f"============ {name} [clean information] =============")
    print(f"Start time: {timestamps[0]}")
    print(f"End time: {timestamps.iloc[-1]}")
    print(f"Data Length: {n_samples}")
    print(f"Data Length (in seconds): {n_samples/rate}(s)")
    print(f"Sampling Rate: {rate}Hz\n")

Start time: 2024-03-19 16:10:08
End time: 2024-03-19 17:11:52
Data Length: 483653
Data Length (in seconds): 3720.4076923076923(s)
Sampling Rate: 130Hz

Start time: 2024-03-19 16:10:08
End time: 2024-03-19 17:11:52
Data Length: 237062
Data Length (in seconds): 3704.09375(s)
Sampling Rate: 64Hz

Start time: 2024-03-19 16:10:08
End time: 2024-03-19 17:11:52
Data Length: 92244
Data Length (in seconds): 3689.76(s)
Sampling Rate: 25Hz



In [233]:
# Convert the event timestamps to the same "%Y-%m-%d %H:%M:%S" strings used for
# the sensors: the raw value * 1e6 is parsed by pd.to_datetime, then shifted by +9 hours.
# NOTE: this overwrites event_df["timestamp"] in place, so re-running the cell
# without reloading event_df applies the conversion to already-formatted strings.
event_df["timestamp"] = (
    pd.to_datetime(event_df["timestamp"]*1e6) + pd.Timedelta(hours=9)
).dt.strftime("%Y-%m-%d %H:%M:%S")

In [247]:
# Map each session label to an integer status code. Code 0 is reserved for
# 'nothing'. Walking event_df["session"] in row order, the counter advances
# whenever the label differs from the previous row's label.
event_dict = {'nothing': 0}
session_id = 0  # was named `id`, which shadowed the built-in id() for the rest of the kernel session
prev_session = ""
for curr_session in event_df["session"]:
    if prev_session != curr_session:
        session_id += 1
    # Re-assigned on every row: a label that reappears after a different one
    # ends up with the code of its latest run.
    event_dict[curr_session] = session_id
    prev_session = curr_session


In [248]:
# Display the session-label -> status-code mapping.
event_dict

{'nothing': 0,
 'adaptation': 1,
 'baseline': 2,
 'tsst-prep': 3,
 'tsst-speech': 4,
 'meditation-1': 5,
 'screen-reading': 6,
 'ssst-prep': 7,
 'ssst-sing': 8,
 'meditation-2': 9,
 'keyboard-typing': 10,
 'rest-1': 11,
 'mobile-typing': 12,
 'rest-2': 13,
 'standing': 14,
 'rest-3': 15,
 'walking': 16,
 'rest-4': 17,
 'jogging': 18,
 'rest-5': 19,
 'running': 20}

In [236]:
# Resolve each session's time window once: the first and last event timestamp
# recorded for that session label. This does not depend on the sensor frame.
session_windows = []
for key, status in event_dict.items():
    session_times = event_df.loc[event_df["session"] == key, 'timestamp']
    if session_times.empty:
        # Labels with no rows in event_df (the 'nothing' placeholder, code 0)
        # have no window. Indexing .iloc[0] on the empty selection would raise
        # IndexError, and every sample already starts with Status 0.
        continue
    session_windows.append((status, session_times.iloc[0], session_times.iloc[-1]))

# Label every sample whose timestamp falls inside a session window (both ends
# inclusive). Windows are applied in event_dict order, so a later session wins
# where windows touch.
for df in clean_df_list:
    for status, first_event, last_event in session_windows:
        in_session = (df["Timestamp"] >= first_event) & (df["Timestamp"] <= last_event)
        df.loc[in_session, 'Status'] = status

### Validation of the event markers

In [237]:
# Offset in seconds into the cropped recordings at which the label is spot-checked.
# It is multiplied by each sensor's nominal sampling rate to get a row position.
view_second = 3660

In [238]:
# Spot-check the label at the row view_second * sampling rate in each sensor.
# The row comes from the nominal rate, so the timestamp found there can differ
# between sensors.
for name, frame, rate in zip(sensor_list, clean_df_list, sample_rate_list):
    row = view_second*rate
    print(f"======== {name} [status] ========")
    print(f"Time: {frame['Timestamp'].iloc[row]}")
    print(f"Status: {frame['Status'].iloc[row]}\n")

Time: 2024-03-19 17:10:52
Status: 20

Time: 2024-03-19 17:11:08
Status: 20

Time: 2024-03-19 17:11:23
Status: 20



In [239]:
# for i in range(len(sensor_list)):
#     clean_df_list[i] = clean_df_list[i].loc[clean_df_list[i]['Status'] != 0].reset_index(drop=True)

In [240]:
# Print the summary of the cropped recordings again after labelling.
for idx, sensor_name in enumerate(sensor_list):
    labeled_df = clean_df_list[idx]
    rate_hz = sample_rate_list[idx]
    row_count = len(labeled_df)
    print(f"============ {sensor_name} [clean information] =============")
    print(f"Start time: {labeled_df['Timestamp'][0]}")
    print(f"End time: {labeled_df['Timestamp'].iloc[-1]}")
    print(f"Data Length: {row_count}")
    print(f"Data Length (in seconds): {row_count/rate_hz}(s)")
    print(f"Sampling Rate: {rate_hz}Hz\n")

Start time: 2024-03-19 16:10:08
End time: 2024-03-19 17:11:52
Data Length: 483653
Data Length (in seconds): 3720.4076923076923(s)
Sampling Rate: 130Hz

Start time: 2024-03-19 16:10:08
End time: 2024-03-19 17:11:52
Data Length: 237062
Data Length (in seconds): 3704.09375(s)
Sampling Rate: 64Hz

Start time: 2024-03-19 16:10:08
End time: 2024-03-19 17:11:52
Data Length: 92244
Data Length (in seconds): 3689.76(s)
Sampling Rate: 25Hz



In [241]:
# Write each labelled frame next to its raw sensor file as labeled_data.csv.
# The path is built with os.path.join, as in the loading cells, so it stays
# correct even if a path constant loses its trailing slash.
# to_csv is called with its defaults, so the row index is written as the first column.
for labeled_df, sensor_path in zip(clean_df_list, sensor_path_list):
    labeled_df.to_csv(os.path.join(DATA_PATH, SUBJECT_PATH, sensor_path, "labeled_data.csv"))

In [250]:
# One-row frame: one column per session label, holding its status code.
event_dict_df = pd.DataFrame(event_dict, index=[0])
# NOTE(review): the following cell opens this same event_dict.csv path in 'w'
# mode, so the file written here is replaced when that cell runs. Confirm which
# of the two formats is the intended one.
event_dict_df.to_csv(DATA_PATH + SUBJECT_PATH+ "event_dict.csv")

In [251]:
# Save dictionary to CSV file: a header row with the session labels followed by
# one row with their status codes. `csv` is imported in the notebook's import cell.
event_dict_path = os.path.join(DATA_PATH, SUBJECT_PATH, "event_dict.csv")
with open(event_dict_path, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=list(event_dict))
    writer.writeheader()
    writer.writerow(event_dict)