In [1]:
import csv
import datetime
import glob
import os
from zipfile import ZipFile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.signal import butter, lfilter
from sqlalchemy import create_engine

In [2]:
def get_utc_date_time(ts):
    """Convert a Unix epoch value in seconds to a timezone-aware UTC datetime.

    Parameters
    ----------
    ts : scalar or array-like
        Seconds since the Unix epoch; forwarded to ``pd.to_datetime`` with
        ``unit='s'``.

    Returns
    -------
    The result of ``pd.to_datetime`` for ``ts``, localized to UTC
    (a ``pd.Timestamp`` when ``ts`` is a scalar).
    """
    # `infer_datetime_format` was dropped on purpose: it only concerns parsing
    # of date strings, is deprecated since pandas 2.0, and has no effect when
    # a numeric epoch is converted with an explicit unit.
    return pd.to_datetime(ts, unit='s', utc=True)
#.strftime('%H:%M:%S:%f')

def add_fs(sample_rate, date):
    """Return ``date`` advanced by one sampling period.

    ``sample_rate`` is the number of samples per second, so one period is
    ``1 / sample_rate`` seconds; it is expressed in milliseconds before being
    turned into a ``datetime.timedelta``.
    """
    period_ms = 1.0/(sample_rate) * 1000.0
    one_sample = datetime.timedelta(milliseconds=period_ms)
    return date + one_sample

In [3]:
def time_range(df_length, start_timestamp, sample_rate):
    """Generate evenly spaced timestamps for a fixed-rate recording.

    Parameters
    ----------
    df_length : int
        Number of timestamps to produce (one per data row).
    start_timestamp : str or number
        Unix epoch seconds of the first sample; converted with ``float()``.
    sample_rate : number
        Samples per second; each timestamp is one sampling period after the
        previous one.

    Returns
    -------
    list
        ``df_length`` timestamps starting at ``start_timestamp``; an empty
        list when ``df_length`` is zero or negative.
    """
    # An empty recording gets no timestamps. Without this guard the start
    # timestamp was always emitted, giving a list one element longer than the
    # (empty) data it is meant to label.
    if df_length <= 0:
        return []

    t_0 = get_utc_date_time(float(start_timestamp))
    timestamps = [t_0]

    # Step forward one sampling period at a time from the start timestamp.
    next_date = t_0
    for _ in range(df_length - 1):
        next_date = add_fs(sample_rate, next_date)
        timestamps.append(next_date)
    return timestamps

In [4]:
def format_time(t):
    """Render ``t`` as ``YYYY-MM-DD HH:MM:SS.mmm`` (millisecond precision).

    ``%f`` always yields six microsecond digits, so trimming the final three
    characters leaves exactly three fractional digits.
    """
    with_micros = t.strftime('%Y-%m-%d %H:%M:%S.%f')
    keep = len(with_micros) - 3
    return with_micros[:keep]

In [5]:
def process_hr(list_of_zips, participant_id):
    """Load ``HR.csv`` from each zip archive and combine them into one frame.

    Each ``HR.csv`` is read with its first line as the header: the single
    column name is the recording's starting Unix timestamp, the first data
    row holds the sample rate, and the remaining rows are the readings.

    Parameters
    ----------
    list_of_zips : iterable of str
        Paths to zip archives that each contain an ``HR.csv`` member.
    participant_id : str
        Identifier written to the ``ParticipantId`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``HR``, ``Timestamp`` (``YYYY-MM-DD HH:MM:SS.mmm+00:00``
        strings) and ``ParticipantId``. Each file keeps its own row labels,
        so the index may contain duplicates. An empty frame with those three
        columns is returned when no archive yields any samples.
    """
    hr_all = []

    for zip_file in list_of_zips:
        # Context managers close both the archive and the member handle;
        # previously they stayed open until garbage collection.
        with ZipFile(zip_file) as zf, zf.open('HR.csv') as hr_file:
            hr_df_raw = pd.read_csv(hr_file)

        # Row 0 is the sample rate, so fewer than two rows means the file has
        # no readings at all; skip it rather than fail on the lookups below.
        if len(hr_df_raw) < 2:
            continue

        starting_timestamp = hr_df_raw.columns[0]
        sample_rate = hr_df_raw.iloc[0, 0]

        # Drop the sample-rate row and give the data column a proper name.
        hr_df = hr_df_raw.drop(index=0).rename(columns={starting_timestamp: "HR"})

        time_col = time_range(len(hr_df), starting_timestamp, sample_rate)
        # A plain list comprehension yields the same strings as a row-wise
        # DataFrame.apply without building a Series object for every row.
        hr_df['Timestamp'] = [format_time(t) + '+00:00' for t in time_col]
        hr_df['ParticipantId'] = participant_id

        hr_all.append(hr_df)

    # pd.concat raises ValueError on an empty list; hand back an empty frame
    # with the expected columns instead.
    if not hr_all:
        return pd.DataFrame(columns=["HR", "Timestamp", "ParticipantId"])

    return pd.concat(hr_all)

In [6]:
# Root folder of the sensor exports (absolute path on the analyst's machine).
base_path = r"C:\Users\shaye\Documents\EAT Lab\Sensor_Data"
# Every entry directly under the root whose name starts with "ID PR".
participant_data = glob.glob("".join((base_path, "/ID PR*")))

In [7]:
# Build and print the combined heart-rate table for each participant folder.
for path in participant_data:
    # The participant id is the last whitespace-separated token of the folder
    # name. os.path.basename honours the platform's path separators, whereas
    # splitting on a literal backslash only worked for Windows-style paths.
    participant_id = os.path.basename(path).split()[-1]
    list_of_zips = glob.glob(path + "/Empatica data/A*.zip")
    if not list_of_zips:
        # Nothing to load for this participant; carry on with the others
        # instead of aborting the whole run.
        print("No Empatica archives found for " + participant_id + "; skipping.")
        continue
    hr_test = process_hr(list_of_zips, participant_id)
    print(hr_test)

          HR                      Timestamp ParticipantId
1      49.00  2020-12-15 13:59:18.000+00:00         PR025
2      49.00  2020-12-15 13:59:19.000+00:00         PR025
3      88.00  2020-12-15 13:59:20.000+00:00         PR025
4      78.00  2020-12-15 13:59:21.000+00:00         PR025
5      74.00  2020-12-15 13:59:22.000+00:00         PR025
...      ...                            ...           ...
39686  83.58  2021-01-14 01:34:39.000+00:00         PR025
39687  83.63  2021-01-14 01:34:40.000+00:00         PR025
39688  83.65  2021-01-14 01:34:41.000+00:00         PR025
39689  83.53  2021-01-14 01:34:42.000+00:00         PR025
39690  83.45  2021-01-14 01:34:43.000+00:00         PR025

[1149756 rows x 3 columns]
