In [1]:
import csv
import datetime
import glob
import os
from zipfile import ZipFile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.signal import butter, lfilter

In [2]:
def get_utc_date_time(ts):
    """Convert a Unix epoch time in seconds to a timezone-aware UTC datetime.

    Parameters
    ----------
    ts : int, float, or array-like of numbers
        Seconds elapsed since 1970-01-01T00:00:00Z.

    Returns
    -------
    pandas.Timestamp or pandas.DatetimeIndex
        UTC-localized result of ``pd.to_datetime`` (a ``Timestamp`` for a
        scalar input).
    """
    # `infer_datetime_format` only concerns string parsing and is deprecated
    # since pandas 2.0 (it emits a UserWarning). With a numeric `unit` it
    # never influenced the result, so it is intentionally not passed.
    return pd.to_datetime(ts, unit='s', utc=True)
#.strftime('%H:%M:%S:%f')

def add_fs(sample_rate, date):
    """Return ``date`` moved forward by one sampling period.

    Parameters
    ----------
    sample_rate : number
        Sampling frequency in Hz; one period is ``1 / sample_rate`` seconds.
    date : datetime-like
        Any value that supports addition with ``datetime.timedelta``.

    Returns
    -------
    The sum of ``date`` and a ``timedelta`` of one period.
    """
    # Period expressed in milliseconds, as expected by the timedelta keyword.
    period_ms = 1.0 / (sample_rate) * 1000.0
    one_sample = datetime.timedelta(milliseconds=period_ms)
    return date + one_sample

In [3]:
def time_range(df_length, start_timestamp, sample_rate):
    """Build one UTC timestamp per sample of an evenly sampled recording.

    Parameters
    ----------
    df_length : int
        Number of samples that need a timestamp.
    start_timestamp : str or float
        Unix epoch time, in seconds, of the first sample.
    sample_rate : number
        Sampling frequency in Hz.

    Returns
    -------
    list
        ``df_length`` timezone-aware timestamps; element ``i`` equals the
        start time plus ``i / sample_rate`` seconds. An empty list is
        returned when ``df_length`` is zero or negative.
    """
    # Nothing to stamp: return an empty list so the result always has exactly
    # one entry per sample (previously a single timestamp was returned here).
    if df_length <= 0:
        return []

    t_0 = get_utc_date_time(float(start_timestamp))
    period_ms = 1.0 / sample_rate * 1000.0

    # Each offset is computed from the origin rather than by repeatedly adding
    # one period: timedelta rounds to whole microseconds, and accumulating the
    # rounded step would let that error grow with the sample index.
    return [
        t_0 + datetime.timedelta(milliseconds=sample_index * period_ms)
        for sample_index in range(df_length)
    ]

In [4]:
def format_time(t):
    """Render ``t`` as ``YYYY-MM-DD HH:MM:SS.mmm``.

    The value is formatted with microsecond precision and the last three
    digits are cut off (truncated, not rounded), leaving milliseconds.
    """
    with_microseconds = t.strftime('%Y-%m-%d %H:%M:%S.%f')
    keep = len(with_microseconds) - 3
    return with_microseconds[:keep]

In [5]:
def process_bvp(list_of_zips, participant_id):
    """Load the ``BVP.csv`` member of each archive into one combined DataFrame.

    ``BVP.csv`` is read as a single-column file whose first line is the
    session start time (Unix epoch seconds), whose second line is the sample
    rate in Hz, and whose remaining lines are one sample each.

    Parameters
    ----------
    list_of_zips : iterable of path-like
        Zip archives to read, processed in the order given.
    participant_id : str
        Value stored in the ``ParticipantId`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``BVP``, ``Timestamp`` (string with millisecond precision and
        a ``+00:00`` suffix) and ``ParticipantId``. The row labels of each
        session are kept as read (they are not reset), so labels repeat
        across sessions. An empty frame with these columns is returned when
        no archive contributes any sample.
    """
    output_columns = ['BVP', 'Timestamp', 'ParticipantId']
    bvp_all = []

    for zip_file in list_of_zips:
        # Context managers close both the archive and the member stream,
        # even when parsing raises.
        with ZipFile(zip_file) as zf, zf.open('BVP.csv') as bvp_file:
            bvp_df_raw = pd.read_csv(bvp_file)

        # read_csv consumes the first line as the header, so the only column
        # is *named* after the start timestamp and row 0 holds the sample rate.
        starting_timestamp = bvp_df_raw.columns[0]
        sample_rate = bvp_df_raw.loc[0, starting_timestamp]

        # Remove the sample-rate row; what remains are the actual samples.
        bvp_df = bvp_df_raw.drop(index=0)
        if bvp_df.empty:
            # A session without samples has nothing to contribute.
            continue

        time_col = time_range(len(bvp_df), starting_timestamp, sample_rate)
        # Format directly from the list instead of a row-wise DataFrame.apply,
        # which is far slower and yields the same strings.
        bvp_df['Timestamp'] = [format_time(t) + '+00:00' for t in time_col]

        bvp_df = bvp_df.rename(columns={starting_timestamp: "BVP"})
        bvp_df['ParticipantId'] = participant_id

        bvp_all.append(bvp_df)

    # pd.concat raises on an empty list; return a well-formed empty frame.
    if not bvp_all:
        return pd.DataFrame(columns=output_columns)
    return pd.concat(bvp_all)

In [6]:
# Root folder of the sensor export; participant folders directly beneath it
# are matched by the "ID PR*" pattern.
base_path = r"C:\Users\shaye\Documents\EAT Lab\Sensor_Data"
participant_pattern = base_path + "/ID PR*"
participant_data = glob.glob(participant_pattern)

In [7]:
for path in participant_data:
    # The participant id is the last whitespace-separated token of the folder
    # name (folders are matched by the "ID PR*" pattern). os.path handles the
    # platform's separators instead of assuming backslashes.
    folder_name = os.path.basename(os.path.normpath(path))
    participant_id = folder_name.split()[-1]

    # glob gives no ordering guarantee; sorting makes the order in which
    # sessions are concatenated reproducible between runs.
    list_of_zips = sorted(glob.glob(path + "/Empatica data/A*.zip"))
    if not list_of_zips:
        # Skip participants without archives instead of aborting the whole run.
        print("No Empatica archives found for " + participant_id + "; skipping")
        continue

    bvp_test = process_bvp(list_of_zips, participant_id)
    print(bvp_test)

           BVP                      Timestamp ParticipantId
1        -0.00  2020-12-15 13:59:08.000+00:00         PR025
2        -0.00  2020-12-15 13:59:08.015+00:00         PR025
3        -0.00  2020-12-15 13:59:08.031+00:00         PR025
4        -0.00  2020-12-15 13:59:08.046+00:00         PR025
5        -0.00  2020-12-15 13:59:08.062+00:00         PR025
...        ...                            ...           ...
2540732  65.17  2021-01-14 01:34:42.921+00:00         PR025
2540733  63.93  2021-01-14 01:34:42.937+00:00         PR025
2540734  61.23  2021-01-14 01:34:42.953+00:00         PR025
2540735  57.64  2021-01-14 01:34:42.968+00:00         PR025
2540736  54.52  2021-01-14 01:34:42.984+00:00         PR025

[73602694 rows x 3 columns]
