In [39]:
# imports

from pathlib import Path
from collections import namedtuple
from zoneinfo import ZoneInfo
from datetime import datetime
import os
import glob
import json
import pandas as pd
import numpy as np

In [43]:
# find min/max heart rate for every day

def timestamp_hr(csv_file):
    list_min_hr = []
    list_max_hr = []
    list_day = []

    df = pd.read_csv(csv_file)
    if df.empty or 'timestamp' not in df.columns or 'heartrate' not in df.columns:
        return [],[],[]

    unix_val = df['timestamp'].loc[0] # unix time in first row
    prev_day = datetime.fromtimestamp(unix_val,tz=ZoneInfo("America/New_York")).day
    min = df['heartrate'].loc[0]
    max = df['heartrate'].loc[0]

    for i in range(1,len(df)): # iterates through each row in the dataframe
        timestamp = df['timestamp'].loc[i]
        curr_day = datetime.fromtimestamp(timestamp,tz=ZoneInfo("America/New_York")).day
        curr_hr = df['heartrate'].loc[i]
        if prev_day == curr_day:
            if (curr_hr < min):
                min = curr_hr
            elif (curr_hr > max):
                max = curr_hr
        elif curr_day != prev_day:
            list_min_hr.append(min)
            list_max_hr.append(max)
            list_day.append(unix_val)
            
            min = curr_hr
            max = curr_hr
            unix_val = timestamp
            prev_day = curr_day

    unix_val = df['timestamp'].loc[len(df.index)-1]
    list_min_hr.append(min)
    list_max_hr.append(max)
    list_day.append(unix_val)

    return list_min_hr, list_max_hr, list_day

In [None]:
# ISSUE FIX -- fixed

minimum, maximum, day = timestamp_hr(r"C:\Users\VictoriaAgain\Downloads\download\90b8c2e2c843ffd77f8621c7d6ed044d\combined_heartrate.csv")
for i in range(len(day)):
    date = datetime.fromtimestamp(day[i],tz=ZoneInfo("America/New_York"))
    print(date)
print(len(day),len(min),len(max))

# convert_day = []
# for i in range(len(day)):
#     convert_day.append(datetime.fromtimestamp(day[i]).day)
# print(convert_day)

In [55]:
# total steps by day for one patient given csv file path

def timestamp_total_step(csv_file):
    list_total_step = []
    list_day_step = []
    
    df = pd.read_csv(csv_file)

    if df.empty or 'stop' not in df.columns or 'steps' not in df.columns:
        return [],[]
    
    unix_prev = df['stop'].loc[0]
    temp_day_prev = datetime.fromtimestamp(df['stop'].loc[0]).day
    temp_total_step = 0
    
    for i in df.index:
        val = df['stop'].loc[i]
        temp_day_curr = datetime.fromtimestamp(val).day
        if temp_day_prev == temp_day_curr:
            temp_total_step += df['steps'].loc[i]
        elif temp_day_curr != temp_day_prev:
            list_day_step.append(unix_prev)
            list_total_step.append(temp_total_step)
            unix_prev = df['stop'].loc[i]
            temp_total_step = 0
            temp_day_prev = temp_day_curr

    # for last day otherwise not included
    unix_prev = df['stop'].loc[len(df.index)-1]
    list_day_step.append(unix_prev)
    list_total_step.append(temp_total_step)
    
    return list_total_step, list_day_step

In [59]:
# testing
step,day = timestamp_total_step(r"C:\Users\VictoriaAgain\Downloads\download\90b8c2e2c843ffd77f8621c7d6ed044d\combined_steps.csv")
convert_day = []
for i in range(len(day)):
    convert_day.append(datetime.fromtimestamp(day[i]).day)
print(convert_day)
# df = pd.read_csv(r"C:\Users\VictoriaAgain\Downloads\download\90b8c2e2c843ffd77f8621c7d6ed044d\combined_steps.csv")

[25, 26, 27, 28, 29, 30, 31, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]


In [106]:
# ISSUE FIX -- repeats min/max 6 times for each day
def compile_step_hr(hr_file,step_file):
    step_arr, step_day = timestamp_total_step(step_file)
    min, max, hr_day = timestamp_hr(hr_file)
    
    list_day = []
    list_max = []
    list_min = []
    list_step = []

    # smaller functions work -- same dates -- something here is WRONG
    for i in range(np.minimum(len(hr_day),len(step_day))):  # whichever is smaller
        hr_date = datetime.fromtimestamp(hr_day[i],tz=ZoneInfo("America/New_York")).date()
        step_date = datetime.fromtimestamp(step_day[i],tz=ZoneInfo("America/New_York")).date()
        # hr_month = datetime.fromtimestamp(hr_day[i],tz=ZoneInfo("America/New_York")).month
        # hr_datetime_day = datetime.fromtimestamp(hr_day[i],tz=ZoneInfo("America/New_York")).day
        # step_month = datetime.fromtimestamp(step_day[i],tz=ZoneInfo("America/New_York")).month
        # step_datetime_day = datetime.fromtimestamp(step_day[i],tz=ZoneInfo("America/New_York")).day
        
        # need double for loop in case we skip a day!
        # print(hr_month, ",", hr_datetime_day)
        # print(step_month, ",", step_datetime_day)
        # if (hr_month == step_month and hr_datetime_day == step_datetime_day): # checking if date is the same
        if hr_date == step_date:
            list_day.append(hr_day[i])
            list_max.append(max[i])
            list_min.append(min[i])
            list_step.append(step_arr[i])
        # elif (hr_datetime_day > step_datetime_day): # heartrate skips, go through rest of step days
        elif hr_date > step_date:
            for j in range(i,len(step_day)):
                # if (hr_month == step_month and hr_datetime_day == datetime.fromtimestamp(step_day[j],tz=ZoneInfo("America/New_York")).day):
                if datetime.fromtimestamp(step_day[j], tz=ZoneInfo("America/New_York")).date() == hr_date:
                    list_day.append(hr_day[i])
                    list_max.append(max[i])
                    list_min.append(min[i])
                    list_step.append(step_arr[j])
        # elif (step_datetime_day > hr_datetime_day): # step day skips, go through rest of heart rate days
        elif step_date > hr_date:
            for k in range(i,len(hr_day)):
                # if (hr_month == step_month and step_datetime_day == datetime.fromtimestamp(hr_day[k],tz=ZoneInfo("America/New_York")).day):
                if datetime.fromtimestamp(hr_day[k], tz=ZoneInfo("America/New_York")).date() == step_date:
                    list_day.append(hr_day[k])
                    list_max.append(max[k])
                    list_min.append(min[k])
                    list_step.append(step_arr[i])
    
    list_patient_id = [str(hr_file)[42:74]]*len(list_day)
    combined_arr = np.column_stack((list_patient_id,list_day,list_min,list_max,list_step))
    return combined_arr

In [112]:
arr = compile_step_hr(r"C:\Users\VictoriaAgain\Downloads\download\80929938f623f44614a029108a740d00\combined_heartrate.csv",r"C:\Users\VictoriaAgain\Downloads\download\80929938f623f44614a029108a740d00\combined_steps.csv")
df = pd.DataFrame(arr)

df
# for i in df.index:
#     print(datetime.fromtimestamp(df[1].loc[i],tz=ZoneInfo("America/New_York")))

Unnamed: 0,0,1,2,3,4
0,80929938f623f44614a029108a740d00,1637412569.2489471,52.0,117.0,4339.057098846426
1,80929938f623f44614a029108a740d00,1637470817.517382,50.0,97.0,4296.680636138327
2,80929938f623f44614a029108a740d00,1637592203.2239146,59.0,101.0,8570.605339707423
3,80929938f623f44614a029108a740d00,1637704280.3434532,67.0,67.0,336.6190395848259
4,80929938f623f44614a029108a740d00,1637758959.4615457,53.0,111.0,8822.874939185305
...,...,...,...,...,...
163,80929938f623f44614a029108a740d00,1652544886.0262475,63.0,98.0,2021.0000000000005
164,80929938f623f44614a029108a740d00,1652635746.3499784,58.0,103.0,6511.697083321055
165,80929938f623f44614a029108a740d00,1652706240.2805843,65.0,93.0,6089.559720682854
166,80929938f623f44614a029108a740d00,1652794607.8277097,68.0,113.0,6752.818981934637
