## Activity and PPGR
Calculates the PPGR (postprandial glucose response) AUC, the METs AUC, the average activity level, and active-minute metrics for each meal, keyed by the meal's datetime, over 2-hour and 3-hour timeframes.

In [50]:
import numpy as np
import pandas as pd 
from sklearn.metrics import auc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime

In [51]:
# Load the Fitbit export for one participant.
x = 1
datafile = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_Fitbit.csv")
datafile['Date_Time'] = pd.to_datetime(datafile['Date_Time'])

# Keep only rows where mins, METs and HeartRate are all present. Dropping
# NaNs from each column separately can leave the three arrays with different
# lengths, so position i in one no longer refers to the same row as
# position i in another once they are zipped together.
complete_rows = datafile[['mins', 'METs', 'HeartRate']].notna().all(axis=1)
time = datafile.loc[complete_rows, 'mins'].to_numpy()
mets = datafile.loc[complete_rows, 'METs'].to_numpy()
hr = datafile.loc[complete_rows, 'HeartRate'].to_numpy()

# Load the raw (not yet interpolated) Dexcom CGM export for the same participant.
dexcomdata = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_CGM_Dexcom.csv")

In [23]:
def libreLinearInterpolation(cgm_df):
    """Resample Libre CGM readings to 1-minute rows using linear interpolation.

    Parameters
    ----------
    cgm_df : pandas.DataFrame
        Readings with a 'Date_Time' column that ``pd.to_datetime`` can parse.
        The frame passed in is left unmodified.

    Returns
    -------
    pandas.DataFrame
        A new frame with one row per minute, 'Date_Time' restored as a
        regular column, and the gaps between readings filled by linear
        interpolation.
    """
    # Work on a copy: the caller's frame keeps its 'Date_Time' column, so the
    # function can safely be called again on the same input.
    readings = cgm_df.copy()
    readings['Date_Time'] = pd.to_datetime(readings['Date_Time'])
    readings = readings.set_index('Date_Time')
    # Drop rows with missing values
    readings = readings.dropna()
    # Keep only the first row for a repeated timestamp (duplicate index
    # entries caused an error in the resampling step).
    readings = readings.loc[~readings.index.duplicated(), :]
    # 'min' is the minute alias; the older 'T' spelling is deprecated in
    # recent pandas releases.
    interpolated = readings.resample('min').interpolate(method='linear')
    # Turn the index back into a column
    return interpolated.reset_index()

def dexcomLinearInterpolation(raw_cgm_df):
    """Resample Dexcom CGM readings to 1-minute rows using linear interpolation.

    Parameters
    ----------
    raw_cgm_df : pandas.DataFrame
        Readings with a 'Date_Time' column that ``pd.to_datetime`` can parse.
        The frame passed in is left unmodified.

    Returns
    -------
    pandas.DataFrame
        A new frame with one row per minute, 'Date_Time' restored as a
        regular column, and the gaps between readings filled by linear
        interpolation.
    """
    # Work on a copy so the caller's frame keeps its 'Date_Time' column.
    readings = raw_cgm_df.copy()
    # Snap every timestamp down to the start of its minute so readings land
    # exactly on the 1-minute resampling grid.
    readings['Date_Time'] = pd.to_datetime(readings['Date_Time']).dt.floor('min')
    readings = readings.set_index('Date_Time')
    # Two readings inside the same minute now share a timestamp; keep the
    # first, matching what libreLinearInterpolation does for duplicates.
    readings = readings.loc[~readings.index.duplicated(), :]
    # 'min' is the minute alias; the older 'T' spelling is deprecated in
    # recent pandas releases.
    interpolated_cgm_df = readings.resample('min').interpolate(method='linear')
    # Turn the index back into a column
    return interpolated_cgm_df.reset_index()

def readData(x):
    """Load one participant's Fitbit and Dexcom files into module-level globals.

    Parameters
    ----------
    x : int
        Participant number; it is zero-padded to three digits to build the
        file names.

    Side effects
    ------------
    Rebinds the globals ``datafile`` (Fitbit frame with 'Date_Time' parsed),
    ``time``, ``mets``, ``hr`` (numpy arrays taken from the 'mins', 'METs'
    and 'HeartRate' columns) and ``dexcomdata`` (Dexcom frame after
    ``dexcomLinearInterpolation``). ``libredata`` is declared global but is
    not assigned here: loading of the Libre file is currently disabled.
    """
    global datafile
    global time
    global mets
    global hr
    global libredata
    global dexcomdata

    participant_id = str(x).zfill(3)

    # Fitbit export (METs and heart rate)
    datafile = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{participant_id}_Fitbit.csv")
    datafile['Date_Time'] = pd.to_datetime(datafile['Date_Time'])

    # Keep only rows where all three columns are present. Dropping NaNs from
    # each column separately can leave the arrays with different lengths, so
    # the same position would refer to different rows once they are zipped.
    complete_rows = datafile[['mins', 'METs', 'HeartRate']].notna().all(axis=1)
    time = datafile.loc[complete_rows, 'mins'].to_numpy()
    mets = datafile.loc[complete_rows, 'METs'].to_numpy()
    hr = datafile.loc[complete_rows, 'HeartRate'].to_numpy()

    # Dexcom CGM export, resampled to 1-minute rows
    dexcomdata = dexcomLinearInterpolation(
        pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{participant_id}_CGM_Dexcom.csv")
    )

### Calculate metrics for all the meal times from a spreadsheet
The results can be exported to CSV or printed.

In [43]:
# Per-meal heart-rate results; activityMetrics appends one entry to each of
# these four lists on every call.
hrmax, hrauc, hriauc, hrstart = [], [], [], []

# Presumably minutes spent in heart-rate zones 1-4; nothing visible in this
# notebook fills them yet.
z1mins, z2mins, z3mins, z4mins = [], [], [], []

def activityMetrics(timestring, timeframe=180):
    """Append activity and heart-rate metrics for the window after one meal.

    Reads the module-level ``time``, ``mets``, ``hr`` arrays and the
    ``datafile`` frame, selects the samples whose 'mins' value lies in
    ``[meal minute, meal minute + timeframe]`` and appends exactly one entry
    to each of the module-level lists ``auc3``, ``hrmax``, ``hrauc``,
    ``hrstart``, ``hriauc``, ``avg3``, ``activeMin2``, ``activeMin3`` and
    ``activeMin4``.

    Parameters
    ----------
    timestring : str
        Meal time formatted as '%Y-%m-%d %H:%M:%S'.
    timeframe : int, optional
        Window length in 'mins' units. Defaults to 180.

    Notes
    -----
    When the AUC cannot be computed (``auc`` raises ``ValueError``, e.g. the
    window holds fewer than two samples), the five AUC/heart-rate lists get
    the string "error". A date with no Fitbit rows is treated as an empty
    window instead of raising.
    """
    # One row per sample: (mins, METs, heart rate). Truncating to the shortest
    # array mirrors zip() when the inputs differ in length.
    n = min(len(time), len(mets), len(hr))
    arr = np.column_stack((time[:n], mets[:n], hr[:n]))

    # Translate the meal's clock time into the file's running 'mins' counter,
    # using the first Fitbit row of that calendar day as the anchor.
    target_time = datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S')
    theday = datafile[datafile['Date_Time'].dt.date == target_time.date()]

    if len(theday) == 0:
        # No Fitbit data on that date: fall through with an empty window so
        # every list still receives one entry for this meal.
        filtered = arr[:0]
    else:
        first_row = theday.iloc[0]
        starttime = first_row['Date_Time']
        minutes = int((target_time - starttime).total_seconds() / 60 + first_row['mins'])
        in_window = (arr[:, 0] >= minutes) & (arr[:, 0] <= minutes + timeframe)
        filtered = arr[in_window]

    activity = filtered[:, 1]
    heart = filtered[:, 2]

    try:
        hr_area = auc(filtered[:, 0], heart)
        mets_area = auc(filtered[:, 0], activity)
    except ValueError:
        for results in (auc3, hrmax, hrauc, hrstart, hriauc):
            results.append("error")
    else:
        hr_at_start = filtered[0, 2]
        auc3.append(mets_area)
        hrmax.append(np.max(heart))
        hrauc.append(hr_area)
        hrstart.append(hr_at_start)
        # Incremental AUC: area above the heart rate at the start of the window.
        hriauc.append(hr_area - timeframe * hr_at_start)

    # TODO: for zones, use participant number to find age and store that as a variable

    # The average is divided by 10, so METs appear to be stored x10 and the
    # thresholds 20/30/40 would mean 2/3/4 METs -- TODO confirm.
    # An empty window yields NaN without triggering numpy's empty-mean warning.
    avg3.append(np.average(activity) / 10 if activity.size else np.nan)
    activeMin2.append(int(np.count_nonzero(activity > 20)))
    activeMin3.append(int(np.count_nonzero(activity > 30)))
    # Was "> 30", which made activeMin4 an exact copy of activeMin3.
    activeMin4.append(int(np.count_nonzero(activity > 40)))

In [44]:
# Per-meal CGM results; iaucAll appends one entry to each list on every call.
peakheight, peaktime, pd40, startbg, iauc = [], [], [], [], []

def iaucAll(timestring, threehr, timeframe=180):
    """Append glucose-response metrics for the window after one meal.

    Reads the module-level ``dexcomdata`` frame ('Date_Time', 'mins' and 'BG'
    columns), selects the samples whose 'mins' value lies in
    ``[meal minute, meal minute + timeframe]`` and appends exactly one entry
    to ``threehr`` and to each of the module-level lists ``peakheight``,
    ``pd40``, ``peaktime``, ``startbg`` and ``iauc``.

    Parameters
    ----------
    timestring : str
        Meal time formatted as '%Y-%m-%d %H:%M:%S'.
    threehr : list
        Receives the total glucose AUC over the window.
    timeframe : int, optional
        Window length in 'mins' units. Defaults to 180.

    Notes
    -----
    If there is no data for the meal's date, a sample exactly at the window
    start or end is missing, or ``auc`` raises ``ValueError``, every list
    receives the string "error" for this meal.
    Also converts ``dexcomdata['Date_Time']`` to datetime in place.
    """
    datafile1 = dexcomdata
    datafile1['Date_Time'] = pd.to_datetime(datafile1['Date_Time'])

    # Drop a row when either value is missing. Filtering the two columns
    # separately could pair a minute with the glucose value of another row.
    mins_col = datafile1['mins'].to_numpy()
    bg = datafile1['BG'].to_numpy()
    valid = ~(np.isnan(mins_col) | np.isnan(bg))
    arr = np.column_stack((mins_col[valid], bg[valid]))

    target_time = datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S')
    theday = datafile1[datafile1['Date_Time'].dt.date == target_time.date()]

    try:
        if len(theday) == 0:
            raise ValueError("Data for this date is not available")

        # Translate the meal's clock time into the file's running 'mins'
        # counter, anchored on the first CGM row of that calendar day.
        first_row = theday.iloc[0]
        starttime = first_row['Date_Time']
        minutes = int((target_time - starttime).total_seconds() / 60 + first_row['mins'])
        window_end = minutes + timeframe

        in_window = (arr[:, 0] >= minutes) & (arr[:, 0] <= window_end)
        filtered = arr[in_window]
        cgmdata = filtered[:, 1]

        area = auc(filtered[:, 0], cgmdata)

        # Both endpoints must be present, otherwise the window is incomplete.
        has_start = np.any(filtered[:, 0] == minutes)
        has_end = np.any(filtered[:, 0] == window_end)
        if not (has_start and has_end):
            raise ValueError('Endpoint not available')

        peak = np.max(cgmdata)
        # NOTE(review): this is the 'mins' value at the peak, not the time
        # elapsed since the meal -- confirm which one is intended.
        ptime = filtered[np.argmax(cgmdata), 0]
        # Number of samples above 60% of the peak value.
        pd4 = len(cgmdata[cgmdata > peak * .6])
        start = cgmdata[0]
        # Incremental AUC: area above the glucose level at the window start.
        iaucval = area - start * timeframe

    except ValueError:
        area = peak = pd4 = ptime = start = iaucval = "error"

    threehr.append(area)
    peakheight.append(peak)
    pd40.append(pd4)
    peaktime.append(ptime)
    startbg.append(start)
    iauc.append(iaucval)


In [45]:
# Calculate the metrics for a single participant.

# Start every accumulator empty, including the heart-rate lists that
# activityMetrics appends to; otherwise a re-run of this cell leaves them
# longer than the meal table and the DataFrame below cannot be built.
dthree = []
auc3 = []
activeMin2 = []
activeMin3 = []
activeMin4 = []
avg3 = []
peakheight = []
peaktime = []
pd40 = []
startbg = []
iauc = []
hrmax = []
hrauc = []
hriauc = []
hrstart = []

participant = 2
mldata = pd.read_excel(f"C:\\Users\\namil\\Downloads\\{str(participant).zfill(3)}.xlsx").dropna()
readData(participant)

for i in mldata['Meal Time']:
    print('\n' + str(i))
    activityMetrics(str(i))
    iaucAll(str(i), dthree)

# The 2-hour metrics are not computed at the moment, so only 3-hour columns
# are exported.
df = pd.DataFrame({'Meal Time': mldata['Meal Time'], 'snack in 3hrs': mldata['snack'], 'Meal Type': mldata['Meal Type'],
                   'peakheight': peakheight, 'peaktime': peaktime, 'peakduration_40': pd40, 'startbg': startbg, 'iauc': iauc,
                   'dexcom 3hr auc': dthree, 'mets 3 hr auc': auc3, 'hrmax': hrmax,
                   'activeMin_2': activeMin2, 'activeMin_3': activeMin3, 'activeMin_4': activeMin4,
                   'mets 3 hr avg': avg3, 'calories': mldata['Calories'], 'carbs': mldata['Carbs'],
                   'protein': mldata['Protein'], 'fat': mldata['Fat']})
# Name the file after the participant that was actually processed (the
# previous name used `x`, which is left over from another cell).
df.to_csv(f'test{participant}')
print("done!")


2021-09-25 08:21:00

2021-09-25 12:22:00

2021-09-25 17:57:00

2021-09-26 10:03:00

2021-09-26 13:27:00

2021-09-26 17:14:00

2021-09-27 10:33:00

2021-09-27 14:24:00

2021-09-27 19:34:00

2021-09-28 09:41:00

2021-09-28 13:15:00

2021-09-28 18:03:00

2021-09-29 09:28:00

2021-09-29 13:05:00

2021-09-29 18:03:00

2021-09-30 07:35:00

2021-09-30 12:25:00

2021-09-30 18:54:00

2021-10-01 09:55:00

2021-10-01 13:49:00

2021-10-01 18:17:00

2021-10-02 08:48:00

2021-10-02 13:16:00

2021-10-02 18:50:00

2021-10-03 09:59:00

2021-10-03 14:30:00

2021-10-03 19:04:00

2021-10-04 08:09:00

2021-10-04 15:45:00

2021-10-04 17:58:00

2021-10-05 08:23:00
done!


In [49]:
# Generate the dataset for all participants, one output file per participant.
participants = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,26,27,28,29]
for x in participants:
    # Fresh accumulators for this participant; activityMetrics and iaucAll
    # append one entry per meal to these module-level lists.
    dthree = []
    auc3 = []
    activeMin2 = []
    activeMin3 = []
    activeMin4 = []
    avg3 = []
    peakheight = []
    peaktime = []
    pd40 = []
    startbg = []
    iauc = []
    hrmax = []
    hrauc = []
    hriauc = []
    hrstart = []
    z1mins = []
    z2mins = []
    z3mins = []
    z4mins = []

    print("\n" + f"participant number {x}:")
    mldata = pd.read_excel(f"C:\\Users\\namil\\Downloads\\{str(x).zfill(3)}.xlsx").dropna()
    readData(x)

    for i in mldata['Meal Time']:
        print(str(i))
        activityMetrics(str(i))
        iaucAll(str(i), dthree)

    # Each heart-rate list gets its own column. The key 'hrauc' used to be
    # repeated three times in this dict, so only the last value (hrstart)
    # was written and hrauc / hriauc never reached the output.
    df = pd.DataFrame({'Meal Time': mldata['Meal Time'], 'snack in 3hrs': mldata['snack'], 'Meal Type': mldata['Meal Type'],
                       'peakheight': peakheight, 'peaktime': peaktime, 'peakduration_40': pd40, 'startbg': startbg, 'iauc': iauc,
                       'dexcom 3hr auc': dthree, 'mets 3 hr auc': auc3,
                       'hrmax': hrmax, 'hrauc': hrauc, 'hriauc': hriauc, 'hrstart': hrstart,
                       'activeMin_2': activeMin2, 'activeMin_3': activeMin3, 'activeMin_4': activeMin4,
                       'mets 3 hr avg': avg3, 'calories': mldata['Calories'], 'carbs': mldata['Carbs'],
                       'protein': mldata['Protein'], 'fat': mldata['Fat']})
    df.to_csv(f'allmetrics{x}')

print("done :)")


participant number 26:
2022-08-06 08:08:00
2022-08-06 12:14:00
2022-08-06 16:48:00
2022-08-07 09:06:00
2022-08-07 12:28:00
2022-08-08 12:42:00
2022-08-08 18:00:00
2022-08-09 09:44:00
2022-08-09 13:25:00
2022-08-09 16:16:00
2022-08-10 08:58:00
2022-08-10 12:19:00
2022-08-10 16:09:00
2022-08-11 09:06:00
2022-08-11 12:43:00
2022-08-11 17:10:00
2022-08-12 09:00:00
2022-08-12 23:06:00
2022-08-13 10:01:00
2022-08-13 13:40:00
2022-08-13 16:18:00
2022-08-14 10:00:00
2022-08-14 14:00:00
2022-08-15 10:19:00
2022-08-15 12:51:00
2022-08-16 11:49:00

participant number 27:
2022-08-09 08:27:12
2022-08-09 13:30:42
2022-08-09 20:15:00
2022-08-10 08:45:44
2022-08-10 12:45:39
2022-08-10 18:38:00
2022-08-11 08:30:25
2022-08-11 13:34:21
2022-08-11 21:04:00
2022-08-12 09:20:22
2022-08-12 15:30:08
2022-08-12 19:45:00
2022-08-13 09:33:15
2022-08-13 14:00:02
2022-08-14 10:10:22
2022-08-14 13:40:08
2022-08-14 20:00:00
2022-08-15 10:15:21
2022-08-15 14:00:01
2022-08-15 20:00:00
2022-08-16 08:15:02
2022-08-16 1

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-08-16 08:08:09
2022-08-16 12:40:06
2022-08-16 18:57:00
2022-08-17 09:04:33
2022-08-17 12:51:32
2022-08-17 18:58:00
2022-08-18 09:25:56
2022-08-18 12:40:39
2022-08-18 18:53:00
2022-08-19 09:12:25
2022-08-19 12:29:40
2022-08-19 19:53:00
2022-08-20 08:59:45
2022-08-20 12:51:38
2022-08-20 19:52:00
2022-08-21 09:38:36
2022-08-21 12:23:01
2022-08-21 18:47:00
2022-08-22 09:11:59
2022-08-22 13:01:30
2022-08-22 19:56:00
2022-08-23 09:45:44
2022-08-23 12:30:38
2022-08-24 08:59:21
2022-08-24 12:39:48
2022-08-24 20:10:00
2022-08-25 09:33:59
2022-08-25 12:33:02
2022-08-26 09:33:51

participant number 29:


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-09-28 08:20:12
2022-09-28 12:06:24
2022-09-28 17:22:00
2022-09-29 05:44:15
2022-09-29 11:56:25
2022-09-29 17:04:00
2022-09-30 06:18:10
2022-09-30 11:38:32
2022-09-30 16:20:00
2022-10-01 06:22:04
2022-10-01 11:53:24
2022-10-01 17:19:00
2022-10-02 06:30:27
2022-10-02 11:55:54
2022-10-02 17:37:00
2022-10-03 05:40:46
2022-10-03 11:40:54
2022-10-03 17:35:00
2022-10-04 05:57:35
2022-10-04 11:41:40
2022-10-04 17:36:00
2022-10-05 06:19:24
2022-10-05 11:39:51
2022-10-05 16:00:00
2022-10-06 05:51:14
2022-10-06 11:39:04
2022-10-06 16:34:00
2022-10-07 05:37:05
2022-10-07 11:39:10
2022-10-07 18:13:00
2022-10-08 06:31:55
done :)
