## Activity and PPGR
For each meal datetime, calculates the PPGR (postprandial glucose response) AUC, the METs AUC, the average activity level, and the number of active minutes over the 2-hour and 3-hour windows that follow the meal.

In [12]:
import numpy as np
import pandas as pd 
from sklearn.metrics import auc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime

In [13]:
#read datafile for METs
# Participant number; it is zero-padded to three digits in the file names below.
x=1
datafile = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_Fitbit.csv")
datafile['Date_Time'] = pd.to_datetime(datafile['Date_Time'])
time = datafile['mins'].to_numpy()
mets = datafile['METs'].to_numpy()

# Drop a row when EITHER value is missing, using one shared mask. Filtering the
# two arrays independently would shift one against the other whenever only one
# column has a gap, and activityMetrics pairs them up positionally.
both_present = ~(np.isnan(time) | np.isnan(mets))
time = time[both_present]
mets = mets[both_present]

#read cgm datafiles
# These are the raw CGM tables; unlike readData, this cell does not resample
# them to 1-minute intervals.
libredata = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_CGM_Libre.csv")
dexcomdata = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_CGM_Dexcom.csv")

In [14]:
def libreLinearInterpolation(cgm_df):
    """Resample Libre CGM readings to 1-minute intervals with linear interpolation.

    Args:
        cgm_df: DataFrame with a 'Date_Time' column that ``pd.to_datetime``
            can parse. Every other column is interpolated. The frame passed
            in is left unmodified.

    Returns:
        A new DataFrame with one row per minute between the first and last
        remaining reading, with 'Date_Time' as an ordinary column.
    """
    # Work on a copy so the caller's frame keeps its columns and rows.
    readings = cgm_df.copy()
    readings['Date_Time'] = pd.to_datetime(readings['Date_Time'])
    # Index by timestamp, then drop rows that have any missing value.
    readings = readings.set_index('Date_Time').dropna()
    # Keep only the first row for a repeated timestamp; the original code
    # added this step to avoid a duplicate-label error during resampling.
    readings = readings.loc[~readings.index.duplicated(), :]
    # '1min' is the minute alias; the older '1T' spelling is deprecated in
    # recent pandas releases.
    per_minute = readings.resample('1min').interpolate(method='linear')
    # Turn the timestamp index back into a regular column.
    return per_minute.reset_index()

def dexcomLinearInterpolation(raw_cgm_df):
    """Resample Dexcom CGM readings to 1-minute intervals with linear interpolation.

    Timestamps are first truncated to the whole minute so that readings land
    exactly on the 1-minute resampling grid.

    Args:
        raw_cgm_df: DataFrame with a 'Date_Time' column that
            ``pd.to_datetime`` can parse. Every other column is interpolated.
            The frame passed in is left unmodified.

    Returns:
        A new DataFrame with one row per minute between the first and last
        reading, with 'Date_Time' as an ordinary column.
    """
    # Work on a copy so the caller's frame keeps its columns and values.
    readings = raw_cgm_df.copy()
    # Vectorised truncation to the minute (replaces a per-row
    # ``dt.replace(second=0)``); this also clears any sub-second part.
    readings['Date_Time'] = pd.to_datetime(readings['Date_Time']).dt.floor('min')
    readings = readings.set_index('Date_Time')
    # Two readings inside the same minute now share a label. Keep the first,
    # as libreLinearInterpolation does, so resampling does not fail on
    # duplicate index labels.
    readings = readings.loc[~readings.index.duplicated(), :]
    # '1min' is the minute alias; the older '1T' spelling is deprecated in
    # recent pandas releases.
    interpolated_cgm_df = readings.resample('1min').interpolate(method='linear')
    # Turn the timestamp index back into a regular column.
    return interpolated_cgm_df.reset_index()

def readData(x):
    """Load one participant's Fitbit and CGM files into the module-level globals.

    Rebinds the globals ``datafile`` (Fitbit table with 'Date_Time' parsed to
    datetimes), ``time`` and ``mets`` (the 'mins' and 'METs' columns as NumPy
    arrays with incomplete rows removed), and ``libredata`` / ``dexcomdata``
    (CGM tables resampled to 1-minute intervals by the interpolation helpers).

    Args:
        x: Participant number; zero-padded to three digits in the file names.

    Returns:
        None. Results are published through the globals listed above.
    """
    #globals
    global datafile
    global time
    global mets
    global libredata
    global dexcomdata

    #read datafile for METs
    datafile = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_Fitbit.csv")
    datafile['Date_Time'] = pd.to_datetime(datafile['Date_Time'])
    time = datafile['mins'].to_numpy()
    mets = datafile['METs'].to_numpy()

    # Drop a row when EITHER value is missing, using one shared mask. Filtering
    # the two arrays independently would shift one against the other whenever
    # only one column has a gap, and activityMetrics pairs them positionally.
    both_present = ~(np.isnan(time) | np.isnan(mets))
    time = time[both_present]
    mets = mets[both_present]

    #read cgm datafiles
    libredata = libreLinearInterpolation(pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_CGM_Libre.csv"))
    dexcomdata = dexcomLinearInterpolation(pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_CGM_Dexcom.csv"))

### Calculate metrics for all the meal times from a spreadsheet
The results can be exported to a CSV file or printed.

In [15]:
def activityMetrics(timestring):
    """Append Fitbit activity metrics for the 3-hour and 2-hour windows after a meal.

    Reads the module-level ``time``, ``mets`` and ``datafile`` and appends one
    value per window to the module-level lists ``auc3`` / ``avg3`` /
    ``activeMin3`` (3 hours) and ``auc2`` / ``avg2`` / ``activeMin2`` (2 hours):
    the area under the METs curve, the mean METs value divided by 10, and the
    number of samples whose METs value is greater than 20.

    Args:
        timestring: Meal time formatted as '%m/%d/%Y %H:%M'.

    Returns:
        None. Results are appended to the lists named above.
    """
    def summarise(window):
        # Area under the curve, scaled mean and count of samples above 20.
        levels = window[:, 1]
        return auc(window[:, 0], levels), np.mean(levels) / 10, int((levels > 20).sum())

    # Two-column array: minute counter in column 0, METs value in column 1.
    samples = np.asarray(list(zip(time, mets)))

    # Place the meal on the minute axis: offset from the first Fitbit row of
    # that calendar day, plus that row's own 'mins' value.
    target_time = datetime.strptime(timestring, '%m/%d/%Y %H:%M')
    same_day = datafile[datafile['Date_Time'].dt.date == target_time.date()]
    first_row = same_day.iloc[0]
    since_first = (target_time - first_row['Date_Time']).total_seconds() / 60
    minutes = int(since_first + first_row['mins'])

    # Three-hour window: samples from the meal minute through minute + 180.
    from_meal = samples[samples[:, 0] >= minutes]
    three_hours = from_meal[from_meal[:, 0] <= minutes + 180]
    area, mean_level, active = summarise(three_hours)
    auc3.append(area)
    avg3.append(mean_level)
    activeMin3.append(active)

    # Two-hour window: the leading 120 minutes of the three-hour window.
    two_hours = three_hours[three_hours[:, 0] <= minutes + 120]
    area, mean_level, active = summarise(two_hours)
    auc2.append(area)
    avg2.append(mean_level)
    activeMin2.append(active)

In [16]:
def iaucAll(timestring, twohr, threehr, libre):
    """Append the glucose AUC for the 3-hour and 2-hour windows after a meal.

    Reads the module-level ``libredata`` or ``dexcomdata`` table ('Date_Time',
    'mins' and 'BG' columns) and computes the area under the BG curve with
    ``auc``. A window that cannot be evaluated is recorded as the string
    "error" so the result lists stay aligned with the list of meals.

    Args:
        timestring: Meal time formatted as '%m/%d/%Y %H:%M'.
        twohr: List that receives the 2-hour result (appended in place).
        threehr: List that receives the 3-hour result (appended in place).
        libre: Truthy to use ``libredata``, falsy to use ``dexcomdata``.

    Returns:
        None. One entry is appended to each of ``threehr`` and ``twohr``.
    """
    datafile1 = libredata if libre else dexcomdata

    # Keep only rows where both the minute counter and the glucose value are
    # present, using ONE mask so each minute stays paired with its own reading.
    time = datafile1['mins'].to_numpy()
    bg = datafile1['BG'].to_numpy()
    both_present = ~(np.isnan(time) | np.isnan(bg))
    arr = np.column_stack((time[both_present], bg[both_present]))

    # Parse timestamps into a local Series rather than rewriting the shared
    # global table on every call.
    stamps = pd.to_datetime(datafile1['Date_Time'])
    target_time = datetime.strptime(timestring, '%m/%d/%Y %H:%M')
    on_day = (stamps.dt.date == target_time.date()).to_numpy()
    if not on_day.any():
        # No CGM rows on the meal's calendar day: record the failure for both
        # windows instead of raising IndexError.
        threehr.append("error")
        twohr.append("error")
        return

    # Place the meal on the minute axis: offset from the first row of that
    # calendar day, plus that row's own 'mins' value.
    first = np.flatnonzero(on_day)[0]
    starttime = stamps.iloc[first]
    minutes = int((target_time - starttime).total_seconds() / 60 + datafile1['mins'].iloc[first])

    #filter to time interval
    window = arr[(arr[:, 0] >= minutes) & (arr[:, 0] <= minutes + 180)]

    # The 3-hour result is only accepted when readings exist exactly at the
    # start and at the end of the window.
    has_start = np.any(window[:, 0] == minutes)
    has_end = np.any(window[:, 0] == minutes + 180)
    try:
        if not (has_start and has_end):
            raise ValueError('Endpoint not available')
        area = auc(window[:, 0], window[:, 1])
    except ValueError:
        # Raised above for a missing endpoint, or by auc for unusable input.
        area = "error"
    threehr.append(area)

    # NOTE(review): unlike the 3-hour window, the 2-hour window is not checked
    # for readings exactly at its start and end - confirm this is intended.
    window = window[window[:, 0] <= minutes + 120]
    try:
        area = auc(window[:, 0], window[:, 1])
    except ValueError:
        area = "error"
    twohr.append(area)


In [17]:
#calculate one person
# Result accumulators. They are module-level names because activityMetrics
# appends to auc2/auc3, avg2/avg3 and activeMin2/activeMin3 directly, while
# iaucAll appends to whichever pair of lists it is handed.
twohr = []       # Libre 2-hour glucose AUC
threehr = []     # Libre 3-hour glucose AUC
dtwo = []        # Dexcom 2-hour glucose AUC
dthree = []      # Dexcom 3-hour glucose AUC
auc2 = []
auc3 = []
activeMin2 = []
activeMin3 = []
avg2 = []
avg3 = []

participant = 2
# Zero-pad the participant number to three digits, as readData does, so the
# file name is also built correctly for numbers above 9.
mldata = pd.read_csv(f"C:\\Users\\namil\\Downloads\\{str(participant).zfill(3)}.csv").dropna()
readData(participant)

for i in mldata['Meal Time']:
    meal = str(i)
    print('\n' + meal)
    activityMetrics(meal)
    iaucAll(meal, twohr, threehr, 1)
    iaucAll(meal, dtwo, dthree, 0)

# One row per meal; the output file name has no extension.
df = pd.DataFrame({'time': mldata['Meal Time'], 'ppgr 2 hr': twohr, 'ppgr 3 hr': threehr,
                   'two hr auc': auc2, 'two hr activeMin': activeMin2, 'two hr avg': avg2,
                   'three hr auc': auc3, 'three hr activeMin': activeMin3, 'three hr avg': avg3,
                   'd2': dtwo, 'd3': dthree})
df.to_csv(f'allmetrics{participant}')


9/26/2021 10:03

9/26/2021 13:27

9/26/2021 17:14

9/27/2021 10:33

9/27/2021 14:24

9/27/2021 19:34

9/28/2021 9:41

9/28/2021 13:15

9/28/2021 18:03

9/29/2021 9:28

9/29/2021 13:05

9/29/2021 18:03

9/30/2021 7:35

9/30/2021 12:25

9/30/2021 18:54

10/1/2021 9:55

10/1/2021 13:49

10/1/2021 18:17

10/2/2021 8:48

10/2/2021 13:16

10/2/2021 18:50

10/3/2021 9:59

10/3/2021 14:30

10/3/2021 19:04

10/4/2021 8:09

10/4/2021 15:45

10/4/2021 17:58

10/5/2021 8:23


In [18]:
#generate dataset for all participants
participants = [1,2,3,4,5]
for x in participants:
    # Fresh accumulators for every participant. They are module-level names
    # because activityMetrics appends to auc2/auc3, avg2/avg3 and
    # activeMin2/activeMin3 directly, while iaucAll appends to whichever pair
    # of lists it is handed.
    twohr = []       # Libre 2-hour glucose AUC
    threehr = []     # Libre 3-hour glucose AUC
    dtwo = []        # Dexcom 2-hour glucose AUC
    dthree = []      # Dexcom 3-hour glucose AUC
    auc2 = []
    auc3 = []
    activeMin2 = []
    activeMin3 = []
    avg2 = []
    avg3 = []

    # Zero-pad the participant number to three digits, as readData does, so
    # the file name is also built correctly for numbers above 9.
    mldata = pd.read_csv(f"C:\\Users\\namil\\Downloads\\{str(x).zfill(3)}.csv").dropna()
    readData(x)

    for i in mldata['Meal Time']:
        # str() keeps the print working even if a meal time is not a string.
        meal = str(i)
        print('\n' + meal)
        activityMetrics(meal)
        iaucAll(meal, twohr, threehr, 1)
        iaucAll(meal, dtwo, dthree, 0)

    # One row per meal; the output file name has no extension.
    df = pd.DataFrame({'time': mldata['Meal Time'], 'ppgr 2 hr': twohr, 'ppgr 3 hr': threehr,
                       'two hr auc': auc2, 'two hr activeMin': activeMin2, 'two hr avg': avg2,
                       'three hr auc': auc3, 'three hr activeMin': activeMin3, 'three hr avg': avg3,
                       'd2': dtwo, 'd3': dthree})
    df.to_csv(f'allmetrics{x}')
print("done")


9/18/2021 11:21

9/18/2021 14:23

9/18/2021 20:48

9/19/2021 8:18

9/19/2021 12:00

9/19/2021 20:10

9/20/2021 9:07

9/20/2021 14:51

9/20/2021 19:52

9/21/2021 9:17

9/21/2021 12:59

9/21/2021 19:42

9/22/2021 9:22

9/22/2021 13:16

9/22/2021 21:05

9/23/2021 8:54

9/23/2021 12:25

9/23/2021 19:56

9/23/2021 21:19

9/24/2021 10:07

9/24/2021 14:12

9/24/2021 19:36

9/25/2021 9:58

9/25/2021 13:26

9/25/2021 20:16

9/26/2021 9:35

9/26/2021 13:46

9/26/2021 18:26

9/27/2021 9:27

9/27/2021 12:47

9/27/2021 20:00

9/28/2021 10:58

9/26/2021 10:03

9/26/2021 13:27

9/26/2021 17:14

9/27/2021 10:33

9/27/2021 14:24

9/27/2021 19:34

9/28/2021 9:41

9/28/2021 13:15

9/28/2021 18:03

9/29/2021 9:28

9/29/2021 13:05

9/29/2021 18:03

9/30/2021 7:35

9/30/2021 12:25

9/30/2021 18:54

10/1/2021 9:55

10/1/2021 13:49

10/1/2021 18:17

10/2/2021 8:48

10/2/2021 13:16

10/2/2021 18:50

10/3/2021 9:59

10/3/2021 14:30

10/3/2021 19:04

10/4/2021 8:09

10/4/2021 15:45

10/4/2021 17:58

10/5/2021 8