## Activity and PPGR
Calculates the PPGR (postprandial glucose response) AUC, the METs AUC, the average METs level, and the active-minutes count for the 2-hour and 3-hour windows that start at each meal's datetime.

In [12]:
import numpy as np
import pandas as pd 
from sklearn.metrics import auc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime

In [13]:
#read datafile for METs
x=1
datafile = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_Fitbit.csv")
datafile['Date_Time'] = pd.to_datetime(datafile['Date_Time'])

# Drop a row when either value is missing. Filtering each column on its own
# NaNs could leave time[i] and mets[i] pointing at different rows (and give
# the two arrays different lengths).
valid_rows = datafile['mins'].notna() & datafile['METs'].notna()
time = datafile.loc[valid_rows, 'mins'].to_numpy()
mets = datafile.loc[valid_rows, 'METs'].to_numpy()

#read cgm datafiles (loaded raw here, without 1-minute interpolation)
libredata = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_CGM_Libre.csv")
dexcomdata = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_CGM_Dexcom.csv")

In [14]:
def libreLinearInterpolation(cgm_df):
    """Resample Libre CGM readings onto a 1-minute grid with linear interpolation.

    Parameters
    ----------
    cgm_df : pandas.DataFrame
        Must contain a 'Date_Time' column that ``pd.to_datetime`` can parse.
        The remaining columns are interpolated, so they are assumed to be
        numeric -- TODO confirm against the Libre export.

    Returns
    -------
    pandas.DataFrame
        A new frame with one row per minute between the first and last
        reading and 'Date_Time' restored as a regular column. The caller's
        frame is left untouched.
    """
    # Work on a copy so the caller's DataFrame is not mutated.
    cgm_df = cgm_df.copy()
    cgm_df['Date_Time'] = pd.to_datetime(cgm_df['Date_Time'])

    # Index by timestamp, then drop rows that have a missing value in any column.
    cgm_df = cgm_df.set_index('Date_Time').dropna()

    # Resampling cannot reindex duplicate timestamps; keep the first of each.
    cgm_df = cgm_df.loc[~cgm_df.index.duplicated(), :]

    # 'min' is the minute alias; the older 'T' spelling is deprecated in
    # recent pandas releases.
    cgm_df_interpolated = cgm_df.resample('min').interpolate(method='linear')

    # Turn the timestamp index back into a 'Date_Time' column.
    return cgm_df_interpolated.reset_index()

def dexcomLinearInterpolation(raw_cgm_df):
    """Resample Dexcom CGM readings onto a 1-minute grid with linear interpolation.

    Timestamps are first truncated to the whole minute so that every reading
    lands on the 1-minute grid used for resampling.

    Parameters
    ----------
    raw_cgm_df : pandas.DataFrame
        Must contain a 'Date_Time' column that ``pd.to_datetime`` can parse.
        The remaining columns are interpolated, so they are assumed to be
        numeric -- TODO confirm against the Dexcom export.

    Returns
    -------
    pandas.DataFrame
        A new frame with one row per minute between the first and last
        reading and 'Date_Time' restored as a regular column. The caller's
        frame is left untouched.
    """
    # Work on a copy so the caller's DataFrame is not mutated.
    cgm_df = raw_cgm_df.copy()
    cgm_df['Date_Time'] = pd.to_datetime(cgm_df['Date_Time'])

    # Truncate to the minute (vectorised; also clears sub-second parts).
    cgm_df['Date_Time'] = cgm_df['Date_Time'].dt.floor('min')
    cgm_df = cgm_df.set_index('Date_Time')

    # Two readings can share a minute once seconds are removed; resampling
    # cannot reindex duplicate timestamps, so keep the first of each (the
    # same guard libreLinearInterpolation applies).
    cgm_df = cgm_df.loc[~cgm_df.index.duplicated(), :]

    # 'min' is the minute alias; the older 'T' spelling is deprecated in
    # recent pandas releases.
    interpolated_cgm_df = cgm_df.resample('min').interpolate(method='linear')

    # Turn the timestamp index back into a 'Date_Time' column.
    return interpolated_cgm_df.reset_index()

def readData(x, data_dir="C:\\Users\\namil\\Downloads"):
    """Load one participant's Fitbit and CGM files into the module-level globals.

    Rebinds the globals ``datafile`` (Fitbit table), ``time`` and ``mets``
    (row-aligned arrays of the 'mins' and 'METs' columns), ``libredata`` and
    ``dexcomdata`` (CGM tables interpolated to 1-minute spacing). Returns
    nothing.

    Parameters
    ----------
    x : int
        Participant number; zero-padded to three digits to build file names
        such as ``CaM01-001_Fitbit.csv``.
    data_dir : str, optional
        Folder that holds the participant CSV files. Defaults to the folder
        the notebook was written against.
    """
    import os

    #get globals
    global datafile
    global time
    global mets
    global libredata
    global dexcomdata

    prefix = os.path.join(data_dir, f"CaM01-{str(x).zfill(3)}")

    #read datafile for METs
    datafile = pd.read_csv(f"{prefix}_Fitbit.csv")
    datafile['Date_Time'] = pd.to_datetime(datafile['Date_Time'])

    # Drop a row when either value is missing. Filtering each column on its
    # own NaNs could leave time[i] and mets[i] pointing at different rows
    # (and give the two arrays different lengths).
    valid_rows = datafile['mins'].notna() & datafile['METs'].notna()
    time = datafile.loc[valid_rows, 'mins'].to_numpy()
    mets = datafile.loc[valid_rows, 'METs'].to_numpy()

    #read cgm datafiles and put them on a 1-minute grid
    libredata = libreLinearInterpolation(pd.read_csv(f"{prefix}_CGM_Libre.csv"))
    dexcomdata = dexcomLinearInterpolation(pd.read_csv(f"{prefix}_CGM_Dexcom.csv"))

### Calculate metrics for all the meal times from a spreadsheet
The results can be exported to CSV or printed.

In [25]:
def _recordActivityWindow(window, aucList, avgList, activeMinList):
    """Append the AUC, scaled mean and active-minute count of one window."""
    activity = window[:, 1]
    if len(activity) == 0:
        # Nothing recorded in this window: still append one entry per list so
        # the results stay aligned with the meals.
        aucList.append("no data")
        avgList.append("no data")
        activeMinList.append("no data")
        return

    try:
        area = auc(window[:, 0], activity)
    except ValueError:
        # auc needs at least two points (and monotonic x values).
        area = "error"

    aucList.append(area)
    # NOTE(review): the /10 scaling and the > 20 threshold presumably reflect
    # a Fitbit export that stores METs multiplied by 10 (i.e. mean METs and
    # minutes above 2 METs) -- confirm against the data dictionary.
    avgList.append(np.average(activity) / 10)
    activeMinList.append(int(np.count_nonzero(activity > 20)))


def activityMetrics(timestring):
    """Record activity metrics for the 3-hour and 2-hour windows after a meal.

    Reads the module-level ``time``, ``mets`` and ``datafile`` and appends one
    entry to each of the module-level lists ``auc3``, ``avg3``, ``activeMin3``
    (window of 180 minutes) and ``auc2``, ``avg2``, ``activeMin2`` (window of
    120 minutes). Both windows start at the meal minute and include both
    endpoints. Returns nothing.

    When the Fitbit table has no rows on the meal's date, or a window holds
    no samples, "no data" is appended instead of raising; when the AUC cannot
    be computed for a window, "error" is appended for that AUC. This keeps
    every list the same length as the list of meals.

    Parameters
    ----------
    timestring : str
        Meal timestamp in any format ``pd.to_datetime`` parses, e.g.
        '2021-09-26 10:03:00' or '09/26/2021 10:03'.
    """
    # Pair each minute with its METs value; like zip(), stop at the shorter
    # of the two arrays.
    n = min(len(time), len(mets))
    arr = np.column_stack((time[:n], mets[:n]))

    #find start time
    target_time = pd.to_datetime(timestring)
    theday = datafile[datafile['Date_Time'].dt.date == target_time.date()]
    if len(theday) == 0:
        for results in (auc3, avg3, activeMin3, auc2, avg2, activeMin2):
            results.append("no data")
        return

    # Convert the meal time to the 'mins' scale: the offset from the first
    # row of that day plus that row's own 'mins' value.
    starttime = theday.iloc[0]['Date_Time']
    minutes = int((target_time - starttime).total_seconds() / 60 + theday.iloc[0]['mins'])

    #filter to time interval
    threeHours = arr[(arr[:, 0] >= minutes) & (arr[:, 0] <= minutes + 180)]
    _recordActivityWindow(threeHours, auc3, avg3, activeMin3)

    twoHours = threeHours[threeHours[:, 0] <= minutes + 120]
    _recordActivityWindow(twoHours, auc2, avg2, activeMin2)

In [50]:
def _windowAuc(arr, start, end):
    """Return the AUC of the rows with start <= minute <= end, or "error"."""
    window = arr[(arr[:, 0] >= start) & (arr[:, 0] <= end)]

    # Require a sample exactly at both endpoints so that a window only partly
    # covered by data is not reported as a full-window AUC.
    hasStart = np.any(window[:, 0] == start)
    hasEnd = np.any(window[:, 0] == end)
    if not (hasStart and hasEnd):
        return "error"

    try:
        return auc(window[:, 0], window[:, 1])
    except ValueError:
        return "error"


def iaucAll(timestring, twohr, threehr, libre):
    """Append the glucose AUC of the 2-hour and 3-hour windows after a meal.

    Uses the module-level ``libredata`` when ``libre`` is truthy and
    ``dexcomdata`` otherwise. The area is the plain trapezoidal area under
    the 'BG' column over 'mins' as returned by ``auc``; no baseline is
    subtracted here. Returns nothing.

    Exactly one entry is appended to each of ``twohr`` and ``threehr``:
    the area, "error" when the window lacks a sample at its first or last
    minute or the area cannot be computed, or "no data" when the CGM table
    has no rows on the meal's date.

    Parameters
    ----------
    timestring : str
        Meal timestamp in any format ``pd.to_datetime`` parses, e.g.
        '2021-09-26 10:03:00' or '09/26/2021 10:03'.
    twohr, threehr : list
        Result lists for the 120-minute and 180-minute windows.
    libre : bool or int
        Truthy for the Libre sensor, falsy for the Dexcom sensor.
    """
    datafile1 = libredata if libre else dexcomdata

    # Drop a row when either value is missing. Filtering each column on its
    # own NaNs could pair a minute with a glucose value from a different row.
    minuteCol = datafile1['mins'].to_numpy()
    bg = datafile1['BG'].to_numpy()
    valid = ~(np.isnan(minuteCol) | np.isnan(bg))
    arr = np.column_stack((minuteCol[valid], bg[valid]))

    # Ensures a datetime column even when the table was loaded without the
    # interpolation helpers; a no-op when it already is one.
    datafile1['Date_Time'] = pd.to_datetime(datafile1['Date_Time'])

    target_time = pd.to_datetime(timestring)
    theday = datafile1[datafile1['Date_Time'].dt.date == target_time.date()]

    if len(theday) == 0:
        twohr.append("no data")
        threehr.append("no data")
        return

    # Convert the meal time to the 'mins' scale: the offset from the first
    # row of that day plus that row's own 'mins' value.
    starttime = theday.iloc[0]['Date_Time']
    minutes = int((target_time - starttime).total_seconds() / 60 + theday.iloc[0]['mins'])

    threehr.append(_windowAuc(arr, minutes, minutes + 180))
    twohr.append(_windowAuc(arr, minutes, minutes + 120))


In [47]:
# #calculate one person
# twohr = []
# threehr = []
# dtwo = []
# dthree = []
# auc2 = []
# auc3 = []
# activeMin2 = []
# activeMin3 = []
# avg2 = []
# avg3 = []

# participant = 2
# mldata = pd.read_excel(f"C:\\Users\\namil\\Downloads\\00{participant}.xlsx").dropna()
# mldata['Meal Time'] = mldata['Meal Time'].to_numpy()
# readData(participant)
    
# for i in mldata['Meal Time']:
#     print('\n' + str(i))
#     activityMetrics(str(i))
#     iaucAll(str(i), twohr, threehr, 1)
#     iaucAll(str(i), dtwo, dthree, 0)

# df = pd.DataFrame({'time': mldata['Meal Time'], 'ppgr 2 hr': twohr, 'ppgr 3 hr': threehr, 
#                    'two hr auc': auc2, 'two hr activeMin': activeMin2, 'two hr avg': avg2, 
#                    'three hr auc': auc3, 'three hr activeMin': activeMin3, 'three hr avg': avg3,
#                   'd2': dtwo, 'd3':dthree})
# df.to_csv(f'allmetrics{participant}')


2021-09-26 10:03:00

2021-09-26 13:27:00

2021-09-26 17:14:00

2021-09-27 10:33:00

2021-09-27 14:24:00

2021-09-27 19:34:00

2021-09-28 09:41:00

2021-09-28 13:15:00

2021-09-28 18:03:00

2021-09-29 09:28:00

2021-09-29 13:05:00

2021-09-29 18:03:00

2021-09-30 07:35:00

2021-09-30 12:25:00

2021-09-30 18:54:00

2021-10-01 09:55:00

2021-10-01 13:49:00

2021-10-01 18:17:00

2021-10-02 08:48:00

2021-10-02 13:16:00

2021-10-02 18:50:00

2021-10-03 09:59:00

2021-10-03 14:30:00

2021-10-03 19:04:00

2021-10-04 08:09:00

2021-10-04 15:45:00

2021-10-04 17:58:00

2021-10-05 08:23:00


In [53]:
# Build one metrics table per participant and write each table to disk.
participants = [*range(1, 24), *range(26, 30)]  # 1-23 and 26-29
for x in participants:
    # activityMetrics appends to these module-level lists, so they are
    # rebound to fresh lists before each participant is processed.
    auc2, avg2, activeMin2 = [], [], []
    auc3, avg3, activeMin3 = [], [], []
    # Filled by iaucAll: Libre results (twohr/threehr), Dexcom results (dtwo/dthree).
    twohr, threehr = [], []
    dtwo, dthree = [], []

    print("\n" + f"participant number {x}:")
    mldata = pd.read_excel(f"C:\\Users\\namil\\Downloads\\{str(x).zfill(3)}.xlsx").dropna()
    mldata['Meal Time'] = mldata['Meal Time'].to_numpy()
    readData(x)

    # One pass per meal: activity metrics first, then Libre, then Dexcom.
    for i in mldata['Meal Time']:
        mealStamp = str(i)
        print(mealStamp)
        activityMetrics(mealStamp)
        iaucAll(mealStamp, twohr, threehr, 1)
        iaucAll(mealStamp, dtwo, dthree, 0)

    # Column order below is the column order of the written file.
    columns = {
        'time': mldata['Meal Time'],
        'ppgr 2 hr': twohr,
        'ppgr 3 hr': threehr,
        'two hr auc': auc2,
        'two hr activeMin': activeMin2,
        'two hr avg': avg2,
        'three hr auc': auc3,
        'three hr activeMin': activeMin3,
        'three hr avg': avg3,
        'd2': dtwo,
        'd3': dthree,
    }
    df = pd.DataFrame(columns)
    df.to_csv(f'allmetrics{x}')
print("done :)")


participant number 22:
2022-06-01 08:37:00
2022-06-01 12:27:00
2022-06-02 08:59:00
2022-06-02 13:37:00
2022-06-02 17:50:47
2022-06-03 09:03:00
2022-06-03 12:34:00
2022-06-03 19:13:56
2022-06-04 09:01:00
2022-06-04 12:37:00
2022-06-04 19:37:24
2022-06-05 08:48:00
2022-06-05 12:35:00
2022-06-05 20:01:51
2022-06-06 09:05:00
2022-06-06 12:45:00
2022-06-06 19:05:08
2022-06-07 09:03:00
2022-06-07 12:29:00
2022-06-07 19:23:32
2022-06-08 09:01:00
2022-06-08 13:08:00
2022-06-08 19:15:48
2022-06-09 09:07:00
2022-06-09 12:21:00
2022-06-09 19:00:11
2022-06-10 09:00:00
2022-06-10 12:20:00
2022-06-11 09:08:00
done :)
