# Dataset Generation: Activity and PPGR
Creates dataset and calculates ppgr auc, mets auc, average level, and activeMins metrics with a datetime of each meal for 2hr and 3hr timeframes 

In [79]:
import numpy as np
import pandas as pd 
from sklearn.metrics import auc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
import warnings

## Process data
read in all the files 

In [80]:
#read datafile for METs
x=1
datafile = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_Fitbit.csv")
time = datafile['mins'].to_numpy()
mets = datafile['METs'].to_numpy()
hr = datafile['HeartRate'].to_numpy()
datafile['Date_Time'] = pd.to_datetime(datafile['Date_Time'])

time = time[~np.isnan(time)]
mets = mets[~np.isnan(mets)]
hr = hr[~np.isnan(hr)]

#read cgm datafiles
dexcomdata = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_CGM_Dexcom.csv")

In [81]:
def libreLinearInterpolation(cgm_df):
    # Convert the 'Date_Time' column to datetime type
    cgm_df['Date_Time'] = pd.to_datetime(cgm_df['Date_Time'])
    # Set the 'Date_Time' column as the index
    cgm_df.set_index('Date_Time', inplace=True)
    # Drop or fill missing values
    cgm_df.dropna(inplace=True)  # Drop rows with missing values
    # Fix duplicate error
    cgm_df = cgm_df.loc[~cgm_df.index.duplicated(), :]
    # Resample the DataFrame to 1-minute intervals and perform linear interpolation
    cgm_df_interpolated = cgm_df.resample('1T').interpolate(method='linear')
    # Reset the index back to a column
    cgm_df_interpolated.reset_index(inplace=True)
    return cgm_df_interpolated

def dexcomLinearInterpolation(raw_cgm_df):
    # Convert the 'Date_Time' column to datetime type
    raw_cgm_df['Date_Time'] = pd.to_datetime(raw_cgm_df['Date_Time'])
    # Set the seconds component to 0
    raw_cgm_df['Date_Time'] = raw_cgm_df['Date_Time'].apply(lambda dt: dt.replace(second=0))
    # Set the 'Date_Time' column as the index
    raw_cgm_df.set_index('Date_Time', inplace=True)
    # Resample the DataFrame to 1-minute intervals and perform linear interpolation
    interpolated_cgm_df = raw_cgm_df.resample('1T').interpolate(method='linear')
    interpolated_cgm_df
    # Reset the index back to a column
    interpolated_cgm_df.reset_index(inplace=True)
    return interpolated_cgm_df

def readData(x):
    #get globals
    global datafile
    global time
    global mets
    global hr
    global libredata
    global dexcomdata 
    
    #read datafile for METs
    datafile = pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_Fitbit.csv")
    time = datafile['mins'].to_numpy()
    mets = datafile['METs'].to_numpy()
    hr = datafile['HeartRate'].to_numpy()
    datafile['Date_Time'] = pd.to_datetime(datafile['Date_Time'])

    time = time[~np.isnan(time)]
    mets = mets[~np.isnan(mets)]
    hr = hr[~np.isnan(hr)]

    #read cgm datafiles
    dexcomdata = dexcomLinearInterpolation(pd.read_csv(f"C:\\Users\\namil\\Downloads\\CaM01-{str(x).zfill(3)}_CGM_Dexcom.csv"))

def getPersonal(x):
    datafile = pd.read_csv("C:\\Users\\namil\\Documents\\stmi-lab-namila\\demographics_raw.csv")
    individual = datafile.iloc[x-1,:]
    return individual
    

### Calculate metrics for all the meal times from a spreadsheet
can export to csv or print yay

In [82]:
hrmax = []
hrauc = []
hriauc = []
hrstart = []
z1mins = []
z2mins = []
z3mins = []
z4mins = []

def activityMetrics(timestring):
    tuples = zip(time,mets,hr)
    arr = np.asarray(list(tuples))

    #find start time
    #target_time = datetime.strptime(timestring, '%m/%d/%Y %H:%M')
    target_time = datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S')
    theday = datafile[datafile['Date_Time'].dt.date == target_time.date()]
    starttime = theday.iloc[0]['Date_Time']
    minutes = int((target_time - starttime).total_seconds() / 60 + theday.iloc[0]['mins'])

    timeframe = 180
    #filter to time interval
    filtered = arr[arr[:,0] <= minutes + timeframe]
    filtered = filtered[filtered[:,0] >= minutes]
    activity = filtered[:,1]

    #calculations
    try:
        au = auc(filtered[:,0], filtered[:,2])
        st = filtered[0,2]
        area = auc(filtered[:,0], activity)
        
        auc3.append(area)
        hrmax.append(np.max(filtered[:,2]))
        hrauc.append(au)
        hrstart.append(st)
        hriauc.append(au - timeframe*st)
        
    except ValueError:
        auc3.append("error")
        hrmax.append("error")
        hrauc.append("error")
        hrstart.append("error")
        hriauc.append("error")
    
    #for zones, use participant number to find age and store that as a variable    
    
    avg3.append(np.average(activity)/10)
    activeMin2.append(len(activity[activity > 20]))
    activeMin3.append(len(activity[activity > 30]))
    activeMin4.append(len(activity[activity > 30]))

In [83]:
peakheight = []
peaktime = []
pd40 = []
startbg = []
iauc = []

def iaucAll(timestring, threehr): 
    datafile1 = dexcomdata
    
    time = datafile1['mins'].to_numpy()
    bg = datafile1['BG'].to_numpy()
    time = time[~np.isnan(time)]
    bg = bg[~np.isnan(bg)]
    datafile1['Date_Time'] = pd.to_datetime(datafile1['Date_Time'])
    
    tuples = zip(time,bg)
    arr = np.asarray(list(tuples))
    
    #target_time = datetime.strptime(timestring, '%m/%d/%Y %H:%M')
    target_time = datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S')
    theday = datafile1[datafile1['Date_Time'].dt.date == target_time.date()]
    
    try:
        if(len(theday) == 0):
            raise ValueError("Data for this date is not available")
        starttime = theday.iloc[0]['Date_Time']
        minutes = int((target_time - starttime).total_seconds() / 60 + theday.iloc[0]['mins'])
    
        #filter to time interval
        filtered = arr[arr[:,0] >= minutes]
        filtered = filtered[filtered[:,0] <= minutes + 180]
        cgmdata = filtered[:,1]
        
        area = auc(filtered[:,0], cgmdata)
        
        #check that start and end pts are available (will look for a better way later)
        checkstart = filtered[filtered[:,0] == minutes]
        checkend = filtered[filtered[:,0] == minutes + 180]
        
        if len(checkstart) == 0 or len(checkend) == 0:
             raise ValueError('Endpoint not available')
                        
        peak = np.max(cgmdata)
        ptime = filtered[np.argmax(cgmdata), 0]
        pd4 = len(cgmdata[cgmdata > peak*.6])
        start = cgmdata[0]
        iaucval = area - start*180
       
    except ValueError:
        area = "error"
        peak = "error"
        pd4 = "error"
        ptime = "error"
        start = "error"
        iaucval = "error"
        
    threehr.append(area)
    peakheight.append(peak)
    pd40.append(pd4)
    peaktime.append(ptime)
    startbg.append(start)
    iauc.append(iaucval)

    

In [72]:
#calculate one person
dthree = []
auc3 = []
activeMin2 = []
activeMin3 = []
activeMin4 = []
avg3 = []
peakheight = []
peaktime = []
pd40 = []
startbg = []
iauc = []

participant = 2
mldata = pd.read_excel(f"C:\\Users\\namil\\Downloads\\{str(participant).zfill(3)}.xlsx").dropna()
mldata['Meal Time'] = mldata['Meal Time'].to_numpy()
print(getPersonal(participant))
readData(participant)
    
# for i in mldata['Meal Time']:
#     print(str(i))
#     activityMetrics(str(i))
#     iaucAll(str(i), dthree)

# df = pd.DataFrame({'Meal Time': mldata['Meal Time'], 'snack in 3hrs': mldata['snack'], 'Meal Type': mldata['Meal Type'],
#                    #'dexcom 2hr auc': dtwo, 'mets 2 hr auc': auc2, '2 hr activeMin': activeMin2, 'mets 2 hr avg': avg2,
#                    'peakheight': peakheight, 'peaktime': peaktime, 'peakduration_40': pd40, 'startbg': startbg, 'iauc': iauc,
#                    'dexcom 3hr auc':dthree, 'mets 3 hr auc': auc3, 'hrmax': hrmax,'activeMin_2': activeMin2, 'activeMin_3': activeMin3, 'activeMin_4': activeMin4,
#                    'mets 3 hr avg': avg3, 'calories':mldata['Calories'], 'carbs':mldata['Carbs'], 'protein':mldata['Protein'],  'fat':mldata['Fat']})
# # df.to_csv(f'allmetrics{x}')
# df.to_csv(f'test{x}')
# print("done!")

Participant                                CaM#:01-002
Start Date (Initiation Visit)                9/25/2021
Age                                                 49
Gender                                               F
Body weight                                      169.2
Height                                           5'-2"
Self-identify                    Hispanic/Latino-White
Self-diagnose Diabetes                     Prediabetes
A1c PDL (Lab)                                      5.5
Fasting GLU - PDL (Lab)                             93
Insulin                                           14.8
Triglycerides                                       61
Cholesterol                                        181
HDL                                                 91
Non HDL                                             90
LDL (Cal)                                           78
VLDL (Cal)                                          12
Cho/HDL Ratio                                        2
Collection

In [89]:
#generate datasets for all participants
participants = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,26,27,28,29]
for x in participants:
    dthree = []
    auc3 = []
    activeMin2 = []
    activeMin3 = []
    activeMin4 = []
    avg3 = []
    peakheight = []
    peaktime = []
    pd40 = []
    startbg = []
    iauc = []
    hrmax = []
    hrauc = []
    hriauc = []
    hrstart = []
    z1mins = []
    z2mins = []
    z3mins = []
    z4mins = []

    print("\n" + f"participant number {x}:")
    
    mldata = pd.read_excel(f"C:\\Users\\namil\\Downloads\\{str(x).zfill(3)}.xlsx").dropna()
    mldata['Meal Time'] = mldata['Meal Time'].to_numpy()
    readData(x)

    for i in mldata['Meal Time']:
        print(str(i))
        activityMetrics(str(i))
        iaucAll(str(i), dthree)

    df = pd.DataFrame({'Meal Time': mldata['Meal Time'], 'snack in 3hrs': mldata['snack'], 'Meal Type': mldata['Meal Type'],
                   'hrmax': hrmax,'hrauc': hriauc,'hrauc': hrstart, 'hrauc': hrstart,
                   #'dexcom 2hr auc': dtwo, 'mets 2 hr auc': auc2, '2 hr activeMin': activeMin2, 'mets 2 hr avg': avg2,
                   'peakheight': peakheight, 'peaktime': peaktime, 'peakduration_40': pd40, 'startbg': startbg, 'iauc': iauc,
                   'dexcom 3hr auc':dthree, 'mets 3 hr auc': auc3, 
                   'activeMin_2': activeMin2, 'activeMin_3': activeMin3, 'activeMin_4': activeMin4,
                   'mets 3 hr avg': avg3, 'calories':mldata['Calories'], 'carbs':mldata['Carbs'], 'protein':mldata['Protein'],  'fat':mldata['Fat']})
    df.to_csv(f'allmetrics{x}')
    
print("done :)")


participant number 1:
2021-09-18 14:23:00
2021-09-18 20:48:00
2021-09-19 08:18:00
2021-09-19 12:00:00
2021-09-19 20:10:00
2021-09-20 09:07:00
2021-09-20 14:51:00
2021-09-20 19:52:00
2021-09-21 09:17:00
2021-09-21 12:59:00
2021-09-21 19:42:00
2021-09-22 09:22:00
2021-09-22 13:16:00
2021-09-22 21:05:00
2021-09-23 08:54:00
2021-09-23 12:25:00
2021-09-23 19:56:00
2021-09-23 21:19:00
2021-09-24 10:07:00
2021-09-24 14:12:00
2021-09-24 19:36:00
2021-09-25 09:58:00
2021-09-25 13:26:00
2021-09-25 20:16:00
2021-09-26 09:35:00
2021-09-26 13:46:00
2021-09-26 18:26:00
2021-09-27 09:27:00
2021-09-27 12:47:00
2021-09-27 20:00:00

participant number 2:
2021-09-25 08:21:00
2021-09-25 12:22:00
2021-09-25 17:57:00
2021-09-26 10:03:00
2021-09-26 13:27:00
2021-09-26 17:14:00
2021-09-27 10:33:00
2021-09-27 14:24:00
2021-09-27 19:34:00
2021-09-28 09:41:00
2021-09-28 13:15:00
2021-09-28 18:03:00
2021-09-29 09:28:00
2021-09-29 13:05:00
2021-09-29 18:03:00
2021-09-30 07:35:00
2021-09-30 12:25:00
2021-09-30 18:

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2021-10-01 08:04:00
2021-10-01 11:30:00
2021-10-01 18:57:00
2021-10-02 09:52:00
2021-10-02 13:29:00
2021-10-02 19:40:00
2021-10-03 09:49:00
2021-10-03 13:37:00
2021-10-03 16:45:00
2021-10-04 09:01:00
2021-10-04 12:34:00
2021-10-04 18:03:00
2021-10-05 08:56:00
2021-10-05 12:25:00
2021-10-05 20:23:00
2021-10-05 20:54:00
2021-10-06 08:56:00
2021-10-06 12:25:00
2021-10-06 17:42:00
2021-10-06 22:39:00
2021-10-07 08:49:00
2021-10-07 12:26:00
2021-10-07 20:17:00
2021-10-08 08:57:00
2021-10-08 12:23:00
2021-10-08 18:13:00
2021-10-09 09:33:00
2021-10-09 15:02:00
2021-10-09 18:19:00
2021-10-09 21:59:00
2021-10-10 09:30:00
2021-10-10 13:18:00
2021-10-10 18:01:06

participant number 5:
2021-12-06 13:08:00
2021-12-06 19:42:00
2021-12-07 09:51:00
2021-12-07 13:28:00
2021-12-07 19:55:00
2021-12-08 09:50:00
2021-12-08 13:22:00
2021-12-08 18:19:00
2021-12-09 10:00:00
2021-12-09 14:58:00
2021-12-09 20:22:00
2021-12-10 09:56:00
2021-12-10 13:57:00
2021-12-10 20:21:00
2021-12-11 08:52:00
2021-12-11 12:36:

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2021-12-06 09:56:00
2021-12-06 13:08:00
2021-12-06 19:15:00
2021-12-07 07:18:00
2021-12-07 12:06:00
2021-12-07 19:21:00
2021-12-08 07:30:00
2021-12-08 12:48:00
2021-12-08 18:42:00
2021-12-09 06:50:00
2021-12-09 12:07:00
2021-12-09 19:26:00
2021-12-10 08:50:00
2021-12-10 13:23:00
2021-12-10 19:16:00
2021-12-11 10:04:00
2021-12-11 14:56:00
2021-12-11 21:06:00
2021-12-12 10:05:00
2021-12-12 15:07:00
2021-12-12 20:17:00
2021-12-13 09:08:00
2021-12-13 14:30:00
2021-12-13 18:58:00
2021-12-14 08:00:00
2021-12-14 12:00:00
2021-12-14 20:19:00
2021-12-15 08:45:00
2021-12-15 12:52:00
2021-12-15 19:15:00
2021-12-16 07:16:00

participant number 7:


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2021-12-13 09:25:00
2021-12-13 12:38:00
2021-12-13 19:26:00
2021-12-14 08:36:00
2021-12-14 12:25:00
2021-12-14 19:16:00
2021-12-15 08:38:00
2021-12-15 12:23:00
2021-12-15 19:18:00
2021-12-16 08:31:00
2021-12-16 12:17:00
2021-12-16 18:54:00
2021-12-17 08:32:00
2021-12-17 12:18:00
2021-12-18 08:47:00
2021-12-18 12:12:00
2021-12-18 20:28:00
2021-12-19 08:37:00
2021-12-19 12:51:00
2021-12-19 18:30:00
2021-12-20 08:57:00
2021-12-20 12:24:00
2021-12-20 19:17:00
2021-12-21 08:24:00
2021-12-21 12:28:00
2021-12-21 18:10:00
2021-12-22 08:33:00
2021-12-22 12:14:00
2021-12-22 18:24:00
2021-12-23 07:51:00

participant number 8:


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2021-12-13 09:29:00
2021-12-13 12:34:00
2021-12-13 19:26:00
2021-12-14 09:17:00
2021-12-14 12:50:00
2021-12-14 20:40:00
2021-12-15 07:52:00
2021-12-15 12:22:00
2021-12-15 19:18:00
2021-12-16 08:01:00
2021-12-16 12:17:00
2021-12-16 18:48:00
2021-12-17 08:06:00
2021-12-17 13:41:00
2021-12-17 18:29:00
2021-12-18 09:26:00
2021-12-18 14:38:00
2021-12-19 09:57:00
2021-12-19 13:15:00
2021-12-19 20:23:00
2021-12-20 08:13:00
2021-12-20 12:24:00
2021-12-20 18:46:00
2021-12-21 08:07:00
2021-12-21 12:30:00
2021-12-21 17:53:00
2021-12-21 17:53:00
2021-12-22 08:01:00
2021-12-22 12:14:00
2021-12-22 18:12:00
2021-12-23 08:12:00

participant number 9:


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-01-24 08:27:00
2022-01-24 12:13:00
2022-01-24 19:16:00
2022-01-25 09:10:00
2022-01-25 12:53:00
2022-01-26 07:54:00
2022-01-26 11:09:00
2022-01-27 09:28:00
2022-01-27 12:57:00
2022-01-27 17:27:00
2022-01-28 07:38:00
2022-01-28 13:05:00
2022-01-28 18:29:00
2022-01-29 08:24:00
2022-01-29 12:38:00
2022-01-29 18:13:00
2022-01-30 08:39:00
2022-01-30 12:13:00
2022-01-31 07:37:00
2022-01-31 12:24:00
2022-01-31 18:57:00
2022-01-31 18:57:00
2022-02-01 07:29:00
2022-02-01 12:23:00
2022-02-01 17:59:00
2022-02-02 07:51:00
2022-02-02 12:29:00
2022-02-02 19:29:00
2022-02-03 08:08:00

participant number 10:


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-02-11 08:34:00
2022-02-11 11:47:00
2022-02-11 18:43:00
2022-02-12 06:40:00
2022-02-12 11:00:00
2022-02-12 16:06:00
2022-02-13 06:41:00
2022-02-13 11:33:00
2022-02-13 18:15:00
2022-02-14 06:48:00
2022-02-14 11:21:00
2022-02-14 16:30:00
2022-02-15 04:57:00
2022-02-15 11:53:00
2022-02-16 05:03:00
2022-02-16 11:46:00
2022-02-17 05:25:00
2022-02-17 11:20:00
2022-02-17 16:46:00
2022-02-18 05:16:00
2022-02-18 11:46:00
2022-02-18 15:01:00
2022-02-19 06:06:00
2022-02-19 11:10:00
2022-02-20 07:27:00
2022-02-20 13:19:00
2022-02-21 05:40:00

participant number 11:
2022-02-28 08:42:00
2022-02-28 12:17:00
2022-03-01 07:25:00
2022-03-01 12:00:00
2022-03-02 07:30:00
2022-03-02 11:58:00
2022-03-02 18:53:00
2022-03-03 07:32:00
2022-03-03 12:01:00
2022-03-03 19:05:00
2022-03-04 07:00:00
2022-03-04 13:34:00
2022-03-04 17:58:00
2022-03-05 09:15:00
2022-03-05 12:57:00
2022-03-05 17:50:00
2022-03-06 10:22:00
2022-03-06 12:25:00
2022-03-06 17:43:00
2022-03-07 07:52:00
2022-03-07 11:52:00
2022-03-07 19:04

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-02-22 08:11:00
2022-02-22 15:04:00
2022-02-22 20:48:00
2022-02-23 09:16:00
2022-02-23 14:22:00
2022-02-23 20:49:00
2022-02-24 09:39:00
2022-02-24 14:21:00
2022-02-24 20:42:00
2022-02-25 09:10:00
2022-02-25 14:23:00
2022-02-25 20:59:00
2022-02-26 11:28:00
2022-02-26 16:06:00
2022-02-26 20:04:00
2022-02-27 09:46:00
2022-02-27 14:27:00
2022-02-27 21:24:00
2022-02-28 09:23:00
2022-02-28 15:01:00
2022-02-28 22:26:00
2022-03-01 09:58:00
2022-03-01 14:25:00
2022-03-01 20:28:00
2022-03-02 10:59:00
2022-03-02 14:49:00
2022-03-02 19:22:00
2022-03-03 09:22:00


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-03-03 14:42:00
2022-03-03 21:17:00
2022-03-04 09:33:00

participant number 13:
2022-03-04 08:05:00
2022-03-04 11:52:00
2022-03-04 17:51:00
2022-03-05 07:28:00
2022-03-05 11:37:00
2022-03-05 16:04:00
2022-03-06 08:42:00
2022-03-06 11:56:00
2022-03-06 15:19:00
2022-03-07 07:43:00
2022-03-07 11:45:00
2022-03-07 16:07:00
2022-03-08 07:49:00
2022-03-08 12:10:00
2022-03-08 16:23:00
2022-03-09 09:23:00
2022-03-09 12:25:00
2022-03-09 16:25:00
2022-03-10 07:21:00
2022-03-10 11:20:00
2022-03-10 15:56:00
2022-03-11 06:57:00
2022-03-11 11:44:00
2022-03-11 16:50:00
2022-03-12 06:56:00
2022-03-12 06:56:00
2022-03-12 06:56:00
2022-03-12 06:56:00
2022-03-12 06:56:00
2022-03-12 12:39:00
2022-03-12 15:41:00
2022-03-13 08:08:00
2022-03-13 11:04:00
2022-03-13 15:58:00
2022-03-14 07:07:00

participant number 14:
2022-03-30 08:38:00
2022-03-30 11:51:00
2022-03-30 19:01:00
2022-03-31 08:15:00
2022-03-31 12:44:00
2022-03-31 17:40:00
2022-04-01 09:10:00
2022-04-01 13:11:00
2022-04-01 18:06:00
2022-04-02 0

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-04-18 16:40:00

participant number 18:
2022-04-13 08:14:32
2022-04-13 13:13:59
2022-04-13 18:26:00
2022-04-23 10:18:45
2022-04-23 13:24:42
2022-04-23 18:39:00
2022-04-24 09:36:47
2022-04-24 13:22:14
2022-04-24 20:17:00
2022-04-25 07:57:46
2022-04-25 13:08:22
2022-04-26 08:20:26
2022-04-26 13:28:09
2022-04-26 18:21:00
2022-04-27 09:12:19
2022-04-27 13:26:34
2022-04-27 17:25:00
2022-04-28 08:15:18
2022-04-28 14:50:02
2022-04-29 08:47:42
2022-04-29 13:01:29
2022-04-29 17:49:00
2022-04-30 07:26:54
2022-04-30 13:32:37
2022-04-30 19:02:00
2022-05-01 08:36:54
2022-05-01 13:03:56
2022-05-01 18:09:00
2022-05-02 08:03:16

participant number 19:


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-04-16 09:10:00
2022-04-16 12:40:00
2022-04-16 22:52:00
2022-04-17 09:44:00
2022-04-17 13:26:00
2022-04-17 16:06:00
2022-04-18 09:30:00
2022-04-18 13:04:00
2022-04-18 18:34:00
2022-04-19 09:18:00
2022-04-19 12:30:00
2022-04-19 15:55:00
2022-04-20 09:39:00
2022-04-20 14:14:00
2022-04-20 19:10:00
2022-04-21 13:26:00
2022-04-21 17:55:00
2022-04-22 09:19:00
2022-04-22 12:26:00
2022-04-22 16:42:00
2022-04-23 09:26:00
2022-04-23 12:32:00
2022-04-23 18:41:00
2022-04-24 10:21:00
2022-04-24 14:44:00
2022-04-24 18:28:00
2022-04-25 09:06:00
2022-04-25 12:45:00
2022-04-25 18:13:00
2022-04-26 09:08:00

participant number 20:


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-06-06 08:17:00
2022-06-06 12:35:00
2022-06-06 18:11:12
2022-06-07 05:00:00
2022-06-07 10:24:00
2022-06-07 16:25:24
2022-06-08 05:14:00
2022-06-08 10:24:00
2022-06-08 17:31:40
2022-06-09 04:48:00
2022-06-09 10:27:00
2022-06-09 16:28:17
2022-06-10 05:06:00
2022-06-10 10:25:00
2022-06-10 18:53:28
2022-06-11 04:56:00
2022-06-11 10:18:00
2022-06-11 18:25:29
2022-06-12 07:22:00
2022-06-12 12:51:00
2022-06-12 17:34:47
2022-06-13 07:51:00
2022-06-13 12:58:00
2022-06-14 05:00:00
2022-06-14 10:28:00
2022-06-14 19:45:57
2022-06-15 05:06:00
2022-06-15 10:28:00
2022-06-16 04:58:00

participant number 21:


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-05-23 08:16:00
2022-05-23 13:00:00
2022-05-24 19:55:33
2022-05-24 05:53:00
2022-05-24 12:29:00
2022-05-24 20:23:10
2022-05-25 05:56:00
2022-05-25 12:41:00
2022-05-25 18:56:25
2022-05-26 07:22:00
2022-05-26 13:22:00
2022-05-26 20:26:00
2022-05-27 07:34:00
2022-05-27 13:16:00
2022-05-27 18:21:16
2022-05-28 06:36:00
2022-05-28 12:26:00
2022-05-28 19:23:25
2022-05-29 05:35:00
2022-05-29 11:00:00
2022-05-29 17:53:40
2022-05-30 06:30:00
2022-05-30 12:41:00
2022-05-30 19:27:40
2022-05-31 07:24:00
2022-05-31 13:18:00
2022-05-31 18:12:13
2022-06-01 05:27:00
2022-06-01 13:00:00
2022-06-01 20:52:17
2022-06-02 07:52:00

participant number 22:
2022-06-01 08:37:00
2022-06-01 12:27:00
2022-06-02 08:59:00
2022-06-02 13:37:00
2022-06-02 17:50:47
2022-06-03 09:03:00
2022-06-03 12:34:00
2022-06-03 19:13:56
2022-06-04 09:01:00
2022-06-04 12:37:00
2022-06-04 19:37:24
2022-06-05 08:48:00
2022-06-05 12:35:00
2022-06-05 20:01:51
2022-06-06 09:05:00
2022-06-06 12:45:00
2022-06-06 19:05:08
2022-06-07 09:03

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-06-27 08:48:00
2022-06-27 11:58:00
2022-06-27 15:18:41
2022-06-27 20:54:36
2022-06-28 09:23:00
2022-06-28 12:29:00
2022-06-28 16:55:49
2022-06-29 09:07:00
2022-06-29 13:43:00
2022-06-29 18:33:44
2022-06-30 08:42:00
2022-06-30 13:16:00
2022-06-30 16:51:10
2022-06-30 21:14:31
2022-07-01 09:12:00
2022-07-01 13:32:00
2022-07-01 18:00:33
2022-07-02 09:24:00
2022-07-02 13:25:00
2022-07-02 21:24:00
2022-07-03 08:55:00
2022-07-03 13:01:00
2022-07-03 17:59:00
2022-07-04 08:12:00
2022-07-04 12:13:00
2022-07-04 16:43:22
2022-07-05 08:29:00
2022-07-05 12:56:00
2022-07-05 17:46:57
2022-07-06 08:59:00
2022-07-06 13:26:00
2022-07-06 18:37:07
2022-07-07 08:10:00

participant number 26:
2022-08-06 08:08:00
2022-08-06 12:14:00
2022-08-06 16:48:00
2022-08-07 09:06:00
2022-08-07 12:28:00
2022-08-08 12:42:00
2022-08-08 18:00:00
2022-08-09 09:44:00
2022-08-09 13:25:00
2022-08-09 16:16:00
2022-08-10 08:58:00
2022-08-10 12:19:00
2022-08-10 16:09:00
2022-08-11 09:06:00
2022-08-11 12:43:00
2022-08-11 17:10

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)



participant number 28:
2022-08-16 08:08:09
2022-08-16 12:40:06
2022-08-16 18:57:00
2022-08-17 09:04:33
2022-08-17 12:51:32
2022-08-17 18:58:00
2022-08-18 09:25:56
2022-08-18 12:40:39
2022-08-18 18:53:00
2022-08-19 09:12:25
2022-08-19 12:29:40
2022-08-19 19:53:00
2022-08-20 08:59:45
2022-08-20 12:51:38
2022-08-20 19:52:00
2022-08-21 09:38:36
2022-08-21 12:23:01
2022-08-21 18:47:00
2022-08-22 09:11:59
2022-08-22 13:01:30
2022-08-22 19:56:00
2022-08-23 09:45:44
2022-08-23 12:30:38
2022-08-24 08:59:21
2022-08-24 12:39:48
2022-08-24 20:10:00
2022-08-25 09:33:59
2022-08-25 12:33:02
2022-08-26 09:33:51

participant number 29:


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


2022-09-28 08:20:12
2022-09-28 12:06:24
2022-09-28 17:22:00
2022-09-29 05:44:15
2022-09-29 11:56:25
2022-09-29 17:04:00
2022-09-30 06:18:10
2022-09-30 11:38:32
2022-09-30 16:20:00
2022-10-01 06:22:04
2022-10-01 11:53:24
2022-10-01 17:19:00
2022-10-02 06:30:27
2022-10-02 11:55:54
2022-10-02 17:37:00
2022-10-03 05:40:46
2022-10-03 11:40:54
2022-10-03 17:35:00
2022-10-04 05:57:35
2022-10-04 11:41:40
2022-10-04 17:36:00
2022-10-05 06:19:24
2022-10-05 11:39:51
2022-10-05 16:00:00
2022-10-06 05:51:14
2022-10-06 11:39:04
2022-10-06 16:34:00
2022-10-07 05:37:05
2022-10-07 11:39:10
2022-10-07 18:13:00
2022-10-08 06:31:55
done :)


In [88]:
#generate combined dataset of all participants
participants = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,26,27,28,29]
dthree = []
auc3 = []
activeMin2 = []
activeMin3 = []
activeMin4 = []
avg3 = []
peakheight = []
peaktime = []
pd40 = []
startbg = []
iauc = []
hrmax = []
hrauc = []
hriauc = []
hrstart = []
z1mins = []
z2mins = []
z3mins = []
z4mins = []

pid = []
hba1c = []
calories = []
carbs = []
prot = []
fat = []
mealtime = []
snack = []
mtype = []
warnings.filterwarnings('ignore')

for x in participants:
    print("\n" + f"participant number {x}:")
    demographics = getPersonal(x)
    
    mldata = pd.read_excel(f"C:\\Users\\namil\\Downloads\\{str(x).zfill(3)}.xlsx").dropna()
    mldata['Meal Time'] = mldata['Meal Time'].to_numpy()
    readData(x)

    for i in mldata['Meal Time']:
        #print(str(i))
        activityMetrics(str(i))
        iaucAll(str(i), dthree)
        pid.append(x)
    calories.extend(mldata['Calories'])
    carbs.extend(mldata['Carbs'])
    prot.extend(mldata['Protein'])
    fat.extend(mldata['Fat'])
    mealtime.extend(mldata['Meal Time'])
    snack.extend( mldata['snack'])
    mtype.extend(mldata['Meal Type'])
    
warnings.filterwarnings('default')
df = pd.DataFrame({'participant_id': pid, 
               #'dexcom 2hr auc': dtwo, 'mets 2 hr auc': auc2, '2 hr activeMin': activeMin2, 'mets 2 hr avg': avg2,
               'peakheight': peakheight, 'peaktime': peaktime, 'peakduration_40': pd40, 'startbg': startbg, 'iauc': iauc,
               'dexcom 3hr auc':dthree, 'mets 3 hr auc': auc3, 'hrmax': hrmax,'hrauc': hriauc,'hrauc': hrstart, 'hrauc': hrstart,
               'activeMin_2': activeMin2, 'activeMin_3': activeMin3, 'activeMin_4': activeMin4,
               'mets 3 hr avg': avg3, 'calories':calories,'carbs':carbs,'protein':prot,'fat':fat,
               'Meal Time': mealtime, 'snack in 3hrs': snack, 'Meal Type': mtype,})

df.to_csv("combined_dataset")    
print("done :)")


participant number 1:

participant number 2:

participant number 3:

participant number 4:

participant number 5:

participant number 6:

participant number 7:

participant number 8:

participant number 9:

participant number 10:

participant number 11:

participant number 12:

participant number 13:

participant number 14:

participant number 15:

participant number 16:

participant number 17:

participant number 18:

participant number 19:

participant number 20:

participant number 21:

participant number 22:

participant number 23:

participant number 26:

participant number 27:

participant number 28:

participant number 29:
done :)
