In [116]:
import numpy as np
import pandas as pd

from datetime import datetime

from sklearn.preprocessing import OneHotEncoder

In [117]:
#load in data
food_df = pd.read_csv("Diabetes_Data/log_data_fooddata.csv")
insulin_df = pd.read_csv("Diabetes_Data/log_data_insulindata.csv")
corrections_df = pd.read_csv("Diabetes_Data/log_data_correctionsdata.csv")

bg_df = pd.read_csv("Diabetes_Data/bs_data.csv")
hr_df = pd.read_csv("Diabetes_Data/hr_data.csv")

In [118]:
insulin_types = ['fiasp', 'regular', 'nph', 'treisba']

In [119]:
#CLEAN DATA FUNCTIONS
def clean_log_times(logname): 
    #extract date as column
    logname['date'] = [pd.Timestamp(k.split(' ')[0]) for k in logname['datetime']]
    #extract and clean time as column
    logname['time'] = [k.split(' ')[1] for k in logname['datetime']]
    logname['hour'] = [int(k.split(':')[0]) for k in logname['time']]
    logname['minute'] = [int(k.split(':')[1]) for k in logname['time']]
    #drop time and datetime
    logname = logname.drop(['time', 'datetime'], axis='columns')

    for k in range(len(logname['minute'])):
        min = logname['minute'][k]
        res = min % 5
        logname.at[k, 'minute'] = min-res if res < 3 else min+(5-res)
        
    logname = logname.groupby(by=['date', 'hour', 'minute']).sum().reset_index()
    return logname

def clean_sep_date_times(logname):
    logname['date'] = pd.to_datetime(logname['date'])
    logname['hour'] = [int(k.split(':')[0]) for k in logname['time']]
    logname['minute'] = [int(k.split(':')[1]) for k in logname['time']]
    logname = logname.drop('time', axis='columns')
    return logname

def combine_dfs(left_df, right_df):
    df = left_df.merge(right_df, on=['date', 'hour', 'minute'], how='left')
    df = df.fillna(0)
    return df

In [120]:
datetime_list =  pd.date_range(
                        start=bg_df.date[len(bg_df)-1],
                        end=bg_df.date[0],
                        freq='5min').to_frame(index=False, name='datetime')

datetime_list['date'] = [str(k).split()[0] for k in datetime_list['datetime']]
datetime_list['time'] = [str(k).split()[1] for k in datetime_list['datetime']]

datetime_list = clean_sep_date_times(datetime_list)
datetime_list.drop('datetime', axis='columns', inplace=True)

print(datetime_list.shape)

(5473, 3)


In [121]:
#CLEAN FOOD DATA
food_df = clean_log_times(food_df)
food_df = food_df.drop(['servingAmount', 'servingId', 'id', 'foodId'], axis='columns')

In [122]:
#MERGE DATA
df = combine_dfs(datetime_list, food_df)

In [123]:
print(len(df))

#i need to find datetimes that are not in my datetime list

5473


In [124]:
#CLEAN BLOOD GLUCOSE DATA
bg_df = clean_sep_date_times(bg_df)

In [125]:
#MERGE DATA
df = combine_dfs(df, bg_df)
print(len(df))

5473


In [126]:
#initialize insulin type cols with 0s
for k in insulin_types:
    insulin_df[k] = 0

#set amounts for each insulin type
for k in range(len(insulin_df)):
    insulin_df.at[k, insulin_types[insulin_df.at[k, 'insulinId']]] = insulin_df.at[k, 'amount']
    


#CLEAN INSULIN DATA
insulin_df = clean_log_times(insulin_df)
insulin_df = insulin_df.drop(['id', 'insulinId', 'amount'], axis='columns')

In [127]:
insulin_df
    


Unnamed: 0,date,hour,minute,fiasp,regular,nph,treisba
0,2021-11-22,12,30,0,0,0,14
1,2021-11-22,13,20,1,0,0,0
2,2021-11-22,18,40,17,3,2,0
3,2021-11-22,22,5,1,0,0,0
4,2021-11-23,0,40,1,0,0,0
...,...,...,...,...,...,...,...
120,2021-12-10,2,15,2,0,0,0
121,2021-12-10,15,45,5,0,0,18
122,2021-12-10,21,20,5,0,0,0
123,2021-12-11,4,60,7,0,0,0


In [128]:
#MERGE DATA
df = combine_dfs(df, insulin_df)
print(len(df))

5473


In [129]:
#CLEAN CORRECTIONS DATA
corrections_df = clean_log_times(corrections_df)
corrections_df = corrections_df.drop(['id', 'correctionId'], axis='columns')

corrections_df.rename(columns={'amount': 'corrs_amount'}, inplace=True)

In [130]:
#MERGE DATA
df = combine_dfs(df, corrections_df)
print(len(df))

5473


In [131]:
#CLEAN HEART RATE DATA
hr_df = clean_sep_date_times(hr_df)

In [132]:
#MERGE DATA
df = combine_dfs(df, hr_df)
df['weekday'] = [k.day_name() for k in df['date']]
print(len(df))

5473


In [133]:
print(len(df))
print(len(datetime_list))

5473
5473


In [134]:
df.to_csv('clean_data.csv')