In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.table import Table
import seaborn as sns
import os
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import datetime
from datetime import timedelta
from statistics import mode
from scipy.signal import find_peaks


from scipy.integrate import odeint

# Libraries for Correlations
import scipy.stats as stats
from scipy.stats import pearsonr, sem, variation, kruskal,f_oneway
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram
from scipy.spatial.distance import pdist, squareform

from sklearn.preprocessing import LabelEncoder

from itertools import combinations, permutations

%matplotlib inline


############################################

Processing the glucose values to isolate the postprandial glucose response (PPGR)

############################################

In [None]:
def read_data(filename):
    """Load a participant CSV and parse its three timestamp columns.

    Parameters
    ----------
    filename : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file contents with 'glucose_level_ts', 'meal_ts' and 'bolus_ts'
        converted to datetimes. Values are parsed day-first and anything that
        cannot be parsed becomes NaT instead of raising.
    """
    # If an export is not in the default encoding, pass `encoding=` to
    # read_csv here (an earlier revision of this cell used 'MacRoman').
    frame = pd.read_csv(os.path.join(filename))

    for column in ('glucose_level_ts', 'meal_ts', 'bolus_ts'):
        frame[column] = pd.to_datetime(frame[column], dayfirst=True, errors='coerce')

    return frame


In [None]:
def interleave_arrays_increasing(meal, bolus):
    """Merge meal and bolus values into one list of tagged tuples.

    Performs a two-pointer merge: at each step the smaller head element is
    emitted as ``("meal", value)`` or ``("bolus", value)``. When the two heads
    are equal, the bolus entry is emitted first. The output is globally
    increasing only if both inputs are already sorted in increasing order.

    Parameters
    ----------
    meal, bolus : indexable sequences of mutually comparable values

    Returns
    -------
    list of (str, value) tuples
    """
    n_meal, n_bolus = len(meal), len(bolus)
    merged = []
    m = b = 0

    while m < n_meal and b < n_bolus:
        if not (meal[m] < bolus[b]):
            # Ties (and smaller bolus values) go to the bolus side.
            merged.append(("bolus", bolus[b]))
            b += 1
            continue
        merged.append(("meal", meal[m]))
        m += 1

    # At most one of the two tails below is non-empty.
    for k in range(m, n_meal):
        merged.append(("meal", meal[k]))
    for k in range(b, n_bolus):
        merged.append(("bolus", bolus[k]))
    return merged


In [None]:
def glucoseForMealsTs(glucose_ts_array, meal_ts):
    """For every meal time, pick the nearest entry of ``glucose_ts_array``.

    "Nearest" means the smallest absolute difference; on a tie the earliest
    position in ``glucose_ts_array`` wins (``argmin`` returns the first minimum).

    Parameters
    ----------
    glucose_ts_array : array-like supporting subtraction and positional indexing
    meal_ts : iterable of values comparable with ``glucose_ts_array`` entries

    Returns
    -------
    numpy.ndarray
        One selected glucose timestamp per element of ``meal_ts``.
    """
    nearest = [
        glucose_ts_array[np.abs(glucose_ts_array - meal_time).argmin()]
        for meal_time in meal_ts
    ]
    return np.array(nearest)


In [None]:
def glucoseForEventsTs(glucose_ts_array, events_ts):
    """Attach the nearest glucose timestamp to each tagged event.

    Events whose nearest glucose timestamp is more than four hours away
    (in either direction) are dropped.

    Parameters
    ----------
    glucose_ts_array : datetime-like array supporting subtraction and indexing
    events_ts : iterable of (event_type, event_timestamp) pairs

    Returns
    -------
    list of (event_type, event_timestamp, nearest_glucose_timestamp) tuples
    """
    tolerance = pd.Timedelta(hours=4)
    matched = []

    for kind, when in events_ts:
        nearest = glucose_ts_array[np.abs(glucose_ts_array - when).argmin()]
        if abs(nearest - when) > tolerance:
            # No glucose reading close enough to this event.
            continue
        matched.append((kind, when, nearest))

    return matched


In [None]:
def bolusMealSeparation(meal_ts, bolus_start_ts, bolus_dose_filtered):
    """Return the boluses that are not close to any meal.

    A bolus counts as close to a meal when its start time lies between
    4 minutes before and 4 hours after that meal (both bounds inclusive).
    Boluses and doses are paired by position; only the first
    ``min(len(bolus_start_ts), len(bolus_dose_filtered))`` pairs are examined.

    Parameters
    ----------
    meal_ts : indexable sequence of meal timestamps
    bolus_start_ts : indexable sequence of bolus timestamps
    bolus_dose_filtered : indexable sequence of doses, aligned with ``bolus_start_ts``

    Returns
    -------
    list of (bolus_timestamp, dose) tuples
    """
    lead = pd.Timedelta(minutes=4)
    tail = pd.Timedelta(hours=4)

    def near_a_meal(position):
        return any(
            meal_ts[k] - lead <= bolus_start_ts[position] <= meal_ts[k] + tail
            for k in range(len(meal_ts))
        )

    pair_count = min(len(bolus_start_ts), len(bolus_dose_filtered))
    return [
        (bolus_start_ts[p], bolus_dose_filtered[p])
        for p in range(pair_count)
        if not near_a_meal(p)
    ]


In [None]:
def groupBolus2(bolus_array):
    """Find, per day and per fixed time window, the largest bolus and when it occurred.

    Each calendar day present in ``bolus_array`` is split into four half-open
    windows (06:00-10:00, 10:00-14:00, 14:00-18:00, 18:00-22:00; start
    inclusive, end exclusive). For every window the maximum dose is recorded
    together with every timestamp at which that maximum was given.

    Parameters
    ----------
    bolus_array : sequence of (timestamp, dose) pairs

    Returns
    -------
    list of dict
        One dict per day: ``{'Date': date, 'TimeRanges': [...]}`` where each
        entry of ``'TimeRanges'`` is
        ``{'TimeRange': label, 'MaxValue': max_dose, 'Timestamps': [...]}``.
        A window with no bolus has ``MaxValue`` NaN and an empty ``Timestamps``
        list. An empty ``bolus_array`` yields an empty list.
    """
    time_ranges = [
        ("6am-10am", datetime.time(6, 0), datetime.time(10, 0)),
        ("10am-2pm", datetime.time(10, 0), datetime.time(14, 0)),
        ("2pm-6pm", datetime.time(14, 0), datetime.time(18, 0)),
        ("6pm-10pm", datetime.time(18, 0), datetime.time(22, 0)),
    ]

    # An empty input would produce an object-dtype 'Timestamp' column on which
    # the `.dt` accessor raises; there is nothing to group, so return early.
    if len(bolus_array) == 0:
        return []

    df = pd.DataFrame(bolus_array, columns=['Timestamp', 'Value'])

    result = []
    for date, group_data in df.groupby(df['Timestamp'].dt.date):
        # Time-of-day is the same for every window; compute it once per day.
        times_of_day = group_data['Timestamp'].dt.time

        windows = []
        for label, start_time, end_time in time_ranges:
            in_window = (times_of_day >= start_time) & (times_of_day < end_time)
            max_value = group_data.loc[in_window, 'Value'].max()
            # All timestamps in this window that share the maximum dose.
            max_timestamps = group_data.loc[
                in_window & (group_data['Value'] == max_value), 'Timestamp'
            ].tolist()
            windows.append({
                'TimeRange': label,
                'MaxValue': max_value,
                'Timestamps': max_timestamps
            })

        result.append({'Date': date, 'TimeRanges': windows})
    return result



In [None]:
def findTimestampsNotCoveredByMeals(result, meal_events):
    """Collect bolus timestamps from day/time windows that contain no meal.

    Parameters
    ----------
    result : list of dict
        Per-day entries with the shape
        ``{'Date': date, 'TimeRanges': [{'TimeRange': label, 'Timestamps': [...]}, ...]}``.
        Labels must be one of "6am-10am", "10am-2pm", "2pm-6pm", "6pm-10pm".
    meal_events : iterable of datetime-like
        Meal timestamps. Entries that compare False against every window
        (e.g. NaT) never cover a window.

    Returns
    -------
    list
        The ``'Timestamps'`` of every window in which no meal falls, in the
        order the windows appear in ``result``. A meal covers a window when it
        lies between the window's start and end on that date, both inclusive.

    Raises
    ------
    KeyError
        If a window label is not one of the four known labels.
    """
    time_ranges = {
        "6am-10am": (datetime.time(6, 0), datetime.time(10, 0)),
        "10am-2pm": (datetime.time(10, 0), datetime.time(14, 0)),
        "2pm-6pm": (datetime.time(14, 0), datetime.time(18, 0)),
        "6pm-10pm": (datetime.time(18, 0), datetime.time(22, 0))
    }

    # Materialise once: the meals are scanned again for every window.
    meals = list(meal_events)
    timestamps_not_covered = []

    for day_result in result:
        day = day_result['Date']
        for time_range_result in day_result['TimeRanges']:
            window_start, window_end = time_ranges[time_range_result['TimeRange']]
            # Window bounds do not depend on the meal, so build them once per window.
            start_dt = datetime.datetime.combine(day, window_start)
            end_dt = datetime.datetime.combine(day, window_end)

            covered = any(start_dt <= meal <= end_dt for meal in meals)
            if not covered:
                timestamps_not_covered.extend(time_range_result['Timestamps'])

    return timestamps_not_covered


In [None]:
def filter_glucose_levels(glucose_ts_array, glucose_level_array, event_ts):
    """Collect the glucose readings in the four hours following an event.

    Readings are scanned in the order given. A reading is kept when its
    timestamp lies in ``[event_ts, event_ts + 4h]`` (inclusive). Scanning
    stops as soon as an in-window reading is more than 30 minutes after the
    previously kept one, so only the first gap-free run is returned.

    Parameters
    ----------
    glucose_ts_array : iterable of timestamps
    glucose_level_array : iterable of readings, aligned with ``glucose_ts_array``
    event_ts : timestamp marking the start of the window

    Returns
    -------
    list
        The kept readings, in scan order.
    """
    window_open = event_ts
    window_close = event_ts + pd.Timedelta(hours=4)
    max_gap = pd.Timedelta(minutes=30)

    kept = []
    last_kept_ts = None
    for stamp, reading in zip(glucose_ts_array, glucose_level_array):
        if not (window_open <= stamp <= window_close):
            continue
        if last_kept_ts is not None and (stamp - last_kept_ts) > max_gap:
            # Recording gap: discard everything from here on.
            break
        kept.append(reading)
        last_kept_ts = stamp
    return kept


###################################

Read and format DataFrame

###################################

In [None]:
# Participant export to analyse.
participant_file = 'UoM2309.csv'  # Update this with the actual file name

# Scales the insulin dose in the insulin-action model further down.
# NOTE(review): units are not stated here; presumably glucose drop per unit of
# insulin in the same units as glucose_level. TODO confirm.
insulin_sensitivity_factor = 5.8

# Load the file and put the rows in glucose-timestamp order with a fresh 0..n-1 index.
unfiltered = (
    read_data(participant_file)
    .sort_values('glucose_level_ts')
    .reset_index(drop=True)
)

# Stand-alone copies of the columns used by the helper functions.
# The timestamp columns become DatetimeIndex objects; unparseable values become NaT.
glucose_level_ts = pd.to_datetime(
    unfiltered['glucose_level_ts'].to_numpy(copy=True), dayfirst=True, errors='coerce'
)
glucose_level = unfiltered['glucose_level'].to_numpy(copy=True)  # Already converted to mmol/L
bolus_ts = pd.to_datetime(
    unfiltered['bolus_ts'].to_numpy(copy=True), dayfirst=True, errors='coerce'
)
meal_ts = pd.to_datetime(
    unfiltered['meal_ts'].to_numpy(copy=True), dayfirst=True, errors='coerce'
)

bolus_dose = unfiltered['bolus_dose'].to_numpy(copy=True)

# Column views (not copies) kept as pandas Series.
carbs_g = unfiltered['carbs_g']
meal_tags = unfiltered['meal_tag']
meal_types = unfiltered['meal_Type']


In [None]:

# --- Drop missing entries -------------------------------------------------
nan_mask = np.isnan(bolus_dose)

nat_mask = np.isnat(meal_ts)
meal_ts_filtered = meal_ts[~nat_mask]

nat_mask = np.isnat(bolus_ts)

# Doses and bolus timestamps are paired BY POSITION in bolusMealSeparation, so
# both must be filtered with the same mask. Filtering each with its own mask
# (as before) shifts every later pairing as soon as one row has a dose without
# a timestamp, or a timestamp without a dose.
bolus_valid_mask = np.asarray(~nan_mask) & np.asarray(~nat_mask)
bolus_dose_filtered = bolus_dose[bolus_valid_mask]
bolus_start_ts_filtered = bolus_ts[bolus_valid_mask]

# --- Match events to glucose readings --------------------------------------
# Nearest glucose timestamp for every meal.
closest_glucose_array_meals = glucoseForMealsTs(glucose_level_ts, meal_ts_filtered)

# Boluses that are not close to any meal, reduced to the largest one per
# day/time window, keeping only windows in which no meal was logged.
bolusAndValueArray = bolusMealSeparation(meal_ts_filtered, bolus_start_ts_filtered, bolus_dose_filtered)
max_bolus_time_range = groupBolus2(bolusAndValueArray)
bolus_replacement_array = findTimestampsNotCoveredByMeals(max_bolus_time_range, meal_ts_filtered)

# Combine meals and stand-in boluses into one tagged event list and attach the
# nearest glucose timestamp to each event.
# NOTE(review): interleave_arrays_increasing is a two-pointer merge, so the
# combined list is only chronologically ordered if both inputs are sorted.
# meal_ts_filtered follows glucose-timestamp row order; verify this is intended.
interleaved_meal_bolus_array = interleave_arrays_increasing(meal_ts_filtered, bolus_replacement_array)
closest_glucose_meal_bolus_array = glucoseForEventsTs(glucose_level_ts, interleaved_meal_bolus_array)



In [None]:
# Build one record per event: the event itself, the glucose readings that
# follow it, and (for meals only) the meal/bolus details from the source row.
carbs_g = unfiltered['carbs_g']

data_points = []

for event_type, event_ts, glucose_ts in closest_glucose_meal_bolus_array:
    # Position of the matched glucose timestamp, then the next 48 readings from
    # there (fewer if the series ends first).
    closest_index = np.abs(glucose_level_ts - glucose_ts).argmin()
    glucose_levels = glucose_level[closest_index:closest_index + 48]

    if event_type == "meal":
        # Look the meal up once; the first matching row supplies every detail.
        meal_rows = unfiltered.loc[unfiltered['meal_ts'] == event_ts]
        carbs_value = meal_rows['carbs_g'].values[0]
        meal_tag_value = meal_rows['meal_tag'].values[0]
        meal_type_value = meal_rows['meal_Type'].values[0]
        bolus_dose_value = meal_rows['bolus_dose'].values[0]
        bolus_time = meal_rows['bolus_ts'].values[0]
    else:
        # Non-meal events carry no meal or bolus details.
        carbs_value = meal_tag_value = meal_type_value = None
        bolus_dose_value = bolus_time = None

    data_point = {
        "EventTimestamp": event_ts,
        "GlucoseLevels": glucose_levels,
        "EventType": event_type,
        "EventTag": carbs_value,  # carbohydrate amount from the 'carbs_g' column
        "MealTag": meal_tag_value,
        "MealType": meal_type_value,
        "BolusTime" : bolus_time,
        "BolusDose" : bolus_dose_value
    }
    data_points.append(data_point)


In [None]:
# Turn data_points (a list of per-event dicts) into a DataFrame with one row per event
GlucoseEvents_exploded_clean = pd.DataFrame(data_points)


In [None]:

# Calendar features derived from the event timestamp.
GlucoseEvents_exploded_clean['EventTimestamp'] = pd.to_datetime(GlucoseEvents_exploded_clean['EventTimestamp'])
GlucoseEvents_exploded_clean['day_of_the_week'] = GlucoseEvents_exploded_clean['EventTimestamp'].dt.dayofweek
GlucoseEvents_exploded_clean['hour'] = GlucoseEvents_exploded_clean['EventTimestamp'].dt.hour

# Ensure Meal_Type is prioritized for MealCategory
GlucoseEvents_exploded_clean['MealCategory'] = GlucoseEvents_exploded_clean['MealType']

# Assign time-based categories where MealType is not available.
# Bins are half-open: [0, 10) Breakfast, [10, 16) Lunch, [16, 22) Dinner.
# Hours 22 and 23 fall outside every bin and therefore stay missing.
time_based_categories = pd.cut(
    GlucoseEvents_exploded_clean['hour'],
    bins=[0, 10, 16, 22],
    labels=['Breakfast', 'Lunch', 'Dinner'],
    right=False
)

# Assign the filled column back instead of calling fillna(inplace=True) on the
# column selection: that chained in-place form raises a FutureWarning and stops
# updating the DataFrame under pandas 3.0.
GlucoseEvents_exploded_clean['MealCategory'] = GlucoseEvents_exploded_clean['MealCategory'].fillna(time_based_categories)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  GlucoseEvents_exploded_clean['MealCategory'].fillna(time_based_categories, inplace=True)


In [None]:
# Print the plain-text representation of the per-event DataFrame
# (pandas abbreviates long frames and wide cells in this form)
print(GlucoseEvents_exploded_clean)



         EventTimestamp                                      GlucoseLevels  \
0   2024-02-15 16:41:00  [16.1, 16.2, 16.6, 16.7, 16.6, 16.4, 16.1, 15....   
1   2024-02-16 21:35:00  [6.4, 6.8, 6.9, 6.8, 6.7, 6.5, 6.0, 5.8, 5.3, ...   
2   2024-02-20 21:28:00  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...   
3   2024-02-23 08:39:00  [18.6, 19.0, 19.0, 18.5, 17.6, 15.8, 14.9, 14....   
4   2024-03-02 07:10:00  [16.9, 16.8, 16.6, 16.4, 15.8, 15.5, 14.8, 12....   
..                  ...                                                ...   
130 2024-02-16 13:45:00  [9.9, 9.3, 8.8, 8.6, 8.4, 8.4, 8.3, 8.3, 8.2, ...   
131 2024-02-15 18:50:00  [7.4, 7.3, 6.8, 6.3, 5.9, 5.4, 5.1, 4.7, 4.4, ...   
132 2024-02-15 12:05:00  [7.7, 8.2, 8.7, 9.0, 9.2, 9.7, 10.2, 10.2, 10....   
133 2024-02-14 18:50:00  [16.3, 16.3, 16.4, 16.4, 16.3, 16.1, 15.8, 15....   
134 2024-02-14 12:05:00  [5.5, 5.5, 5.4, 5.1, 4.9, 4.7, 4.7, 4.7, 4.6, ...   

    EventType  EventTag                   MealTag MealType  \
0

In [None]:
# Function to compute insulin action profile for rapid-acting insulin
def insulin_action_profile(time, bolus_time, insulin_dose, insulin_sensitivity,
                           onset_min=15, peak_min=60, duration_min=240):
    """Piecewise-linear (triangular) glucose-lowering effect of one bolus.

    The effect is zero until ``onset_min`` minutes after the bolus, falls
    linearly to ``-insulin_dose * insulin_sensitivity`` at ``peak_min``, then
    returns linearly to zero at ``duration_min``. Outside
    ``[bolus_time + onset_min, bolus_time + duration_min)`` the effect is zero.

    Parameters
    ----------
    time : array-like of numbers
        Sample times, in the same unit as ``bolus_time`` (minutes by default).
    bolus_time : number
        Time of the bolus on the ``time`` axis. NaN yields an all-zero profile.
    insulin_dose : number
        Bolus size.
    insulin_sensitivity : number
        Multiplier applied to the dose to obtain the peak effect.
    onset_min, peak_min, duration_min : number, optional
        Delay to onset, to peak, and to the end of action. The defaults
        (15, 60, 240) reproduce the previously hard-coded profile.

    Returns
    -------
    numpy.ndarray of float
        The (non-positive) effect at each sample time, same shape as ``time``.

    Raises
    ------
    ValueError
        If the timing parameters are not strictly increasing.
    """
    if not (onset_min < peak_min < duration_min):
        raise ValueError("expected onset_min < peak_min < duration_min")

    sample_times = np.asarray(time)
    insulin_effect = np.zeros_like(sample_times, dtype=float)

    active = (sample_times >= bolus_time + onset_min) & (sample_times < bolus_time + duration_min)
    rising = active & (sample_times <= bolus_time + peak_min)
    falling = active & ~rising

    # Nothing sampled inside the action window (this also covers a NaN bolus
    # time): leave the profile at zero without touching the dose.
    if not active.any():
        return insulin_effect

    peak_effect = insulin_dose * insulin_sensitivity

    # onset to peak
    insulin_effect[rising] = -1 * (sample_times[rising] - bolus_time - onset_min) * (
        peak_effect / (peak_min - onset_min)
    )
    # peak to end
    insulin_effect[falling] = -1 * (
        peak_effect
        - (sample_times[falling] - bolus_time - peak_min) * (peak_effect / (duration_min - peak_min))
    )

    return insulin_effect


def _glucose_to_float(value):
    """Return ``value`` as a float, or NaN when it is not numeric (e.g. 'Low', 'High', None)."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return np.nan


def _net_response_for_event(event_row, insulin_sensitivity, sample_interval_min=5):
    """Glucose trace of one event with the modelled insulin effect added to it.

    Working inside a function keeps the per-row temporaries local, so the
    notebook-level arrays of the same names (e.g. ``bolus_dose``) are not
    overwritten by running this cell.
    """
    readings = np.array([_glucose_to_float(v) for v in event_row['GlucoseLevels']])

    # Bolus time in minutes relative to the event (NaN when there is no bolus time).
    bolus_offset_min = (event_row['BolusTime'] - event_row['EventTimestamp']).total_seconds() / 60.0

    # Time axis for the readings, assuming one reading every `sample_interval_min` minutes.
    minutes = np.arange(len(readings)) * sample_interval_min

    effect = insulin_action_profile(minutes, bolus_offset_min, event_row['BolusDose'], insulin_sensitivity)

    # Element-wise: observed glucose plus the (non-positive) insulin effect.
    return readings + effect


# One net-response array per row of the GlucoseEvents DataFrame
net_responses = [
    _net_response_for_event(row, insulin_sensitivity_factor)
    for _, row in GlucoseEvents_exploded_clean.iterrows()
]

# Add the net responses to the GlucoseEvents DataFrame
GlucoseEvents_exploded_clean['NetResponse'] = net_responses

# Display the updated DataFrame
print(GlucoseEvents_exploded_clean[['EventTimestamp', 'GlucoseLevels', 'BolusTime', 'BolusDose', 'NetResponse']])



         EventTimestamp                                      GlucoseLevels  \
0   2024-02-15 16:41:00  [16.1, 16.2, 16.6, 16.7, 16.6, 16.4, 16.1, 15....   
1   2024-02-16 21:35:00  [6.4, 6.8, 6.9, 6.8, 6.7, 6.5, 6.0, 5.8, 5.3, ...   
2   2024-02-20 21:28:00  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...   
3   2024-02-23 08:39:00  [18.6, 19.0, 19.0, 18.5, 17.6, 15.8, 14.9, 14....   
4   2024-03-02 07:10:00  [16.9, 16.8, 16.6, 16.4, 15.8, 15.5, 14.8, 12....   
..                  ...                                                ...   
130 2024-02-16 13:45:00  [9.9, 9.3, 8.8, 8.6, 8.4, 8.4, 8.3, 8.3, 8.2, ...   
131 2024-02-15 18:50:00  [7.4, 7.3, 6.8, 6.3, 5.9, 5.4, 5.1, 4.7, 4.4, ...   
132 2024-02-15 12:05:00  [7.7, 8.2, 8.7, 9.0, 9.2, 9.7, 10.2, 10.2, 10....   
133 2024-02-14 18:50:00  [16.3, 16.3, 16.4, 16.4, 16.3, 16.1, 15.8, 15....   
134 2024-02-14 12:05:00  [5.5, 5.5, 5.4, 5.1, 4.9, 4.7, 4.7, 4.7, 4.6, ...   

              BolusTime  BolusDose  \
0                   NaT  

In [None]:
# Preview the first five rows of the per-event table.
# NOTE(review): the saved output of this cell lists columns (PeakGlucose,
# AdjustedGlucose, NormalizedGlucose, CV) that no cell above creates, so it
# appears to come from a later kernel state. Re-run top to bottom to confirm.
GlucoseEvents_exploded_clean.head()

Unnamed: 0,EventTimestamp,GlucoseLevels,EventType,EventTag,MealTag,MealType,BolusTime,BolusDose,day_of_the_week,hour,MealCategory,NetResponse,PeakGlucose,AdjustedGlucose,NormalizedGlucose,CV
134,2024-02-14 12:05:00,"[5.5, 5.5, 5.4, 5.1, 4.9, 4.7, 4.7, 4.7, 4.6, ...",meal,51.0,blackbeanSoup+BananaSoup,Brunch,2024-04-22 22:09:00,2.05,2,12,Brunch,"[5.5, 5.5, 5.4, 5.1, 4.9, 4.7, 4.7, 4.7, 4.6, ...",13.7,"[-1.5, -0.7, -0.4, -0.5, -0.5, 0.0]","[-1.5, -0.7, -0.4, -0.5, -0.5, 0.0]",43.882262
133,2024-02-14 18:50:00,"[16.3, 16.3, 16.4, 16.4, 16.3, 16.1, 15.8, 15....",meal,16.5,GreenBananaSoup,Supper,2024-04-22 11:31:00,3.325,2,18,Supper,"[16.3, 16.3, 16.4, 16.4, 16.3, 16.1, 15.8, 15....",16.4,"[-0.1, -0.1, 0.0, 0.0, -0.1, -0.3, -0.6, -1.3,...","[-0.1, -0.1, 0.0, 0.0, -0.1, -0.3, -0.6, -1.3,...",38.258692
132,2024-02-15 12:05:00,"[7.7, 8.2, 8.7, 9.0, 9.2, 9.7, 10.2, 10.2, 10....",meal,35.4,BlackbeanSoup,Brunch,2024-04-21 17:35:00,3.1,3,12,Brunch,"[7.7, 8.2, 8.7, 9.0, 9.2, 9.7, 10.2, 10.2, 10....",19.0,"[-0.8, -0.7, -0.6, -0.4, -0.2, 0.0, -0.7, -1.0...","[-0.8, -0.7, -0.6, -0.4, -0.2, 0.0, -0.7, -1.0...",22.483596
0,2024-02-15 16:41:00,"[16.1, 16.2, 16.6, 16.7, 16.6, 16.4, 16.1, 15....",bolus,,,,NaT,,3,16,Dinner,"[16.1, 16.2, 16.6, 16.7, 16.6, 16.4, 16.1, 15....",16.7,"[-0.6, -0.5, -0.1, 0.0, -0.1, -0.3, -0.6, -1.2...","[-0.6, -0.5, -0.1, 0.0, -0.1, -0.3, -0.6, -1.2...",48.159029
131,2024-02-15 18:50:00,"[7.4, 7.3, 6.8, 6.3, 5.9, 5.4, 5.1, 4.7, 4.4, ...",meal,47.0,Fish+Veg,Dinner,2024-04-21 15:31:00,3.75,3,18,Dinner,"[7.4, 7.3, 6.8, 6.3, 5.9, 5.4, 5.1, 4.7, 4.4, ...",10.3,"[-1.0, -0.5, -0.5, -0.2, -0.1, 0.0]","[-1.0, -0.5, -0.5, -0.2, -0.1, 0.0]",34.268673
