In [1]:
import pickle
import pandas as pd
import numpy as np


In [2]:
# Natural-language questions about a CGM (continuous glucose monitor) trace.
# Each entry is prefixed with its question id ("Q1" .. "Q30"); the text,
# including trailing spaces, is kept exactly as authored.
Questions = [
    # Q1-Q11: whole-trace summary statistics.
    'Q1:What was my mean glucose?',
    'Q2:What was my maximum glucose?',
    'Q3:What was the standard deviation of my glucose? ',
    'Q4:What was my minimum glucose?',
    'Q5:What was my percent time in range? ',
    'Q6:What was my percent time in hyperglycemia?',
    'Q7:What was my percent time in hypoglycemia?',
    'Q8:What was my glycemic variability?',
    'Q9:What was my percent time in severe hyperglycemia?',
    'Q10:What is my estimated A1C?',
    'Q11:What was my percent time in severe hypoglycemia?',

    # Q12-Q22: timing, episodes and time-of-day questions.
    'Q12:What time was my blood glucose highest?',
    'Q13:What day was my glucose control the most out of range?',
    'Q14:What time of the day was my blood glucose lowest? ',
    'Q15:When did my most recent episode of hypoglycemia start?',
    'Q16:How long was my last episode of hypoglycemia?',
    'Q17:What was my longest time spent in hyperglycemia? ',
    'Q18:How many times did I experience hypoglycemia? ',
    'Q19:What was my mean overnight blood glucose? ',
    'Q20:What period of the day did I have the highest blood glucose?',
    'Q21:Did I have noctural hypoglycemia? ',
    'Q22:What was my highest glucose reading during dinner? ',

    # Q23-Q26: sensor activity and data quality.
    'Q23:What percent of time was my CGM active?',
    'Q24:How many times did my sensor disconnect?',
    'Q25:Was my low blood glucose likely due to sensor error?',
    'Q26:Are there any artifacts in the CGM data?',

    # Q27-Q30: day-over-day and week-over-week comparisons.
    'Q27:Was my average glucose control today better than yesterday?',
    'Q28:Was my time in range improved this week compared to last week? ',
    'Q29:Was my max glucose lower today than yesterday?',
    'Q30:Did I spend less time in hypoglycemia this week than last week?',
]

In [31]:
def _period_of_day(hour):
    """Map an hour of day (0-23) to the period label reported for Q20."""
    if 6 <= hour < 12:
        return 'Morning'
    if 12 <= hour < 18:
        return 'Afternoon'
    if 18 <= hour < 24:
        return 'Evening'
    return 'Night'


def _episode_lengths(mask):
    """Return the sample count of each run of consecutive True values in *mask*.

    The result is indexed by a 1-based run number in row order, so
    ``.iloc[-1]`` is the last run. It is empty when *mask* has no True value.
    """
    # A run starts wherever the mask is True and the previous row was not.
    starts = mask & ~mask.shift(1, fill_value=False)
    run_id = starts.cumsum()
    inside = run_id[mask]
    return inside.groupby(inside).size()


def get_stats(data_all, name_, data, sample_minutes=5):
    """Compute the per-trace answers to questions Q1-Q30 and store them.

    Parameters
    ----------
    data_all : pandas.DataFrame
        Results table. One cell per statistic is written to row ``name_``
        with ``data_all.loc[name_, <column>] = value``; the frame is modified
        in place and also returned.
    name_ : hashable
        Row label in ``data_all`` under which the results are stored.
    data : pandas.DataFrame
        Readings with a datetime-like ``'Time'`` column (``.dt`` is used on
        it) and a numeric ``'CGM'`` column. It is not modified.
    sample_minutes : float, default 5
        Nominal spacing between consecutive readings, in minutes. Used to
        convert the longest hyperglycemia run to minutes (Q17) and to work
        out how many readings were expected (Q23).

    Returns
    -------
    pandas.DataFrame
        ``data_all``.

    Notes
    -----
    Thresholds used: low ``< 70``, severe low ``< 54``, high ``> 180``,
    severe high ``> 250``; in range is the inclusive interval ``[70, 180]``
    so that low + in range + high covers every non-missing reading.

    NOTE(review): Q16 is reported as a number of samples whereas Q17 is
    converted to minutes; confirm which unit downstream consumers expect.
    Q24 stores the largest gap between consecutive readings, not a count of
    disconnects. Q25 and Q26 are sensor specific and are left as NaN.
    """
    # Episode detection and "most recent" lookups rely on chronological
    # order, so sort first (stable sort) and use a clean positional index.
    df = data.sort_values(by='Time', kind='mergesort').reset_index(drop=True)
    cgm = df['CGM']
    times = df['Time']
    hours = times.dt.hour
    dates = times.dt.date
    n_readings = len(cgm)

    low = cgm < 70
    high = cgm > 180
    in_range = (cgm >= 70) & (cgm <= 180)
    out_of_range = low | high

    def fraction(mask):
        # Share of all readings (missing values included in the denominator).
        return int(mask.sum()) / n_readings

    mean_cgm = cgm.mean()
    std_cgm = cgm.std(ddof=0)  # population standard deviation

    # Q1-Q11: whole-trace summary statistics.
    data_all.loc[name_, 'Q1:Average'] = mean_cgm
    data_all.loc[name_, 'Q2:Maximum'] = cgm.max()
    data_all.loc[name_, 'Q3:STD'] = std_cgm
    data_all.loc[name_, 'Q4:Mininum'] = cgm.min()
    data_all.loc[name_, 'Q5:TIR'] = fraction(in_range)
    data_all.loc[name_, 'Q6:TAR 1 (>180)'] = fraction(high)
    data_all.loc[name_, 'Q7:TBR 1 (<70)'] = fraction(low)
    data_all.loc[name_, 'Q8:CV'] = std_cgm / mean_cgm
    data_all.loc[name_, 'Q9:TAR 2 (>250)'] = fraction(cgm > 250)
    data_all.loc[name_, 'Q10:eA1c'] = (46.7 + mean_cgm) / 28.7
    data_all.loc[name_, 'Q11:TBR 2 (<54)'] = fraction(cgm < 54)

    # Q12: timestamp of the highest reading.
    peak_time = df.loc[cgm.idxmax(), 'Time']
    data_all.loc[name_, 'Q12:Highest time'] = peak_time

    # Q13: calendar day with the most out-of-range readings (NaN if none).
    oor_dates = dates[out_of_range]
    if oor_dates.empty:
        worst_day = np.nan
    else:
        worst_day = oor_dates.value_counts().idxmax()
    data_all.loc[name_, 'Q13:Day out of range'] = worst_day

    # Q14: time of day of the lowest reading.
    data_all.loc[name_, 'Q14: lowest'] = df.loc[cgm.idxmin(), 'Time'].time()

    # Q15: timestamp of the most recent low reading (NaT if there is none).
    low_times = times[low]
    most_recent_low = low_times.iloc[-1] if not low_times.empty else pd.NaT
    data_all.loc[name_, 'Q15: most recent low'] = most_recent_low

    # Q16-Q18: episodes are runs of consecutive low / high readings.
    hypo_runs = _episode_lengths(low)
    hyper_runs = _episode_lengths(high)
    if len(hypo_runs) > 0:
        last_hypo_length = hypo_runs.iloc[-1]
    else:
        last_hypo_length = np.nan
    data_all.loc[name_, 'Q16:most recent length of low'] = last_hypo_length
    if len(hyper_runs) > 0:
        longest_hyper = hyper_runs.max() * sample_minutes
    else:
        longest_hyper = np.nan
    data_all.loc[name_, 'Q17:max length in hyper'] = longest_hyper
    data_all.loc[name_, 'Q18:number of hypo'] = len(hypo_runs)

    # Q19: mean glucose overnight, defined as 00:00-05:59.
    overnight = hours < 6
    data_all.loc[name_, 'Q19'] = cgm[overnight].mean()

    # Q20: period of the day in which the highest reading occurred.
    data_all.loc[name_, 'Q20'] = _period_of_day(peak_time.hour)

    # Q21: any low reading during the overnight window.
    data_all.loc[name_, 'Q21'] = bool((low & overnight).any())

    # Q22: highest reading during dinner, defined as 17:00-21:59.
    dinner = (hours >= 17) & (hours < 22)
    data_all.loc[name_, 'Q22'] = cgm[dinner].max()

    # Q23: readings received versus readings expected over the covered span.
    # A span of k intervals holds k + 1 samples, hence the "+ 1".
    span_minutes = (times.iloc[-1] - times.iloc[0]).total_seconds() / 60
    expected = span_minutes / sample_minutes + 1
    data_all.loc[name_, 'Q23:Percent active'] = (n_readings / expected) * 100

    # Q24: largest gap between consecutive readings.
    data_all.loc[name_, 'Q24:Time disconnect'] = times.diff().max()

    # Q25, Q26: sensor specific, not computed here.
    data_all.loc[name_, 'Q25:Low glucose sensor error'] = np.nan
    data_all.loc[name_, 'Q26:Artifacts'] = np.nan

    # Q27, Q29: "today" is the last calendar day present in the data and
    # "yesterday" the calendar day before it.
    most_recent = dates.max()
    yesterday = most_recent - pd.Timedelta(days=1)
    cgm_today = cgm[dates == most_recent]
    cgm_yesterday = cgm[dates == yesterday]
    data_all.loc[name_, 'Q27:better'] = cgm_today.mean() < cgm_yesterday.mean()
    data_all.loc[name_, 'Q29:max'] = cgm_today.max() < cgm_yesterday.max()

    # Q30, Q28: "this week" is the 7 calendar days ending on the most recent
    # day; "last week" is the 7 calendar days immediately before that.
    week_ago = most_recent - pd.Timedelta(days=7)
    two_weeks_ago = most_recent - pd.Timedelta(days=14)
    this_week = dates > week_ago
    last_week = (dates > two_weeks_ago) & (dates <= week_ago)
    data_all.loc[name_, 'Q30:hypo'] = (
        low[this_week].mean() < low[last_week].mean()
    )
    data_all.loc[name_, 'Q28:tir'] = (
        in_range[this_week].mean() > in_range[last_week].mean()
    )

    return data_all