In [63]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

data = pd.read_csv("~/Desktop/NCSA_genomics/Python - notebooks/TSForecasting/Data/consolidatedDataForPackage.csv", sep=",") # pre-processed by removing entry "Low"
#data

In [64]:
from datetime import datetime
from statistics import mean
from datetime import timedelta

In [65]:
data['Display Time'] = pd.to_datetime(data['Display Time'])
data['GlucoseValue'] = pd.to_numeric(data['GlucoseValue'])
xx = data[data['subjectId'] == "OD552"]
xx.dtypes

Display Time    datetime64[ns]
GlucoseValue           float64
subjectId               object
dtype: object

In [66]:
def fullDay(data):
        
    dates = list()
    data = data.reset_index(drop=True)
    for i in range(0,len(data.index)):
        dates.append(data['Display Time'][i].date())
    data['Dates'] = dates
    end = data['Dates'].iloc[-1]
    start = data['Dates'].iloc[0]

    indexVals = data[ data['Dates'] == start ].index
    # indexVals
    data.drop(indexVals , inplace=True)

    indexVals = data[ data['Dates'] == end ].index
    # indexVals
    data.drop(indexVals , inplace=True)

    data = data.reset_index(drop=True)

    data.drop(['Dates'], axis=1, inplace=True)

    return data

In [67]:
xx = fullDay(xx)
xx.head()

Unnamed: 0,Display Time,GlucoseValue,subjectId
0,2025-04-17 00:02:07,81.0,OD552
1,2025-04-17 00:07:07,80.0,OD552
2,2025-04-17 00:12:07,80.0,OD552
3,2025-04-17 00:17:07,81.0,OD552
4,2025-04-17 00:22:07,82.0,OD552


In [70]:
def gvp(x, units):
    if (units != 'mg'):
        return print('units can only be mg')
    
    dt = x.iloc[:, 0].diff()/np.timedelta64(1,'m') # assuming that sampling can not necessarily be equally spaced
    dy = x.iloc[:, 1].diff()
    
    L = np.sum(np.sqrt(dt**2 + dy**2))
    L_0 = np.sum(dt)
    
    GVP = (L/L_0 -1) *100
    return pd.DataFrame({'GVP(%)':[GVP]})

In [71]:
# Personal Glycemic State
# The PGS is an additive composite metric calculated using the following simple equation
# PGS  = F(GVP) + F(MG) + F(PTIR) + F(H),
# where F(GVP) is a function of the glycemic variability percentage, 
# F(MG) is a function of the mean glucose, 
# F(PTIR) is a function of the percent time in range (from 70 to 180 mg/ dL), and 
# F(H) is a function of the incidence of the number of hypoglycemic episodes per week.
# The hypoglycemia function incorporates two distinct thresholds (54 and 70 mg/dL) and is 
# the sum of two terms: F54(H) and F70(H).
# PGS is computed per week and then averaged across all weeks.
# The min value of the PGS metric is 4.6 corresponding to excellent glycemic control 
# (no diabetes or patients with diabetes under superb glycemic control). 
# The max value of the PGS metric is 40 corresponding to a poor quality of glycemic control 
# that would be seen in patients with elevated A1c values, high mean glucose, and low percent of time 
# in the euglycemic range.

# DESCRIPTION: Takes in a sequesnce of continuous glucose values and computes
# glycemic variability percentage SDRC.
# Operated on data given either in mmol/L or mg/dL.

# FUNCTION PARAMETERS: x - is Pandas dataframe, in the fist column is given subject ID, 
# in the second - Pandas time stamp, and in the fird - numeric values of 
# continuous glucose readings.

# RETRUN: Output is Pandas dataframe that contains numeric value for DT.

# REFERENCES:
# -  I. B. Hirsch, A. K. Balo, K. Sayer, A. Garcia, B. A. Buckingham, and T. A.Peyser. 
# A simple composite metric for the assessment of glycemic status fromcontinuous glucose 
# monitoring data: implications for clinical practice and theartificial pancreas. 
# Diabetes technology & therapeutics, 19(S3):S–38, 2017.

def pgs(x, units):
    if (units != 'mg'):
        return print('units can only be mg')
    
    N54 = len(x[x.iloc[:,1]<=54])
    F_54H = 0.5 + 4.5 * (1 - np.exp(-0.81093*N54))
    
    N70 = len(x[x.iloc[:,1]<70]) - N54
    
    if (N70 <= 7.65):
        F_70H = 0.5714 * N70 + 0.625
    else:
        F_70H = 5
        
    F_H = F_54H + F_70H
    GVP = gvp(x, units=units)

    F_GVP = 1 + 9/(1 + np.exp(-0.049*(GVP-65.47)))
    
    
    TIR  =  len(x) - len(x[x.iloc[:,1]<70].iloc[:,1]) - len(x[x.iloc[:,1]>180].iloc[:,1])
    PTIR = TIR*100/len(x)
    
    F_PTIR = 1 + 9/(1 + np.exp(0.0833*(PTIR - 55.04)))
    
    MG = np.mean(x.iloc[:, 1])
    F_MG = 1 + 9 * ( 1/(1 + np.exp(0.1139*(MG-72.08))) + 1/(1 + np.exp(-0.09195*(MG-157.57))) )
    
    PGS = F_GVP + F_MG + F_PTIR + F_H
    PGS.columns=['PGS']
    return PGS['PGS'][0]



In [73]:
#pgs => FULL
x = pgs(xx, units='mg')
x

17.93904476543019

In [76]:
dates = []
for i in range(len(xx.index)):
    dates.append(xx['Display Time'][i].date())
xx['Date'] = dates   
xx.head()

Unnamed: 0,Display Time,GlucoseValue,subjectId,Date
0,2025-04-17 00:02:07,81.0,OD552,2025-04-17
1,2025-04-17 00:07:07,80.0,OD552,2025-04-17
2,2025-04-17 00:12:07,80.0,OD552,2025-04-17
3,2025-04-17 00:17:07,81.0,OD552,2025-04-17
4,2025-04-17 00:22:07,82.0,OD552,2025-04-17


In [14]:
pgs_daily = []
for Date, df in xx.groupby('Date'):
#     print(type(df))
    pgs_daily.append(pgs(df, units='mg'))

In [77]:
#pgs => daily: averaged
mean(pgs_daily)

13.293086496948328

In [83]:
start = xx.Date[0]
end = start+timedelta(days=7)
last_date = xx.Date.iloc[-1]


In [84]:
pgs_weekly = []
while end <= last_date:
    xy = pd.DataFrame()
    mask = (xx['Date'] >= start) & (xx['Date'] <= end)
    xy = pd.concat([xy, xx.loc[mask]],ignore_index=True)
    pgs_weekly.append(pgs(xy, units='mg'))
    start = end+timedelta(days=1)
    end = start+timedelta(days=7)
    
end = last_date
mask = (xx['Date'] >= start) & (xx['Date'] <= end)
xy = pd.concat([xy, xx.loc[mask]],ignore_index=True)
pgs_weekly.append(pgs(xy, units='mg'))
print(pgs_weekly)
    

[16.409214478254377, 17.03795741695646, 19.98236789072719, 16.490565199365335, 19.162284038752045]


In [85]:
#PGS => week: averaged
mean(pgs_weekly)

17.816477804811083