In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the consolidated readings used throughout this notebook.
# The CSV was pre-processed by removing entry "Low".
data = pd.read_csv(
    "~/Desktop/NCSA_genomics/Python - notebooks/TSForecasting/Data/consolidatedDataForPackage.csv",
    sep=",",
)

In [18]:
from datetime import datetime
from statistics import mean

In [50]:
# Coerce the time stamp and glucose columns to proper dtypes (in place on `data`).
for column, convert in (('Display Time', pd.to_datetime), ('GlucoseValue', pd.to_numeric)):
    data[column] = convert(data[column])

# Keep only the readings of a single subject and show the resulting dtypes.
subject_rows = data['subjectId'] == "1636-69-032"
xx = data[subject_rows]
xx.dtypes

Display Time    datetime64[ns]
GlucoseValue           float64
subjectId               object
dtype: object

In [51]:
def fullDay(data):
    """Drop every reading taken on the first and on the last calendar date.

    The first and last dates are taken from the first and last rows of
    ``data`` (rows are not sorted here), so the caller is expected to pass
    readings in chronological order.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'Display Time' column holding time stamps (anything
        ``pd.to_datetime`` accepts). The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index containing only the rows whose
        date differs from both the first and the last date. An empty input
        yields an empty frame instead of raising.
    """
    # reset_index returns a new frame, so the caller's object is never mutated.
    data = data.reset_index(drop=True)

    # The previous implementation used 'Dates' as a scratch column and removed
    # it before returning; keep that contract if the input already carries one.
    data = data.drop(columns=['Dates'], errors='ignore')

    if data.empty:
        return data

    # Vectorised calendar date of every reading (no per-row Python loop).
    dates = pd.to_datetime(data['Display Time']).dt.date
    start = dates.iloc[0]
    end = dates.iloc[-1]

    keep = (dates != start) & (dates != end)
    return data[keep].reset_index(drop=True)

In [52]:
# Re-derive the subject's rows from `data` on every run, so that re-executing
# this cell cannot strip two more days from an already trimmed frame
# (the former `xx = fullDay(xx)` was not idempotent).
xx = fullDay(data[data['subjectId'] == "1636-69-032"])
xx.head()

Unnamed: 0,Display Time,GlucoseValue,subjectId
0,2016-01-14 00:03:15,120.0,1636-69-032
1,2016-01-14 00:08:15,120.0,1636-69-032
2,2016-01-14 00:13:15,123.0,1636-69-032
3,2016-01-14 00:18:15,126.0,1636-69-032
4,2016-01-14 00:23:15,127.0,1636-69-032


In [53]:
# Time span between the first and the last remaining reading, rounded to the
# nearest whole day. "D" is the canonical pandas alias for a calendar day.
first_reading = xx['Display Time'].iloc[0]
last_reading = xx['Display Time'].iloc[-1]
length = (last_reading - first_reading).round("D")
days = length.days

In [54]:
# Display the number of whole days spanned by the readings in `xx`.
days

5

In [55]:
def gvp(x, units):
    """Compute the glycemic variability percentage (GVP) of a glucose trace.

    GVP is the relative excess length of the glucose-versus-time curve over
    the elapsed time: ``(L / L_0 - 1) * 100``, where ``L`` sums the Euclidean
    length of every segment between consecutive readings and ``L_0`` sums the
    corresponding time differences in minutes.

    Parameters
    ----------
    x : pandas.DataFrame
        Columns are read by position: column 0 holds the reading time stamps
        and column 1 the glucose values.
    units : str
        Must be 'mg'.

    Returns
    -------
    pandas.DataFrame or None
        One-row frame with the single column 'GVP(%)'. The value is NaN when
        no segment has both a known time step and a known glucose step (for
        example fewer than two readings). When ``units`` is not 'mg' a message
        is printed and None is returned.
    """
    if units != 'mg':
        # Kept as print-and-return-None so existing callers see no new exception.
        return print('units can only be mg')

    # Minutes between consecutive readings; sampling need not be equally spaced.
    dt = x.iloc[:, 0].diff() / np.timedelta64(1, 'm')
    dy = x.iloc[:, 1].diff()

    # A segment counts only if both its time step and its glucose step are
    # known. Summing dt over segments whose dy is missing would inflate L_0
    # relative to L and could even produce a negative GVP.
    valid = dt.notna() & dy.notna()
    if not valid.any():
        return pd.DataFrame({'GVP(%)': [np.nan]})

    L = np.sum(np.sqrt(dt[valid]**2 + dy[valid]**2))
    L_0 = np.sum(dt[valid])

    GVP = (L/L_0 - 1) * 100
    return pd.DataFrame({'GVP(%)': [GVP]})

In [56]:
# Personal Glycemic State
# The PGS is an additive composite metric calculated using the following simple equation
# PGS  = F(GVP) + F(MG) + F(PTIR) + F(H),
# where F(GVP) is a function of the glycemic variability percentage, 
# F(MG) is a function of the mean glucose, 
# F(PTIR) is a function of the percent time in range (from 70 to 180 mg/ dL), and 
# F(H) is a function of the incidence of the number of hypoglycemic episodes per week.
# The hypoglycemia function incorporates two distinct thresholds (54 and 70 mg/dL) and is 
# the sum of two terms: F54(H) and F70(H).
# PGS is computed per week and then averaged across all weeks.
# The min value of the PGS metric is 4.6 corresponding to excellent glycemic control 
# (no diabetes or patients with diabetes under superb glycemic control). 
# The max value of the PGS metric is 40 corresponding to a poor quality of glycemic control 
# that would be seen in patients with elevated A1c values, high mean glucose, and low percent of time 
# in the euglycemic range.

# DESCRIPTION: Takes in a sequence of continuous glucose values and computes
# the Personal Glycemic State (PGS) score.
# Operates on data given in mg/dL only; any other units value is rejected.

# FUNCTION PARAMETERS: x - is a Pandas dataframe; the first column holds the
# Pandas time stamps and the second column holds the numeric values of the
# continuous glucose readings. units - must be 'mg'.

# RETURN: Output is a single numeric value, the PGS score.

# REFERENCES:
# -  I. B. Hirsch, A. K. Balo, K. Sayer, A. Garcia, B. A. Buckingham, and T. A. Peyser.
# A simple composite metric for the assessment of glycemic status from continuous glucose
# monitoring data: implications for clinical practice and the artificial pancreas.
# Diabetes Technology & Therapeutics, 19(S3):S-38, 2017.

def pgs(x, units):
    """Compute the Personal Glycemic State (PGS) score of a glucose trace.

    PGS = F(GVP) + F(MG) + F(PTIR) + F(H), where the four terms are functions
    of the glycemic variability percentage, the mean glucose, the percent time
    in range (70 to 180 mg/dL) and the hypoglycemia counts (thresholds 54 and
    70 mg/dL).

    Parameters
    ----------
    x : pandas.DataFrame
        Columns are read by position: column 0 holds the reading time stamps
        (consumed by ``gvp``) and column 1 the glucose values.
    units : str
        Must be 'mg'.

    Returns
    -------
    float or None
        The PGS score. NaN when ``x`` holds no non-missing glucose reading.
        When ``units`` is not 'mg' a message is printed and None is returned.
    """
    if units != 'mg':
        # Kept as print-and-return-None so existing callers see no new exception.
        return print('units can only be mg')

    glucose = x.iloc[:, 1]

    # Missing readings carry no information: they must neither count as
    # "in range" nor enlarge the denominator of the time-in-range percentage.
    n_valid = int(glucose.notna().sum())
    if n_valid == 0:
        return np.nan

    # NOTE(review): N54 and N70 count individual readings below the thresholds,
    # whereas the header above describes hypoglycemic *episodes per week*.
    # Left unchanged; confirm against the reference before relying on F(H).
    N54 = int((glucose <= 54).sum())
    F_54H = 0.5 + 4.5 * (1 - np.exp(-0.81093*N54))

    # Readings below 70 that are not already counted at the 54 threshold.
    N70 = int((glucose < 70).sum()) - N54
    if N70 <= 7.65:
        F_70H = 0.5714 * N70 + 0.625
    else:
        F_70H = 5

    F_H = F_54H + F_70H

    # gvp returns a one-row frame; pull the scalar out so the rest is plain
    # float arithmetic rather than DataFrame arithmetic.
    GVP = gvp(x, units=units)['GVP(%)'].iloc[0]
    F_GVP = 1 + 9/(1 + np.exp(-0.049*(GVP-65.47)))

    # Percent of valid readings within 70..180 mg/dL (both bounds inclusive).
    TIR = int(glucose.between(70, 180).sum())
    PTIR = TIR*100/n_valid
    F_PTIR = 1 + 9/(1 + np.exp(0.0833*(PTIR - 55.04)))

    MG = np.mean(glucose)
    F_MG = 1 + 9 * ( 1/(1 + np.exp(0.1139*(MG-72.08))) + 1/(1 + np.exp(-0.09195*(MG-157.57))) )

    return F_GVP + F_MG + F_PTIR + F_H



In [57]:
# PGS computed over all of the subject's readings in `xx` pooled together.
x = pgs(xx, units='mg')
x

6.13428315674699

In [58]:
# Calendar date of every reading, in row order. The vectorised accessor does
# not depend on `xx` having a 0..n-1 index, unlike a per-row label lookup.
dates = xx['Display Time'].dt.date.tolist()
    

In [59]:
# Attach the calendar date of each reading as a 'Date' column, then display the frame.
xx['Date'] = dates
xx

Unnamed: 0,Display Time,GlucoseValue,subjectId,Date
0,2016-01-14 00:03:15,120.0,1636-69-032,2016-01-14
1,2016-01-14 00:08:15,120.0,1636-69-032,2016-01-14
2,2016-01-14 00:13:15,123.0,1636-69-032,2016-01-14
3,2016-01-14 00:18:15,126.0,1636-69-032,2016-01-14
4,2016-01-14 00:23:15,127.0,1636-69-032,2016-01-14
...,...,...,...,...
1434,2016-01-18 23:37:52,136.0,1636-69-032,2016-01-18
1435,2016-01-18 23:42:52,126.0,1636-69-032,2016-01-18
1436,2016-01-18 23:47:52,132.0,1636-69-032,2016-01-18
1437,2016-01-18 23:52:52,135.0,1636-69-032,2016-01-18


In [60]:
# Score each calendar day on its own: groupby yields one sub-frame per distinct 'Date'.
pgs_daily = [pgs(day_df, units='mg') for _, day_df in xx.groupby('Date')]

In [61]:
# Display the list of per-day PGS scores.
pgs_daily

[5.385984980695627,
 6.080240047318844,
 5.850877511070355,
 5.436273573106833,
 5.86907828520549]

In [62]:
# Arithmetic mean of the per-day PGS scores (statistics.mean).
mean(pgs_daily)

5.72449087947943