# Introduction
* description: Example jupyter notebook for loading Tidepool data and calculating cgm stats
* version: 0.0.1
* created: 2018-07-17
* author: Ed Nykaza
* dependencies:
    * requires tidepool-analytics environment (see root directory for instructions
    on how to install the environment).
* license: BSD-2-Clause

# Load required libraries

In [84]:
import pandas as pd
import os

# Load data

In [85]:
# define the data path
dataPath = os.path.join("..", "example-data")

# load json data
data = pd.read_csv(os.path.join(dataPath, "example-from-j-jellyfish.csv"))

# view the first 5 rows of data
data.head()

Unnamed: 0,jsonRowIndex,annotations,clockDriftOffset,conversionOffset,deliveryType,deviceId,deviceTime,duration,expectedDuration,guid,...,state,timeProcessing,timezone,version,bgInput,bolus,carbInput,insulinCarbRatio,insulinOnBoard,recommended
0,2193,,0.0,0,scheduled,InsOmn-130346997,2017-12-31T18:00:00,14400000.0,,6b6b6e42-abcf-4554-b121-7074a0918484,...,,,,,,,,,,
1,2190,,0.0,0,scheduled,InsOmn-130346997,2017-12-31T22:00:00,7200000.0,,cc62b7b6-227c-4bef-9883-b88066063706,...,,,,,,,,,,
2,2188,,0.0,0,scheduled,InsOmn-130346997,2018-01-01T00:00:00,12600000.0,,5f37715a-3589-4283-8d88-f9398f14c2d3,...,,,,,,,,,,
3,2185,,0.0,0,scheduled,InsOmn-130346997,2018-01-01T03:30:00,7200000.0,,f9aeb728-6723-4f3d-8a03-cd8854a459bb,...,,,,,,,,,,
4,2184,,0.0,0,scheduled,InsOmn-130346997,2018-01-01T05:30:00,10163000.0,,bab3ca8e-f663-4299-a1c9-60b5b766a868,...,,,,,,,,,,


In [86]:
# get a list of the unique data types
data["type"].unique()

array(['basal', 'bolus', 'cbg', 'deviceEvent', 'pumpSettings', 'smbg',
       'upload', 'wizard'], dtype=object)

In [87]:
# get a list of all data fields
data.columns

Index(['jsonRowIndex', 'annotations', 'clockDriftOffset', 'conversionOffset',
       'deliveryType', 'deviceId', 'deviceTime', 'duration',
       'expectedDuration', 'guid', 'id', 'payload', 'percent', 'rate',
       'scheduleName', 'suppressed', 'time', 'timezoneOffset', 'type',
       'uploadId', 'expectedNormal', 'extended', 'normal', 'subType',
       '_deduplicator', 'units', 'value', 'alarmType', 'reason', 'status',
       'activeSchedule', 'basalSchedules', 'bgTarget', 'carbRatio',
       'insulinSensitivity', '_dataState', '_state', 'byUser', 'computerTime',
       'dataSetType', 'deviceManufacturers', 'deviceModel',
       'deviceSerialNumber', 'deviceTags', 'state', 'timeProcessing',
       'timezone', 'version', 'bgInput', 'bolus', 'carbInput',
       'insulinCarbRatio', 'insulinOnBoard', 'recommended'],
      dtype='object')

# Get cgm stats

In [88]:
# get just the cgm data
cgm = data[data["type"] == "cbg"].copy()

# look at the first 5 rows of data
cgm.head()

Unnamed: 0,jsonRowIndex,annotations,clockDriftOffset,conversionOffset,deliveryType,deviceId,deviceTime,duration,expectedDuration,guid,...,state,timeProcessing,timezone,version,bgInput,bolus,carbInput,insulinCarbRatio,insulinOnBoard,recommended
713,1933,,0.0,0,,AbbottFreeStyleLibre-JKGX280-T2691,2018-01-17T02:51:17,,,9ccd402e2a9c4275b5ac226d1dc73564,...,,,,,,,,,,
714,1932,,0.0,0,,AbbottFreeStyleLibre-JKGX280-T2691,2018-01-17T03:06:17,,,af423d055ecf4bc09864f56705f0c246,...,,,,,,,,,,
715,1931,,0.0,0,,AbbottFreeStyleLibre-JKGX280-T2691,2018-01-17T03:21:17,,,d8efb6c8c88a4b35a2e8a34480d81426,...,,,,,,,,,,
716,1929,,0.0,0,,AbbottFreeStyleLibre-JKGX280-T2691,2018-01-17T03:36:17,,,f4c6f77b38964eb991c189ea4b4dd3e7,...,,,,,,,,,,
717,1928,,0.0,0,,AbbottFreeStyleLibre-JKGX280-T2691,2018-01-17T03:51:17,,,355075ac8d7b42c3b4fba5992aefd593,...,,,,,,,,,,


In [89]:
# rename "value" field to "mmol/L"
cgm = cgm.rename(columns={"value": "mmol_L"})

# convert mmol/L to mg/dL and create a new field
cgm["mg_dL"] = (cgm["mmol_L"] * 18.01559).astype(int)

# view the cgm mg/dL data
cgm.mg_dL.head()

713     70
714     85
715     94
716    102
717    102
Name: mg_dL, dtype: int64

In [97]:
# define a function that captures the Ambulatory Glucose Profile statistics
# http://www.agpreport.org/agp/agpreports#CGM_AGP
def get_stats(df):
    
    statsDF = pd.DataFrame(index=[0])
    
    totalNumberCBGValues = df.mg_dL.count()
    statsDF["totalCgmValues"] = totalNumberCBGValues

    firstDataPoint = df["deviceTime"].min()
    lastDataPoint = df["deviceTime"].max()
    
    if "FreeStyle" in df.deviceId.describe()["top"]:
        dataFrequency = "15"
    else:
        dataFrequency = "5"
            
    totalPossibleCgmValues = len(
        pd.date_range(firstDataPoint,
                      lastDataPoint,
                      freq=dataFrequency + "min")
    )
    
    statsDF["totalPossibleCgmReadings"] = totalPossibleCgmValues
        
    statsDF["percentOfPossibleCgmReadings"] = \
        totalNumberCBGValues / totalPossibleCgmValues
    
    statsDF["firstDataPoint"] = firstDataPoint 
    statsDF["lastDataPoint"] = lastDataPoint
    statsDF["daysOfCgmData"] = \
        int((pd.Timestamp(lastDataPoint) - pd.Timestamp(firstDataPoint)).days)
      
    mean_mgdL = df.mg_dL.mean()
    statsDF["mean_mgdL"] = mean_mgdL
    
    std_mgdL = df.mg_dL.std()
    statsDF["std_mgdL"] = std_mgdL
    
    cov_mgdL = std_mgdL / mean_mgdL
    statsDF["cov_mgdL"] = cov_mgdL
    
    totalBelow54 = sum(df.mg_dL < 54)
    statsDF["percentBelow54"] = totalBelow54 / totalNumberCBGValues
    
    totalBelow70 = sum(df.mg_dL < 70)    
    statsDF["percentBelow70"] = totalBelow70 / totalNumberCBGValues
    
    total70to180 = sum((df.mg_dL >= 70) & (df.mg_dL <= 180))    
    statsDF["percentTimeInRange"] = total70to180 / totalNumberCBGValues
    
    totalAbove180 = sum(df.mg_dL > 180)
    statsDF["percentAbove180"] = totalAbove180 / totalNumberCBGValues
    
    totalAbove250 = sum(df.mg_dL > 250)
    statsDF["percentAbove250"] = totalAbove250 / totalNumberCBGValues

    statsDF["min_mgdL"] = df.mg_dL.min()
    statsDF["10%"] = df.mg_dL.quantile(q=0.10)
    statsDF["25%"] = df.mg_dL.quantile(q=0.25)
    statsDF["median"] = df.mg_dL.quantile(q=0.50)
    statsDF["75%"] = df.mg_dL.quantile(q=0.75)
    statsDF["90%"] = df.mg_dL.quantile(q=0.90)
    statsDF["max_mgdL"] = df.mg_dL.max()

    # get estimated HbA1c or Glucose Management Index (GMI)
    # GMI(%) = 3.31 + 0.02392 x [mean glucose in mg/dL]
    # https://www.jaeb.org/gmi/
    statsDF["GMI"] = 3.31 + (0.02392 * mean_mgdL)
    
    return statsDF

In [98]:
# apply function to get stats
cgmStats = get_stats(cgm)
cgmStats.T

Unnamed: 0,0
totalCgmValues,1282
totalPossibleCgmReadings,1367
percentOfPossibleCgmReadings,0.93782
firstDataPoint,2018-01-17T02:51:17
lastDataPoint,2018-01-31T08:34:03
daysOfCgmData,14
mean_mgdL,137.79
std_mgdL,57.9642
cov_mgdL,0.42067
percentBelow54,0.0234009
