# Trim CGM Data Script

Uses each patient's summary, glucose, ECG, and breathing tables. Not meant to be used on a patient's Day 1 data, because of insertion trauma and elevated noise levels on that day.

In [1]:
import pandas as pd
import numpy as np
import math
from datetime import datetime, timedelta

In [2]:
# Timestamp layout shared by the Summary, ECG and Breathing csv files.
_SENSOR_TIME_FORMAT = '%d/%m/%Y %H:%M:%S.%f'

# Summary columns that are not needed downstream.
_UNUSED_SUMMARY_COLUMNS = ['BRNoise', 'BRConfidence', 'GSR', 'LinkQuality',
                           'RSSI', 'TxPower', 'ECGNoise', 'HRV']


def _align_waveform(table, millisecond_str, start_time):
    """Thin out a waveform table and trim its start.

    Keeps only the rows whose raw ``Time`` string ends in ``millisecond_str``,
    converts ``Time`` to datetimes, and drops every row earlier than
    ``start_time``.  Returns a new dataframe; ``table`` is not modified.
    """
    # .copy() so the column assignment below never writes into a view of the input
    table = table.loc[table['Time'].str[-3:] == millisecond_str].copy()
    table['Time'] = pd.to_datetime(table['Time'], format=_SENSOR_TIME_FORMAT)
    return table[table['Time'] >= start_time]


def trimCGM(files: list, day: int) -> list:
    """Trim one day of sensor and glucose data so the tables line up in time.

    Nothing is written to disk; the trimmed tables are only returned.

    Args:
        files: paths to four csv files, in the order Summary, ECG, Breathing,
            Glucose.
        day: day of the month to keep from the glucose table.  The year and
            month are fixed to 2014-10.

    Returns:
        A two-element list ``[qualities, glucose]``.  ``qualities`` is the
        inner join (on ``Time``) of the Summary, ECG and Breathing tables;
        ``glucose`` is the matching glucose table with the columns
        ``Time``, ``glucose`` and ``type``.

    Raises:
        IndexError: if no non-manual glucose reading for ``day`` lies more
            than 5 minutes after the first summary timestamp, or if the
            Summary or ECG table is empty.
        KeyError: if an expected column is missing from one of the tables.
    """
    # read in the 4 tables for Patient X on Day Y
    summary = pd.read_csv(files[0])
    ecg = pd.read_csv(files[1])
    breathing = pd.read_csv(files[2])
    glucose = pd.read_csv(files[3])

    # ----- CLEANING UP GLUCOSE TABLE -----
    # zero-pad the day so two-digit days (10, 11, ...) form a valid date string
    date_str = f'2014-10-{day:02d}'

    # keep only non-manual readings taken on the requested day
    keep = (glucose['type'] != 'manual') & (glucose['date'] == date_str)
    glucose = glucose.loc[keep].copy()

    # merge the date and time strings into a single datetime column
    glucose['Time'] = pd.to_datetime(glucose['date'] + " " + glucose['time'])
    glucose = glucose.loc[:, ['Time', 'glucose', 'type']]

    # ----- CLEANING UP SUMMARY TABLE -----
    summary = summary.drop(columns=_UNUSED_SUMMARY_COLUMNS)

    # treat 0 as "no reading" in HR and carry the previous value forward
    heart_rate = summary['HR'].mask(summary['HR'] == 0).ffill()
    # masking turns the column into floats; go back to integers when that is
    # lossless (replaces the deprecated ffill(downcast='infer'))
    if heart_rate.notna().all() and (heart_rate % 1 == 0).all():
        heart_rate = heart_rate.astype('int64')
    summary['HR'] = heart_rate

    summary['Time'] = pd.to_datetime(summary['Time'], format=_SENSOR_TIME_FORMAT)

    # ----- TRIMMING GLUCOSE AND SUMMARY TABLES -----
    margin = timedelta(minutes=5)

    # first summary timestamp (per the original notes: when the patient woke up)
    summary_start_time = summary['Time'].iloc[0]

    # glucose starts more than 5 minutes after the summary recording begins
    glucose = glucose[glucose['Time'] > summary_start_time + margin]
    if glucose.empty:
        raise IndexError(
            f"no non-manual glucose readings on {date_str} later than "
            f"{summary_start_time + margin}"
        )
    glucose_start_time = glucose['Time'].iloc[0]

    # summary starts less than 5 minutes before the first kept glucose reading
    summary = summary[summary['Time'] > glucose_start_time - margin]

    # last summary timestamp (per the original notes: when the patient goes to sleep)
    summary_end_time = summary['Time'].iloc[-1]

    # glucose ends less than 5 minutes after the summary recording stops
    glucose = glucose[glucose['Time'] < summary_end_time + margin]

    # ----- CLEANING ECG AND BREATHING DATA -----
    # The millisecond suffix of the first ECG row selects which samples to keep
    # from both waveform tables.
    # NOTE(review): the inner join below only matches if the summary rows carry
    # the same millisecond suffix -- confirm against the recordings.
    millisecond_str = ecg['Time'].iloc[0][-3:]
    new_summary_start_time = summary['Time'].iloc[0]

    ecg = _align_waveform(ecg, millisecond_str, new_summary_start_time)
    # the original trimmed `ecg` a second time here and left `breathing` untrimmed
    breathing = _align_waveform(breathing, millisecond_str, new_summary_start_time)

    qualities = pd.concat([summary.set_index('Time'),
                           ecg.set_index('Time'),
                           breathing.set_index('Time')], axis=1, join='inner')

    return [qualities, glucose]

In [9]:
# Patient 1, day 2: paths are listed in the order trimCGM expects
# (Summary, ECG, Breathing, Glucose)
files = [
    "patient1data/day2/2014_10_02-10_56_44_Summary.csv",
    "patient1data/day2/2014_10_02-10_56_44_ECG.csv",
    "patient1data/day2/2014_10_02-10_56_44_Breathing.csv",
    "patient1data/glucose.csv",
]
day2 = trimCGM(files, day=2)

In [8]:
# Patient 1, day 3: paths are listed in the order trimCGM expects
# (Summary, ECG, Breathing, Glucose)
files = [
    "patient1data/day3/2014_10_03-06_36_24_Summary.csv",
    "patient1data/day3/2014_10_03-06_36_24_ECG.csv",
    "patient1data/day3/2014_10_03-06_36_24_Breathing.csv",
    "patient1data/glucose.csv",
]
day3 = trimCGM(files, day=3)

In [7]:
# Patient 1, day 4: paths are listed in the order trimCGM expects
# (Summary, ECG, Breathing, Glucose)
files = [
    "patient1data/day4/2014_10_04-06_34_57_Summary.csv",
    "patient1data/day4/2014_10_04-06_34_57_ECG.csv",
    "patient1data/day4/2014_10_04-06_34_57_Breathing.csv",
    "patient1data/glucose.csv",
]
day4 = trimCGM(files, day=4)

In [14]:
# stack the per-day qualities tables (element 0 of each trimCGM result)
# row-wise, in day order, then display the combined frame
patient1_qualities = pd.concat([result[0] for result in (day2, day3, day4)])
patient1_qualities

Unnamed: 0_level_0,HR,BR,SkinTemp,Posture,Activity,PeakAccel,BatteryVolts,BatteryLevel,BRAmplitude,ECGAmplitude,...,SagittalMin,SagittalPeak,DeviceTemp,StatusInfo,CoreTemp,AuxADC1,AuxADC2,AuxADC3,EcgWaveform,BreathingWaveform
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-10-02 10:59:00.420,90,7.4,-3276.8,-12,0.02,0.06,4.158,93,6.0,0.0008,...,0.09,0.18,26.0,531,36.8,416,414,462,1552,7500985
2014-10-02 10:59:01.420,90,7.4,-3276.8,-13,0.05,0.12,4.158,93,6.0,0.0008,...,0.10,0.20,26.0,531,36.8,415,421,483,1535,7501839
2014-10-02 10:59:02.420,90,7.5,-3276.8,-13,0.04,0.10,4.158,93,7.0,0.0008,...,0.08,0.20,26.0,531,36.8,413,429,484,1597,7499544
2014-10-02 10:59:03.420,90,7.5,-3276.8,-11,0.04,0.07,4.158,93,8.0,0.0008,...,0.07,0.21,26.0,531,36.8,415,421,483,1602,7497676
2014-10-02 10:59:04.420,90,7.7,-3276.8,-12,0.07,0.12,4.158,93,10.0,0.0008,...,0.08,0.22,26.0,531,36.8,415,423,485,1580,7509417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2014-10-04 13:47:56.423,62,18.4,-3276.8,-7,0.07,0.10,3.888,48,67.0,0.0008,...,0.04,0.22,32.4,531,35.3,409,419,461,2012,7747799
2014-10-04 13:47:57.423,62,17.7,-3276.8,-7,0.07,0.08,3.888,48,60.0,0.0008,...,0.03,0.12,32.4,531,35.3,414,420,483,1985,7746251
2014-10-04 13:47:58.423,62,17.7,-3276.8,-7,0.05,0.08,3.888,48,53.0,0.0008,...,0.03,0.12,32.4,531,35.3,419,424,486,1883,7747052
2014-10-04 13:47:59.423,62,16.9,-3276.8,-8,0.02,0.04,3.888,48,47.0,0.0008,...,0.04,0.13,32.4,531,35.3,415,421,483,1916,7746571


In [15]:
# stack the per-day glucose tables (element 1 of each trimCGM result)
# row-wise, in day order, then display the combined frame
patient1_glucose = pd.concat([result[1] for result in (day2, day3, day4)])
patient1_glucose

Unnamed: 0,Time,glucose,type
193,2014-10-02 11:04:00,14.4,cgm
194,2014-10-02 11:09:00,14.3,cgm
195,2014-10-02 11:14:00,14.2,cgm
196,2014-10-02 11:19:00,14.1,cgm
197,2014-10-02 11:24:00,14.2,cgm
...,...,...,...
809,2014-10-04 13:29:01,6.3,cgm
810,2014-10-04 13:34:01,6.7,cgm
811,2014-10-04 13:39:01,7.0,cgm
812,2014-10-04 13:44:01,7.3,cgm
