In [1]:
import pandas as pd
import pandas
import numpy as np
import matplotlib.pyplot as plt
from datetime import timedelta
import pywt
from sklearn.model_selection import train_test_split
import scipy
import scipy.fftpack
from numpy.fft import fft
from sklearn.decomposition import PCA
from sklearn import tree
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
import pickle

insulinDf = pandas.read_csv('InsulinData.csv',parse_dates=[['Date','Time']],low_memory=False,usecols=['Date','Time','BWZ Carb Input (grams)']).iloc[::-1]
cgmDf = pandas.read_csv('CGMData.csv',parse_dates=[['Date','Time']],low_memory=False,usecols=['Date','Time','Sensor Glucose (mg/dL)']).iloc[::-1]

insulinDfset2 = pandas.read_csv('InsulinAndMealIntake670GPatient3.csv',parse_dates=[['Date','Time']],low_memory=False,usecols=['Date','Time','BWZ Carb Input (grams)']).iloc[::-1]
cgmDfset2 = pandas.read_csv('CGMData670GPatient3.csv',parse_dates=[['Date','Time']],low_memory=False,usecols=['Date','Time','Sensor Glucose (mg/dL)']).iloc[::-1]


In [3]:
def makeDf(insulinDF, cgmDf):
    mealDataMatrix = []
    noMealDataMatrix = []

    insulinneutralize = insulinDf[
        insulinDf["BWZ Carb Input (grams)"].notnull()
        & insulinDf["BWZ Carb Input (grams)"]
        != 0
    ]
    insulinMealDates = pandas.DataFrame(insulinneutralize["Date_Time"])

    insulinMealDates["DiffwBelow"] = (
        insulinMealDates.iloc[:, 0].diff(-1).dt.total_seconds()/3600
    )
    insulinMealDates = insulinMealDates.loc[insulinMealDates["DiffwBelow"] <= -2]
    insulinNoMealDates = insulinMealDates.loc[insulinMealDates["DiffwBelow"] <= -4]

    mealDatesList = cleaning(insulinMealDates, cgmDf)
    for mealDateTime in mealDatesList:
        idx = cgmDf[cgmDf["Date_Time"] == mealDateTime]["Sensor Glucose (mg/dL)"].index[
            0
        ]
        mealDataMatrix.append(
            list(
                cgmDf["Sensor Glucose (mg/dL)"]
                .iloc[cgmDf.shape[0] - 1 - idx - 6 : cgmDf.shape[0] - 1 - idx + 24]
                .values
            )
        )
   

    noMealDatesList = cleaning(insulinNoMealDates, cgmDf)
    for noMealDateTime in noMealDatesList:
        idx = cgmDf[cgmDf["Date_Time"] == noMealDateTime][
            "Sensor Glucose (mg/dL)"
        ].index[0]
        noMealDataMatrix.append(
            list(
                cgmDf["Sensor Glucose (mg/dL)"]
                .iloc[cgmDf.shape[0] - 1 - idx + 24 : cgmDf.shape[0] - 1 - idx + 48]
                .values
            )
        )

    mealDf = pandas.DataFrame(mealDataMatrix).dropna()
    noMealDf = pandas.DataFrame(noMealDataMatrix).dropna()
    return (mealDf, noMealDf)

def cleaning(arg0, cgmDf):
    arg0.drop(arg0.head(1).index, inplace=True)
    arg0.drop(arg0.tail(2).index, inplace=True)
    return [
        cgmDf.loc[cgmDf['Date_Time'] >= arg0['Date_Time'][ind]][
            'Date_Time'
        ].iloc[0]
        for ind in arg0.index
    ]

In [4]:
mealDf,noMealDf=makeDf(insulinDf,cgmDf)
mealDfset2, noMealDfset2=makeDf(insulinDfset2,cgmDfset2)

meal = pandas.concat([mealDf, mealDfset2], ignore_index=True, sort = False)
no_meal = pandas.concat([noMealDf, noMealDfset2], ignore_index=True, sort = False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [5]:
def createmealfeaturematrix(data):
    op_matrix = pd.DataFrame()
    # time between max and min glucose
    op_matrix["Time_bet_max_min"] = (
        data.iloc[:, 22:25].idxmin(axis=1) - data.iloc[:, 5:19].idxmax(axis=1)
    ) * 5
    
    # normalize glucose difference
    op_matrix["Glucose_Difference_normalized"] = (
        data.iloc[:, 5:19].max(axis=1) - data.iloc[:, 22:25].min(axis=1)
    ) / (data.iloc[:, 22:25].min(axis=1))

    # windowed mean
    if data.shape[1] > 24:
        for i in range(6, data.shape[1], 6):
            op_matrix["Mean_" + str(i - 6)] = data.iloc[
                :, i : i + 6
            ].mean(axis=1)
    else:
        for i in range(0, data.shape[1], 6):
            op_matrix["Mean_" + str(i)] = data.iloc[:, i : i + 6].mean(
                axis=1
            )
            
    # cgm velocity
    velocityDF = pd.DataFrame()
    for i in range(0, data.shape[1] - 5):
        velocityDF["Vel_" + str(i)] = data.iloc[:, i + 5] - data.iloc[:, i]
    op_matrix["Window_Velocity_Max"] = velocityDF.max(axis=1, skipna=True)
    
    #1st and 2nd differential
    tm=data.iloc[:,22:25].idxmin(axis=1)
    maximum=data.iloc[:,5:19].idxmax(axis=1)
    list1=[]
    second_differential_data=[]
    standard_deviation=[]
    for i in range(len(data)):
        list1.append(np.diff(data.iloc[:,maximum[i]:tm[i]].iloc[i].tolist()).max())
        second_differential_data.append(np.diff(np.diff(data.iloc[:,maximum[i]:tm[i]].iloc[i].tolist())).max())
        standard_deviation.append(np.std(data.iloc[i]))
    op_matrix['1stDifferential']=list1
    op_matrix['2ndDifferential']=second_differential_data

    # fft_4
    def get_fft(row):
        cgmFFTValues = np.abs(fft(row))
        cgmFFTValues.sort()
        return np.flip(cgmFFTValues)[0:4]
    
    FFT = pd.DataFrame()
    FFT["FFT_Top2"] = data.apply(lambda row: get_fft(row), axis=1)
    FFT_updated = pd.DataFrame(
        FFT.FFT_Top2.tolist(), columns=["FFT_2", "FFT_3", "FFT_4","FFT_5"]
    )
    FFT_updated.head()
    for i in range(1,5):
        op_matrix["FFT_" + str(i + 1)] = FFT_updated["FFT_" + str(i + 1)]

    return op_matrix


In [8]:
Meal_features=createmealfeaturematrix(meal)
Meal_features.drop_duplicates()

Unnamed: 0,Time_bet_max_min,Glucose_Difference_normalized,Mean_0,Mean_6,Mean_12,Mean_18,Window_Velocity_Max,1stDifferential,2ndDifferential,FFT_2,FFT_3,FFT_4,FFT_5
0,30,-0.041199,230.666667,228.833333,268.000000,275.500000,44.0,8.0,2.0,7677.0,475.903609,475.903609,132.355707
1,55,0.833333,104.666667,110.500000,70.333333,73.500000,60.0,4.0,11.0,2445.0,436.384942,436.384942,322.178882
2,20,-0.042453,187.166667,175.000000,209.833333,210.166667,38.0,5.0,3.0,5919.0,291.275268,291.275268,122.914246
3,30,0.189944,135.166667,194.333333,197.500000,166.500000,62.0,-4.0,5.0,4970.0,613.565611,613.565611,145.278432
4,25,-0.068027,74.333333,122.500000,138.666667,186.500000,50.0,8.0,7.0,3425.0,806.607594,806.607594,464.817995
...,...,...,...,...,...,...,...,...,...,...,...,...,...
806,75,0.162162,170.000000,166.000000,164.666667,138.000000,11.0,4.0,5.0,4808.0,193.284845,193.284845,124.181318
807,25,0.032432,138.500000,176.833333,185.833333,185.500000,42.0,0.0,2.0,4881.0,504.863235,504.863235,125.141376
808,85,0.077670,109.000000,102.333333,105.500000,113.000000,16.0,4.0,3.0,3262.0,98.589862,98.589862,54.514263
809,35,0.048649,171.333333,183.166667,187.833333,225.500000,46.0,0.0,2.0,5515.0,340.338083,340.338083,305.419463


In [None]:
No_Meal_features=createmealfeaturematrix(no_meal)
No_Meal_features

Unnamed: 0,Time_bet_max_min,Glucose_Difference_normalized,Mean_0,Mean_6,Mean_12,Mean_18,Window_Velocity_Max,1stDifferential,2ndDifferential,FFT_2,FFT_3,FFT_4,FFT_5
0,90,1.262295,166.000000,115.166667,78.833333,63.500000,-2.0,2.0,5.0,2541.0,557.106264,557.106264,266.208115
1,85,4.725000,252.000000,186.666667,111.166667,50.500000,-29.0,-7.0,6.0,3602.0,1092.090082,1092.090082,490.517942
2,25,0.029851,100.500000,97.666667,120.000000,134.000000,39.0,1.0,3.0,2713.0,247.070293,247.070293,88.154528
3,85,2.125000,134.833333,96.666667,88.666667,47.333333,2.0,2.0,12.0,2205.0,346.151763,346.151763,315.605608
4,50,0.139831,197.833333,245.166667,263.833333,248.333333,60.0,1.0,6.0,5731.0,419.394090,419.394090,130.679203
5,25,0.030120,123.500000,144.500000,160.666667,167.000000,37.0,1.0,5.0,3574.0,224.479166,224.479166,154.481040
6,90,1.549451,231.500000,198.166667,162.500000,111.833333,-2.0,7.0,22.0,4224.0,625.283930,625.283930,338.049019
7,45,-0.117241,123.333333,115.666667,126.833333,137.833333,21.0,4.0,2.0,3022.0,129.100187,129.100187,46.553427
8,60,0.077844,149.500000,171.166667,162.333333,166.833333,31.0,10.0,9.0,3899.0,167.577659,167.577659,87.396389
9,20,0.067164,84.666667,89.666667,128.333333,136.500000,43.0,1.0,6.0,2635.0,346.043329,346.043329,120.632363


In [None]:
pca = PCA(n_components=10)
principalComponents = pca.fit(Meal_features)
PCA_mealdata = pca.fit_transform(Meal_features)

principalComponents = pca.fit(No_Meal_features)
PCA_nomealdata = pca.fit_transform(No_Meal_features)


In [None]:
Training_data = np.concatenate((PCA_mealdata, PCA_nomealdata), axis=0)
no_of_mealrows = meal.shape[0]
no_of_nomealrows = no_meal.shape[0]
Training_labels = [1 for _ in range(no_of_mealrows)]
for _ in range(no_of_nomealrows):
    Training_labels.append(0)


In [None]:
A=pd.DataFrame(Training_data)
B=pd.DataFrame(Training_labels)
FM = pd.concat([A,B], axis=1, sort=False)

Total_Data = FM.reindex(np.random.permutation(FM.index))

kf = RepeatedKFold(n_splits=5, n_repeats=5)

for train_index, test_index in kf.split(Total_Data):
    trainData = Total_Data.iloc[train_index, 0:10]
    trainLabel = Total_Data.iloc[train_index, 10]
    testData = Total_Data.iloc[test_index, 0:10]
    testLabel = Total_Data.iloc[test_index, 10]

    clf = tree.DecisionTreeClassifier()
    clf.fit(trainData, trainLabel)
    #predictedLabel = clf.predict(testData)
    
    pickle.dump(clf, open("model.pkl", 'wb'))