# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import scipy as sp
import scipy.fftpack
import sklearn
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, precision_score, accuracy_score, recall_score, f1_score
from sklearn.model_selection import KFold, RepeatedKFold, train_test_split, cross_val_score
import pickle
from scipy.interpolate import interp1d
from scipy.interpolate import interp2d

# Reading CSV Files

In [2]:
#reading CMGData csv file
df = pd.read_csv('CGMData.csv', low_memory=False)
df['Sensor Glucose (mg/dL)'] = df['Sensor Glucose (mg/dL)'].fillna(0)
df['DateTime'] = (df['Date'] + ' ' + df['Time'])
df['DateTime'] = pd.to_datetime(df['DateTime'])

#reading CMGData_patient2 csv file
df_2 = pd.read_csv('CGM_patient2.csv', low_memory=False)
df_2['Sensor Glucose (mg/dL)'] = df_2['Sensor Glucose (mg/dL)'].fillna(0)
df_2['DateTime'] = (df_2['Date'] + ' ' + df_2['Time'])
df_2['DateTime'] = pd.to_datetime(df_2['DateTime'])

#reading InsulinData csv file
dfI = pd.read_csv('InsulinData.csv', low_memory=False)
dfI['BWZ Carb Input (grams)'] = dfI['BWZ Carb Input (grams)'].fillna(0)
dfI['DateTime'] = (dfI['Date'] + ' ' + dfI['Time'])
dfI['DateTime'] = pd.to_datetime(dfI['DateTime'])

#reading Insulin_patient2 csv file
dfI_2 = pd.read_csv('Insulin_patient2.csv', low_memory=False)
dfI_2['BWZ Carb Input (grams)'] = dfI_2['BWZ Carb Input (grams)'].fillna(0)
dfI_2['DateTime'] = (dfI_2['Date'] + ' ' + dfI_2['Time'])
dfI_2['DateTime'] = pd.to_datetime(dfI_2['DateTime'])

# Extracting time from Insulin.CSV

In [3]:
#Day time range
dayTime_str = '02:00:00'
timeLimit = pd.to_timedelta(dayTime_str)
dayTime_str2 = '04:00:00'
timeLimit2 = pd.to_timedelta(dayTime_str)

#Extarcting Meal times from Insulin.CSV
dfI_Meals_Taken = dfI.loc[dfI['BWZ Carb Input (grams)'] > 0]
mealTime_List = dfI_Meals_Taken['DateTime'].tolist()

#Extarcting Meal times from Insulin_patient2.CSV
dfI_2_Meals_Taken = dfI_2.loc[dfI_2['BWZ Carb Input (grams)'] > 0]
mealTime_List_2 = dfI_2_Meals_Taken['DateTime'].tolist()

#Extracting valid Meal Times from Insulin.CSV
def extractMealTimeInsulin(mealTime_List):
    mealTime_Valid = [mealTime_List[0]]
    for i in range(len(mealTime_List) - 1):
        diff = mealTime_List[i] - mealTime_List[i+1]
        if diff > timeLimit:
            mealTime_Valid.append(mealTime_List[i+1])
    return mealTime_Valid

#Extracting valid no-meal Times from Insulin.CSV
def extractNoMealTimeInsulin(mealTime_List):
    nomealTime_Valid = [mealTime_List[0] + pd.Timedelta(minutes = 240)]
    for i in range(len(mealTime_List) - 1):
        diff = mealTime_List[i] - mealTime_List[i+1]
        if diff > timeLimit2:
            nomealTime_Valid.append(mealTime_List[i+1] + pd.Timedelta(minutes = 240))
    return nomealTime_Valid

#Defining valid times for both insulin files
mealTime_Valid = extractMealTimeInsulin(mealTime_List)
mealTime_Valid_2 = extractMealTimeInsulin(mealTime_List_2)

#Defining valid nomeal times for both insulin files
nomealTime_Valid = extractNoMealTimeInsulin(mealTime_List)
nomealTime_Valid_2 = extractNoMealTimeInsulin(mealTime_List_2)


# Extracting valid attributes from CGM 

In [4]:
nomeal_Matrix = []

#Extracting time series
def extractingValidTimeSeries(mealTime_Valid):
    timeCMG_Valid = []
    timeCMG_Valid_Extra30 = []
    timeCMG_ValidStart = []
    for i in mealTime_Valid:
        df_time = df.loc[df['DateTime'] >= i]
        time_2 = df_time['DateTime'].iloc[-1]
        timeCMG_ValidStart.append(time_2)
        time_3, time_4 = time_2 - pd.Timedelta(minutes = 30), time_2 + pd.Timedelta(minutes = 120)
        timeCMG_Valid.append(time_4)
        timeCMG_Valid_Extra30.append(time_3)
    return timeCMG_Valid, timeCMG_Valid_Extra30, timeCMG_ValidStart

def extractingValidTimeSeries_noMeal(nomealTime_Valid):
    timeCMG_Valid_Start = []
    timeCMG_Valid_Post = []
    for i in nomealTime_Valid:
        df_time = df.loc[df['DateTime'] >= i]
        time_2 = df_time['DateTime'].iloc[-1]
        time_3 = time_2 - pd.Timedelta(minutes = 120)
        timeCMG_Valid_Post.append(time_2)
        timeCMG_Valid_Start.append(time_3)
    return timeCMG_Valid_Post, timeCMG_Valid_Start

def extractingValidTimeSeriesP2(mealTime_Valid):
    timeCMG_Valid = []
    timeCMG_Valid_Extra30 = []
    timeCMG_ValidStart = []
    for i in mealTime_Valid:
        df_time = df_2.loc[df_2['DateTime'] >= i]
        time_2 = df_time['DateTime'].iloc[-1]
        timeCMG_ValidStart.append(time_2)
        time_3, time_4 = time_2 - pd.Timedelta(minutes = 30), time_2 + pd.Timedelta(minutes = 120)
        timeCMG_Valid.append(time_4)
        timeCMG_Valid_Extra30.append(time_3)
    return timeCMG_Valid, timeCMG_Valid_Extra30, timeCMG_ValidStart

def extractingValidTimeSeriesP2_noMeal(nomealTime_Valid):
    timeCMG_Valid_Start = []
    timeCMG_Valid_Post = []
    for i in nomealTime_Valid:
        try:
            df_time = df_2.loc[df_2['DateTime'] >= i]
            time_2 = df_time['DateTime'].iloc[-1]
            time_3 = time_2 - pd.Timedelta(minutes = 120)
            timeCMG_Valid_Post.append(time_2)
            timeCMG_Valid_Start.append(time_3)
        except IndexError:
            break
    return timeCMG_Valid_Post, timeCMG_Valid_Start


#######

timeCMG_Valid, timeCMG_Valid_Extra30, timeCMG_ValidStart = extractingValidTimeSeries(mealTime_Valid)
timeCMG_Valid_2, timeCMG_Valid_Extra30_2, timeCMG_ValidStart_2 = extractingValidTimeSeriesP2(mealTime_Valid_2)

#NoMeal time data
timeCMG_Valid_Post_noMeal, timeCMG_Valid_Start_noMeal = extractingValidTimeSeries_noMeal(nomealTime_Valid)
timeCMG_Valid_Post_noMeal_2, timeCMG_Valid_Start_noMeal_2 = extractingValidTimeSeriesP2_noMeal(nomealTime_Valid_2)


# Extracting meal matrix

In [5]:
#Extracting meal and nomeal data

def extractingMealMatrix(timeCMG_Valid, timeCMG_Valid_Extra30, timeCMG_ValidStart):
    meal_Matrix = []
    time_Matrix = []
    #Recorded true time
    trueMeal_Matrix = []
    trueTime_Matrix = []
    for i in range(len(timeCMG_Valid)):  
        df_valid = df.loc[df['DateTime'].between(timeCMG_Valid_Extra30[i], timeCMG_Valid[i], inclusive='both')]
        df_trueMeal = df.loc[df['DateTime'].between(timeCMG_ValidStart[i], timeCMG_Valid[i], inclusive='both')]
        df_valid1 = df_valid.loc[df_valid['Sensor Glucose (mg/dL)'] >= 0]
        df_trueMeal1 = df_trueMeal.loc[df_trueMeal['Sensor Glucose (mg/dL)'] >= 0]
    
        time_List = df_valid1['DateTime'].tolist()
        meal_List = df_valid1['Sensor Glucose (mg/dL)'].tolist()  
        trueTime_List = df_trueMeal1['DateTime'].tolist()
        trueMeal_List = df_trueMeal1['Sensor Glucose (mg/dL)'].tolist()
        if sum(trueMeal_List) > 0 and len(meal_List) >= 30:
            trueTime_Matrix.append(trueTime_List[0:24])
            trueMeal_Matrix.append(trueMeal_List[0:24])
            time_Matrix.append(time_List[0:30])
            meal_Matrix.append(meal_List[0:30])
    return meal_Matrix, time_Matrix, trueMeal_Matrix, trueTime_Matrix

def extractingNoMealMatrix(timeCMG_Valid_Post_noMeal, timeCMG_Valid_Start_noMeal):
    noMeal_Matrix = []
    noMeal_time_Matrix = []
    for i in range(len(timeCMG_Valid_Post_noMeal)):  
        df_valid = df.loc[df['DateTime'].between(timeCMG_Valid_Start_noMeal[i], timeCMG_Valid_Post_noMeal[i], inclusive='both')]
        df_valid1 = df_valid.loc[df_valid['Sensor Glucose (mg/dL)'] >= 0]
    
        time_List = df_valid1['DateTime'].tolist()
        noMeal_List = df_valid1['Sensor Glucose (mg/dL)'].tolist()  
        if sum(noMeal_List) > 0 and len(noMeal_List) >= 24:
            noMeal_time_Matrix.append(time_List[0:24])
            noMeal_Matrix.append(noMeal_List[0:24])
    return noMeal_Matrix, noMeal_time_Matrix

def extractingMealMatrixP2(timeCMG_Valid, timeCMG_Valid_Extra30, timeCMG_ValidStart):
    meal_Matrix = []
    time_Matrix = []
    #Recorded true time
    trueMeal_Matrix = []
    trueTime_Matrix = []
    for i in range(len(timeCMG_Valid)):  
        df_valid = df_2.loc[df_2['DateTime'].between(timeCMG_Valid_Extra30[i], timeCMG_Valid[i], inclusive='both')]
        df_trueMeal = df_2.loc[df_2['DateTime'].between(timeCMG_ValidStart[i], timeCMG_Valid[i], inclusive='both')]
        df_valid1 = df_valid.loc[df_valid['Sensor Glucose (mg/dL)'] >= 0]
        df_trueMeal1 = df_trueMeal.loc[df_trueMeal['Sensor Glucose (mg/dL)'] >= 0]
    
        time_List = df_valid1['DateTime'].tolist()
        meal_List = df_valid1['Sensor Glucose (mg/dL)'].tolist()  
        trueTime_List = df_trueMeal1['DateTime'].tolist()
        trueMeal_List = df_trueMeal1['Sensor Glucose (mg/dL)'].tolist()
        if sum(trueMeal_List) > 0 and len(meal_List) >= 30:
            trueTime_Matrix.append(trueTime_List[0:24])
            trueMeal_Matrix.append(trueMeal_List[0:24])
            time_Matrix.append(time_List[0:30])
            meal_Matrix.append(meal_List[0:30])
    return meal_Matrix, time_Matrix, trueMeal_Matrix, trueTime_Matrix

def extractingNoMealMatrixP2(timeCMG_Valid_Post_noMeal, timeCMG_Valid_Start_noMeal):
    noMeal_Matrix = []
    noMeal_time_Matrix = []
    for i in range(len(timeCMG_Valid_Post_noMeal)):  
        df_valid = df_2.loc[df_2['DateTime'].between(timeCMG_Valid_Start_noMeal[i], timeCMG_Valid_Post_noMeal[i], inclusive='both')]
        df_valid1 = df_valid.loc[df_valid['Sensor Glucose (mg/dL)'] >= 0]
    
        time_List = df_valid1['DateTime'].tolist()
        noMeal_List = df_valid1['Sensor Glucose (mg/dL)'].tolist()  
        if sum(noMeal_List) > 0 and len(noMeal_List) >= 24:
            noMeal_time_Matrix.append(time_List[0:24])
            noMeal_Matrix.append(noMeal_List[0:24])
    return noMeal_Matrix, noMeal_time_Matrix

meal_Matrix, time_Matrix, trueMeal_Matrix, trueTime_Matrix = extractingMealMatrix(timeCMG_Valid, timeCMG_Valid_Extra30, timeCMG_ValidStart)
meal_Matrix_2, time_Matrix_2, trueMeal_Matrix_2, trueTime_Matrix_2 = extractingMealMatrixP2(timeCMG_Valid_2, timeCMG_Valid_Extra30_2, timeCMG_ValidStart_2)

#NoMeal List
noMeal_Matrix, noMeal_time_Matrix = extractingNoMealMatrix(timeCMG_Valid_Post_noMeal, timeCMG_Valid_Start_noMeal)
noMeal_Matrix_2, noMeal_time_Matrix_2 = extractingNoMealMatrixP2(timeCMG_Valid_Post_noMeal_2, timeCMG_Valid_Start_noMeal_2)

len(meal_Matrix), len(noMeal_Matrix)


(569, 566)

# Feature Extraction I
Extracting Time Difference

In [6]:
#Extracting time difference 'T' CGMmax-Time - CGMmealtime-Time
def extractTimeDifference(trueMeal_Matrix, trueTime_Matrix):
    timeDifference = []
    for i in range(len(trueMeal_Matrix)):
        CGMmeal_Time = trueTime_Matrix[i][0]
        CGMmax = max(trueMeal_Matrix[i])
        CGMmax_Index = trueMeal_Matrix[i].index(CGMmax)
        CGMmax_Time = trueTime_Matrix[i][CGMmax_Index]
        T_diff = pd.to_timedelta(CGMmax_Time - CGMmeal_Time).total_seconds()
        timeDifference.append(abs(T_diff/1000))
    return timeDifference

timeDifferenceList = extractTimeDifference(trueMeal_Matrix, trueTime_Matrix)
timeDifferenceList_2 = extractTimeDifference(trueMeal_Matrix_2, trueTime_Matrix_2)

#Extracting Tou for nomeal
def extractTimeDifference_noMeal(noMeal_Matrix, noMeal_time_Matrix):
    timeDifference = []
    for i in range(len(noMeal_Matrix)):
        CGMmeal_Time = noMeal_time_Matrix[i][0]
        CGMmax = max(noMeal_Matrix[i])
        CGMmax_Index = noMeal_Matrix[i].index(CGMmax)
        CGMmax_Time = noMeal_time_Matrix[i][CGMmax_Index]
        T_diff = pd.to_timedelta(CGMmax_Time - CGMmeal_Time).total_seconds()
        timeDifference.append(abs(T_diff/1000))
    return timeDifference

timeDifferenceList_noMeal = extractTimeDifference_noMeal(noMeal_Matrix, noMeal_time_Matrix)
timeDifferenceList_noMeal_2 = extractTimeDifference_noMeal(noMeal_Matrix_2, noMeal_time_Matrix_2)


# Feature Extraction II
Extracting CGM difference 'dG' CGMmax - CGMmeal

In [7]:
#Extracting CGM difference 'dG' CGMmax - CGMmeal
def extractdG(trueMeal_Matrix):
    dG_NormList = []
    dG_List = []
    for i in range(len(trueMeal_Matrix)):
        #CGMmeal = trueMeal_Matrix[i][0]
        #CGMavg = sum(trueMeal_Matrix[i])/len(trueMeal_Matrix[i])
        CGMmin = min(j for j in trueMeal_Matrix[i] if j > 0)
        CGMmax = max(trueMeal_Matrix[i])
        CGMmax_Index = trueMeal_Matrix[i].index(CGMmax)
        dG = CGMmax - CGMmin
        dG_List.append(dG)
        dG_Norm = dG/CGMmin
        dG_NormList.append(round(dG_Norm, 2))
    return dG_NormList
        
dG_NormList = extractdG(trueMeal_Matrix)
dG_NormList_2 = extractdG(trueMeal_Matrix_2)

#Extracting CGM difference 'dG' CGMmax - CGMmeal
def extractdG_noMeal(noMeal_Matrix):
    dG_NormList = []
    dG_List = []
    for i in range(len(noMeal_Matrix)):
        #CGMmeal = trueMeal_Matrix[i][0]
        #CGMavg = sum(trueMeal_Matrix[i])/len(trueMeal_Matrix[i])
        CGMmin = min(j for j in noMeal_Matrix[i] if j > 0)
        CGMmax = max(noMeal_Matrix[i])
        CGMmax_Index = noMeal_Matrix[i].index(CGMmax)
        dG = CGMmax - CGMmin
        dG_List.append(dG)
        dG_Norm = dG/CGMmin
        dG_NormList.append(round(dG_Norm, 2))
    return dG_NormList

dG_NormList_noMeal = extractdG_noMeal(noMeal_Matrix)
dG_NormList_noMeal_2 = extractdG_noMeal(noMeal_Matrix_2)


# Feature Extraction III
Extracting peaks and frequencies unsinf FFT

In [8]:
#Extracting FFT values
def extractFFTValues(meal_Matrix):
    freq2 = []
    freq3 = []
    pow2 = []
    pow3 = []
    for i in range(len(meal_Matrix)):
        CGM_temp = meal_Matrix[i]
        fft = sp.fftpack.fft(CGM_temp)
        amp = np.abs(fft)
        psd = amp ** 2
        freq = sp.fftpack.fftfreq(len(CGM_temp), 1)

        #Sorting and finding max values
        amp_sort = sorted(amp)
        pos_2, pos_3 = amp_sort[-3], amp_sort[-4]

        #checking for peak index by position
        peak_2, peak_3 = np.where(amp == pos_2), np.where(amp == pos_3)

        #getting peak indices
        peak_index2, peak_index3 = peak_2[0][0], peak_3[0][0]

        #peak power values
        pow_2, pow_3 = np.log(psd[peak_index2]), np.log(psd[peak_index3])
        #peak freq values
        freq_2, freq_3 = freq[peak_index2], freq[peak_index3]
        
        #adding into list
        pow2.append(round(pow_2, 3))
        freq2.append(round(freq_2, 3))
        pow3.append(round(pow_3, 3))
        freq3.append(round(freq_3, 3))
    return pow2, pow3, freq2, freq3

pow2, pow3, freq2, freq3 = extractFFTValues(meal_Matrix)
pow2_2, pow3_2, freq2_2, freq3_2 = extractFFTValues(meal_Matrix_2)


#Extracting for nomeal data
def extractFFTValues_noMeal(noMeal_Matrix):
    freq2 = []
    freq3 = []
    pow2 = []
    pow3 = []
    for i in range(len(noMeal_Matrix)):
        CGM_temp = noMeal_Matrix[i]
        fft = sp.fftpack.fft(CGM_temp)
        amp = np.abs(fft)
        psd = amp ** 2
        freq = sp.fftpack.fftfreq(len(CGM_temp), 1)

        #Sorting and finding max values
        if len(amp) > 5:
            amp_sort = sorted(amp)
            pos_2, pos_3 = amp_sort[-3], amp_sort[-4]

            #checking for peak index by position
            peak_2, peak_3 = np.where(amp == pos_2), np.where(amp == pos_3)

            #getting peak indices
            peak_index2, peak_index3 = peak_2[0][0], peak_3[0][0]

            #peak power values
            pow_2, pow_3 = np.log(psd[peak_index2]), np.log(psd[peak_index3])
            #peak freq values
            freq_2, freq_3 = freq[peak_index2], freq[peak_index3]

            #adding into list
            pow2.append(round(pow_2, 3))
            freq2.append(round(freq_2, 3))
            pow3.append(round(pow_3, 3))
            freq3.append(round(freq_3, 3))
        else:
            pow2.append(0)
            freq2.append(0)
            pow3.append(0)
            freq3.append(0)
    return pow2, pow3, freq2, freq3

pow2_NM, pow3_NM, freq2_NM, freq3_NM = extractFFTValues_noMeal(noMeal_Matrix)
pow2_NM_2, pow3_NM_2, freq2_NM_2, freq3_NM_2 = extractFFTValues_noMeal(noMeal_Matrix_2)



# Feature Extraction IV

Extarcting derivatives and double derivatives.

In [9]:
#Extracting first derivative
def extractFirstDervative(meal_Matrix):
    firstDervative_list = []
    for i in range(len(trueMeal_Matrix)):
        CGMmax = max(trueMeal_Matrix[i])
        CGMmax_Index = trueMeal_Matrix[i].index(CGMmax)
        diff = np.diff(trueMeal_Matrix[i][0:CGMmax_Index], 1)
        diff = diff.tolist()
        if len(diff) > 0:
            diff_avg = sum(diff)/len(diff)
        else:
            diff_avg = 0
        firstDervative_list.append(diff_avg)
    return firstDervative_list

#Extracting second derivative
def extractSecondDervative(meal_Matrix):
    secondDervative_list = []
    for i in range(len(trueMeal_Matrix)):
        CGMmax = max(trueMeal_Matrix[i])
        CGMmax_Index = trueMeal_Matrix[i].index(CGMmax)
        diff = np.diff(trueMeal_Matrix[i][0:CGMmax_Index], 2)
        diff = diff.tolist()
        if len(diff) > 0:
            diff_avg = sum(diff)/len(diff)
        else:
            diff_avg = 0
        secondDervative_list.append(diff_avg)
    return secondDervative_list

firstDervative_list, secondDervative_list = extractFirstDervative(trueMeal_Matrix), extractSecondDervative(trueMeal_Matrix)
firstDervative_list_2, secondDervative_list_2 = extractFirstDervative(trueMeal_Matrix_2), extractSecondDervative(trueMeal_Matrix_2)


###No Meal Data
#Extracting first derivative
def extractFirstDervative_NM(noMeal_Matrix):
    firstDervative_list = []
    for i in range(len(trueMeal_Matrix)):
        CGMmax = max(trueMeal_Matrix[i])
        CGMmax_Index = trueMeal_Matrix[i].index(CGMmax)
        diff = np.diff(trueMeal_Matrix[i][0:CGMmax_Index], 1)
        diff = diff.tolist()
        if len(diff) > 0:
            diff_avg = sum(diff)/len(diff)
        else:
            diff_avg = 0
        firstDervative_list.append(diff_avg)
    return firstDervative_list

#Extracting second derivative
def extractSecondDervative_NM(noMeal_Matrix):
    secondDervative_list = []
    for i in range(len(trueMeal_Matrix)):
        CGMmax = max(trueMeal_Matrix[i])
        CGMmax_Index = trueMeal_Matrix[i].index(CGMmax)
        diff = np.diff(trueMeal_Matrix[i][0:CGMmax_Index], 2)
        diff = diff.tolist()
        if len(diff) > 0:
            diff_avg = sum(diff)/len(diff)
        else:
            diff_avg = 0
        secondDervative_list.append(diff_avg)
    return secondDervative_list

firstDervative_list_NM, secondDervative_list_NM = extractFirstDervative_NM(noMeal_Matrix), extractSecondDervative_NM(noMeal_Matrix)
firstDervative_list_NM_2, secondDervative_list_NM_2 = extractFirstDervative_NM(noMeal_Matrix_2), extractSecondDervative_NM(noMeal_Matrix_2)


# Creating Meal Data Feature 

In [10]:
#Creating Feature Matrices:
data =[]
for i in range(len(trueMeal_Matrix)):
    data.append({
    'Time Difference - Tau' : timeDifferenceList[i],
    'CGM difference - dG' : dG_NormList[i],
    'Power II' : pow2[i],
    'Frequency II' : freq2[i],
    'Power III' : pow3[i],
    'Frequency III' : freq3[i],
    'First Derivative' : firstDervative_list[i],
    'Second Derivative' : secondDervative_list[i]
        })
    
data_2 =[]
for i in range(len(trueMeal_Matrix_2)):
    data_2.append({
    'Time Difference - Tau' : timeDifferenceList_2[i],
    'CGM difference - dG' : dG_NormList_2[i],
    'Power II' : pow2_2[i],
    'Frequency II' : freq2_2[i],
    'Power III' : pow3_2[i],
    'Frequency III' : freq3_2[i],
    'First Derivative' : firstDervative_list_2[i],
    'Second Derivative' : secondDervative_list_2[i]
        })
    
data_3 =[]
for i in range(len(noMeal_Matrix)):
    data_3.append({
    'Time Difference - Tau' : timeDifferenceList_noMeal[i],
    'CGM difference - dG' : dG_NormList_noMeal[i],
    'Power II' : pow2_NM[i],
    'Frequency II' : freq2_NM[i],
    'Power III' : pow3_NM[i],
    'Frequency III' : freq3_NM[i],
    'First Derivative' :  firstDervative_list_NM[i],
    'Second Derivative' : secondDervative_list_NM[i]
        })
    
data_4 =[]
for i in range(len(noMeal_Matrix_2)):
    data_4.append({
    'Time Difference - Tau' : timeDifferenceList_noMeal_2[i],
    'CGM difference - dG' : dG_NormList_noMeal_2[i],
    'Power II' : pow2_NM_2[i],
    'Frequency II' : freq2_NM_2[i],
    'Power III' : pow3_NM_2[i],
    'Frequency III' : freq3_NM_2[i],
    'First Derivative' : firstDervative_list_NM_2[i],
    'Second Derivative' : secondDervative_list_NM_2[i]
        })

mealData_Feature_Matrix = pd.DataFrame(data)
mealData_Feature_Matrix_2 = pd.DataFrame(data_2)
noMealData_Feature_Matrix = pd.DataFrame(data_3)
noMealData_Feature_Matrix_2 = pd.DataFrame(data_4)


# Creating meal and nomeal Feature Matrix

In [11]:
meal_feature_matrix = pd.concat([mealData_Feature_Matrix, mealData_Feature_Matrix_2]).reset_index().drop(columns = 'index')
noMeal_Feature_Matrix = pd.concat([noMealData_Feature_Matrix, noMealData_Feature_Matrix_2]).reset_index().drop(columns = 'index')



In [12]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
meal_feature_matrix.iloc[:,:] = scaler.fit_transform(meal_feature_matrix.iloc[:,:].to_numpy())

# Combining meal and nomeal

In [13]:
meal_feature_matrix['Class Label'] = 1
noMeal_Feature_Matrix['Class Label'] = 0
combined_feature_matrix = pd.concat([meal_feature_matrix, noMeal_Feature_Matrix]).reset_index().drop(columns = 'index')



In [14]:
combined_feature_matrix = combined_feature_matrix.dropna().reset_index().drop(columns = 'index')
combined_feature_matrix

Unnamed: 0,Time Difference - Tau,CGM difference - dG,Power II,Frequency II,Power III,Frequency III,First Derivative,Second Derivative,Class Label
0,0.347675,0.036115,0.336670,0.000,0.380216,0.113333,0.535791,0.380807,1
1,0.564972,0.267746,0.516846,0.000,0.512873,0.113333,0.518227,0.507039,1
2,0.173837,0.084682,0.490479,0.000,0.581230,0.113333,0.675482,0.285044,1
3,0.260756,0.058531,0.680786,0.000,0.776791,0.113333,0.579414,0.383799,1
4,0.695350,0.029888,0.405884,0.500,0.552441,0.446667,0.689207,0.110619,1
...,...,...,...,...,...,...,...,...,...
1825,0.900000,0.200000,9.229000,0.083,8.830000,0.042000,2.111111,0.375000,0
1826,5.400000,0.290000,11.019000,0.042,8.885000,0.125000,1.166667,0.800000,0
1827,6.900000,0.990000,11.727000,0.042,10.082000,0.083000,0.000000,0.000000,0
1828,6.900000,0.440000,10.124000,0.125,9.372000,0.042000,0.375000,3.285714,0


# Implementing Random Forest Classifier

In [15]:
#Creating a dataframe to work on
dataframe = shuffle(combined_feature_matrix, random_state=5).reset_index().drop(columns='index')
X = dataframe.iloc[:, :-1]
Y = dataframe.iloc[:, -1]

#Splitting data into train and test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

#Defining classifier
classifier = RandomForestClassifier(criterion = "gini", max_depth = 8, min_samples_split=10, random_state=5)
classifier.fit(X_train, Y_train)

#CLassifier Score
cls_score = classifier.score(X_test, Y_test)

#Cross-Validation Accuracy
cls_CVS_accuracy = np.mean(cross_val_score(classifier, X, Y, cv=5, scoring="accuracy"))

#Cross-Validation Precision
cls_CVS_precision = np.mean(cross_val_score(classifier, X, Y, cv=5, scoring="precision"))

#Cross-Validation Recall
cls_CVS_recall = np.mean(cross_val_score(classifier, X, Y, cv=5, scoring="recall"))

#Cross-Validation F1 score
cls_CVS_f1 = np.mean(cross_val_score(classifier, X, Y, cv=5, scoring="f1"))


# Saving dump into pickle

In [16]:
print(f"Single-Validation score:{(cls_score) * 100:.2f}%")
print(f"Accuracy score: {(cls_CVS_accuracy) * 100:.2f}%")
print(f"Precision score: {(cls_CVS_precision) * 100:.2f}%")
print(f"Recall score: {(cls_CVS_recall) * 100:.2f}%")
print(f"F-1 score: {(cls_CVS_f1) * 100:.2f}%")

pickle.dump(classifier, open('RandomForestClassifier.pkl', "wb"))

Single-Validation score:100.00%
Accuracy score: 100.00%
Precision score: 100.00%
Recall score: 100.00%
F-1 score: 100.00%
