In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
import matplotlib.pyplot as plt
import sklearn.preprocessing as pre
from pandas import DataFrame
from pandas import concat
from scipy.stats import linregress as lr
from scipy.signal import find_peaks as find_peaks
from scipy.fftpack import fft, ifft,rfft
from sklearn.decomposition import PCA
from scipy.optimize import curve_fit
import pickle
from sklearn.model_selection import KFold
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier, RandomTreesEmbedding, AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

# 1. Feature Calculation

In [2]:
# Names of the 21 feature columns, listed in the order in which MergedFeatures
# concatenates the four feature sets.
COLUMNS = np.array(
    # Feature set 1: max-to-min slope, then values/heights of the two error peaks
    ['Slope_minmax',
     'PeakVal1_error', 'PeakVal2_error', 'PeakHt1_error', 'PeakHt2_error']
    # Feature set 2: statistics of the two highest-variance rolling windows
    + ['Min1_window', 'Min2_window', 'Max1_window', 'Max2_window',
       'Var1_window', 'Var2_window', 'Mean1_window', 'Mean2_window']
    # Feature set 3: fitted sigmoid parameters
    + ['Sig_coef1', 'Sig_coef2', 'Sig_coef3', 'Sig_coef4']
    # Feature set 4: summary statistics of the real FFT
    + ['Max_fft', 'Min_fft', 'Mean_fft', 'Var_fft']
)

# Input CSV with the raw CGM series, and the CSV that receives the predicted labels.
cgm_file_name = 'Data1.csv'
output_file_name = 'Output1.csv'
# Not referenced by the cells shown here; presumably the ground-truth labels.
validate_file_name = 'Truth1.csv'

In [3]:
def CalcFeatureSet1(cgmNorm_np, cgmSeries_np):
    """Compute the slope and regression-error peak features for each series.

    For every row of ``cgmNorm_np`` five values are produced, in this order:

    1. Slope between the row's maximum and the smallest value at or after it,
       measured against ``cgmSeries_np``. A NaN slope (e.g. maximum and
       minimum on the same sample) becomes 0 through ``np.nan_to_num``.
    2. The series values at the two highest peaks of the extrapolation error
       (second highest first, highest last), zero-padded at the end when
       fewer than two peaks exist.
    3. The heights of those two error peaks, padded the same way.

    The extrapolation error for window start ``i`` is obtained by fitting a
    straight line to samples ``i .. i+2`` and comparing its prediction at
    sample ``i+4`` with the actual value there. If the fit times contain NaN
    the error is recorded as -1.

    Parameters
    ----------
    cgmNorm_np : 2-D numpy array, one series per row.
    cgmSeries_np : 1-D numpy array with the time stamp of every column.

    Returns
    -------
    numpy array of shape ``(len(cgmNorm_np), 5)``.
    """
    reg_window_size = 4

    # --- slope from the maximum to the following minimum -------------------
    maxs = np.argmax(cgmNorm_np, axis=1)
    slopes = []
    # A zero time difference yields NaN/inf; that is expected and cleaned up by
    # nan_to_num below, so the floating-point warnings are silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        for i in range(len(maxs)):
            row = cgmNorm_np[i]
            max_idx = maxs[i]
            min_idx = np.argmin(row[max_idx:]) + max_idx
            rise = row[max_idx] - row[min_idx]
            run = cgmSeries_np[max_idx] - cgmSeries_np[min_idx]
            slopes.append(rise / run)
    slopes = np.nan_to_num(slopes)

    # --- peaks of the linear-extrapolation error ---------------------------
    peak_values = []
    peak_heights = []
    for row in cgmNorm_np:
        errors = []
        for start in range(len(row) - reg_window_size):
            stop = start + reg_window_size - 1
            fit_times = cgmSeries_np[start:stop]
            if np.isnan(fit_times).any():
                errors.append(-1)
                continue
            line = np.poly1d(np.polyfit(fit_times, row[start:stop], 1))
            target = start + reg_window_size
            errors.append(line(cgmSeries_np[target]) - row[target])
        errors = np.asarray(errors, dtype=float)

        peaks, properties = find_peaks(errors, height=0)
        heights = properties['peak_heights']
        order = heights.argsort()
        # Ascending sort, so the last two entries are the two highest peaks.
        top_peaks = peaks[order][-2:]
        top_heights = heights[order][-2:]
        values = row[top_peaks + reg_window_size - 1]

        # Pad with zeros so every row contributes exactly two peaks.
        missing = 2 - len(values)
        if missing > 0:
            values = np.append(values, np.zeros(missing))
            top_heights = np.append(top_heights, np.zeros(missing))
        peak_values.append(values)
        peak_heights.append(top_heights)

    # --- assemble one feature row per series -------------------------------
    matrix = [
        np.concatenate(([slopes[i]], peak_values[i], peak_heights[i]))
        for i in range(len(cgmNorm_np))
    ]
    return np.array(matrix)

def CalcFeatureSet2(cgmNorm_np, cgmSeries_np):
    """Describe each series by its two highest-variance rolling windows.

    Every row is reversed, shifted by ``width - 1`` positions and scanned with
    a rolling window of ``width`` samples. The two windows with the largest
    variance are kept and their statistics are laid out as
    ``[min1, min2, max1, max2, var1, var2, mean1, mean2]``, where index 1
    refers to the highest-variance window.

    ``cgmSeries_np`` is accepted for signature parity with the other feature
    sets and is not used.

    Returns a numpy array with one 8-element row per input series.
    """
    width = 5
    VAR, MIN, MEAN, MAX = 0, 1, 2, 3  # column positions in the stats table

    feature_rows = []
    for series in cgmNorm_np:
        reversed_series = DataFrame(series[::-1])
        rolling = reversed_series.shift(width - 1).rolling(window=width)
        stats = concat(
            [rolling.var(), rolling.min(), rolling.mean(), rolling.max()],
            axis=1,
        )
        stats.columns = ['var', 'min', 'mean', 'max']

        top_two = stats.nlargest(2, 'var').values
        first, second = top_two[0], top_two[1]
        feature_rows.append(np.array([
            first[MIN], second[MIN],
            first[MAX], second[MAX],
            first[VAR], second[VAR],
            first[MEAN], second[MEAN],
        ], dtype=float))

    return np.array(feature_rows)

def sigmoid(x, L ,x0, k, b):
    """Logistic curve ``L / (1 + exp(-k * (x - x0))) + b``.

    ``L`` is the curve height, ``x0`` the midpoint, ``k`` the steepness and
    ``b`` the vertical offset. Works element-wise on numpy arrays.
    """
    exponent = -k * (x - x0)
    logistic = L / (1 + np.exp(exponent))
    return logistic + b

def CalcFeatureSet3(cgmNorm_np, cgmSeries_np):
    """Fit a sigmoid to every series and return its four fitted parameters.

    For each row only the samples where both the time stamp and the value are
    finite are used. The fit starts from
    ``[max(values), median(times), 250, min(values)]`` and uses
    ``curve_fit(..., method='trf')`` with the module-level ``sigmoid``.

    A row contributes ``[0, 0, 0, 0]`` when it has no finite samples or when
    the fit raises. Previously a row without finite samples contributed
    nothing, which shifted the parameters of all later rows and ended in an
    IndexError.

    Parameters
    ----------
    cgmNorm_np : 2-D numpy array, one series per row.
    cgmSeries_np : 1-D numpy array with the time stamp of every column.

    Returns
    -------
    numpy array with one ``[L, x0, k, b]`` row per input series.
    """
    no_fit = [0, 0, 0, 0]
    rows = []
    for series in cgmNorm_np:
        finite = np.isfinite(cgmSeries_np) & np.isfinite(series)
        values = series[finite]
        times = cgmSeries_np[finite]

        popt = no_fit
        if len(values) != 0:
            try:
                p0 = [max(values), np.median(times), 250, min(values)]
                popt, _pcov = curve_fit(sigmoid, times, values, p0, method='trf')
            except Exception:
                # Best effort: a failed fit falls back to zeros. Unlike a bare
                # ``except`` this lets KeyboardInterrupt/SystemExit through.
                popt = no_fit

        rows.append([popt[0], popt[1], popt[2], popt[3]])

    return np.array(rows, dtype=float)

def CalcFeatureSet4(cgmNorm_np, cgmSeries_np):
    """Summarise the real FFT of every series.

    Each row is transformed with ``scipy.fftpack.rfft`` and reduced to
    ``[max, min, mean, variance]`` of the resulting coefficients, using the
    NaN-ignoring numpy reductions.

    ``cgmSeries_np`` is accepted for signature parity with the other feature
    sets and is not used.

    Returns a numpy array with one 4-element row per input series.
    """
    feature_rows = []
    for series in cgmNorm_np:
        # NOTE: fftpack.rfft returns a real array of packed coefficients;
        # scipy.fft.rfft returns complex values in a different layout, so it is
        # not a drop-in replacement for these statistics.
        spectrum = rfft(series)
        feature_rows.append(np.array([
            np.nanmax(spectrum),
            np.nanmin(spectrum),
            np.nanmean(spectrum),
            np.nanvar(spectrum),
        ]))
    return np.array(feature_rows)

def MergedFeatures(cgmNorm_np, cgmSeries_np):
    """Run all four feature extractors and join their outputs column-wise.

    The blocks appear in the order set 1, set 2, set 3, set 4. NaN and
    infinite entries in the joined matrix are replaced via ``np.nan_to_num``.
    """
    extractors = (CalcFeatureSet1, CalcFeatureSet2, CalcFeatureSet3, CalcFeatureSet4)
    blocks = [extract(cgmNorm_np, cgmSeries_np) for extract in extractors]
    merged = np.concatenate(blocks, axis=1)
    return np.nan_to_num(merged)

def GenerateDF(features, columns):
    """Wrap the feature matrix in a DataFrame labelled with ``columns``."""
    return pd.DataFrame(data=features, columns=columns)

def NormalizeDF(feature_df, columns, max_scale):
    """Divide every listed column by its entry in ``max_scale``.

    ``feature_df`` is modified in place; the same object is returned for
    convenience. ``max_scale`` is indexed by column name.
    """
    for name in columns:
        scaled = feature_df[name] / max_scale[name]
        feature_df[name] = scaled
    return feature_df

In [4]:
# Read the raw CGM readings with 50 positional column names and drop the
# columns that turn out to be entirely empty.
cgmData = pd.read_csv(cgm_file_name, names=list(range(50)))
cgmData = cgmData.dropna(axis='columns', how='all')
# -1 marks a missing reading: blank it out, then fill gaps along each row,
# forward first and backward afterwards for gaps at the start of a row.
# (After the forward fill no -1 can remain, so one mask is sufficient.)
cgmData = cgmData.mask(cgmData.eq(-1)).ffill(axis=1).bfill(axis=1)

# Rows that still contain NaN after filling cannot be used.
zero_entries = cgmData.isna().any(axis=1)
cgmData = cgmData[~zero_entries]

cgmValues_np = cgmData.values
# Scale the readings by 400.0 (presumably the upper end of the CGM range - confirm).
cgmNorm_np = cgmValues_np/400.0

# Time axis counting down to 0 in steps of 0.0833
# (about 5/60, presumably 5-minute samples expressed in hours - confirm).
length = len(cgmNorm_np[0])
cgmSeries_np = np.array([0.0833*(length-i-1) for i in range(0, length)])

features = MergedFeatures(cgmNorm_np, cgmSeries_np)
features_df = GenerateDF(features, COLUMNS)

# SECURITY: pickle.load can execute arbitrary code; only use a trusted DataScale.pkl.
# The context manager closes the file even if unpickling fails.
with open('DataScale.pkl', 'rb') as Scale_file:
    max_scale = pickle.load(Scale_file)

# NormalizeDF scales features_df in place, so normal_df is the same object.
normal_df = NormalizeDF(features_df, COLUMNS, max_scale)
display(normal_df)
normal_df.to_csv('tempFeatures.csv', index=False)

  


Unnamed: 0,Slope_minmax,PeakVal1_error,PeakVal2_error,PeakHt1_error,PeakHt2_error,Min1_window,Min2_window,Max1_window,Max2_window,Var1_window,...,Mean1_window,Mean2_window,Sig_coef1,Sig_coef2,Sig_coef3,Sig_coef4,Max_fft,Min_fft,Mean_fft,Var_fft
0,0.223958,0.786632,0.6925,0.379195,0.418994,0.672222,0.663889,0.7300,0.726817,0.151981,...,0.702505,0.684127,-0.000179,0.157098,0.005127,0.001196,0.704449,-0.305146,0.634014,0.501904
1,0.181818,0.370180,0.5000,0.197987,0.242086,0.419444,0.391667,0.4900,0.466165,0.091495,...,0.451461,0.428571,-0.000178,0.105077,0.499994,0.000705,0.377450,-0.368154,0.351433,0.148848
2,0.193182,0.519280,0.4700,0.308725,0.275605,0.250000,0.230556,0.3975,0.358396,0.190450,...,0.328288,0.292593,0.000544,0.101382,0.003870,0.000346,0.376598,-0.543992,0.284399,0.155170
3,0.245536,0.383033,0.5350,0.305369,0.175047,0.341667,0.327778,0.4225,0.403509,0.098461,...,0.382568,0.360847,0.000301,0.147579,0.124742,0.000625,0.433015,-0.226763,0.418339,0.191966
4,0.100216,0.336761,0.4000,0.171141,0.100559,0.311111,0.297222,0.3650,0.348371,0.049465,...,0.338205,0.322222,0.000349,0.079098,0.002012,0.000404,0.374979,-0.273501,0.318875,0.145182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,0.357143,0.295630,0.4300,0.221477,0.407821,0.319444,0.355556,0.4300,0.463659,0.152501,...,0.381524,0.410582,0.000496,0.170006,0.004147,0.000497,0.354440,-0.067711,0.427625,0.130693
87,0.000000,0.398458,0.3925,0.020134,0.024209,0.463889,0.475000,0.4725,0.473684,0.019885,...,0.461900,0.479894,-0.000113,0.022456,0.499995,0.000797,0.426197,-0.012094,0.452646,0.180966
88,0.153409,0.398458,0.2450,0.278523,0.245810,0.186111,0.205556,0.3225,0.350877,0.188111,...,0.253653,0.295767,0.000422,0.061653,0.004511,0.000282,0.349753,-0.328968,0.254037,0.130082
89,0.217105,0.385604,0.4175,0.486577,0.279330,0.466667,0.425000,0.5300,0.508772,0.105661,...,0.485386,0.460847,0.000429,0.170175,0.004123,0.000658,0.445543,-0.099211,0.488674,0.201235


# 2. PCA Transform

In [5]:
# Reload the normalized feature table that the feature-calculation cell wrote to disk.
allData = pd.read_csv('tempFeatures.csv')
# Pickle file holding the fitted object whose transform() is applied below.
PCA_filename = 'PCA.pkl'
def Transform(data, PCA_filename):
    """Apply the transformer pickled in ``PCA_filename`` to ``data``.

    Parameters
    ----------
    data : object passed unchanged to the unpickled object's ``transform``.
    PCA_filename : path of the pickle file to load.

    Returns
    -------
    Whatever ``transform`` of the unpickled object returns.

    Raises
    ------
    OSError if the file cannot be opened; any error raised by ``pickle.load``
    or by ``transform`` propagates unchanged.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load files from a trusted source.
    # The context manager closes the file even if unpickling fails.
    with open(PCA_filename, 'rb') as PCA_file:
        pca = pickle.load(PCA_file)
    return pca.transform(data)
# Transform the feature table with the object stored in PCA_filename;
# finalData is what the classifiers below receive.
finalData = Transform(allData, PCA_filename)

# 3. Loading Models and Evaluating Data

In [6]:
class Classifiers:
    """Loads four pickled classifiers and exposes one predict wrapper per model.

    The models are identified by name (Anirudh, Omkar, Ananth, Vedant). For a
    name ``X`` an instance carries

    * ``X_filename``   - path of the pickle file, set in ``__init__``;
    * ``X_classifier`` - the unpickled object, set by ``loadClassifier_X``.

    ``validateClassifier_X(test_data)`` returns ``X_classifier.predict(test_data)``
    and raises ``AttributeError`` if the model has not been loaded yet.
    """

    def __init__(self):
        # Pickle paths; they may be overwritten on the instance before loading.
        self.Anirudh_filename = 'Anirudh.pkl'
        self.Omkar_filename = 'Omkar.pkl'
        self.Ananth_filename = 'Ananth.pkl'
        self.Vedant_filename = 'Vedant.pkl'

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _load(self, name):
        """Unpickle the file named by ``<name>_filename`` into ``<name>_classifier``."""
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file; only load model files from a trusted source.
        # The context manager closes the file even if unpickling fails.
        with open(getattr(self, name + '_filename'), 'rb') as save_file:
            setattr(self, name + '_classifier', pickle.load(save_file))

    def _predict(self, name, test_data):
        """Return ``<name>_classifier.predict(test_data)``."""
        return getattr(self, name + '_classifier').predict(test_data)

    # ------------------------------------------------------------------
    # Prediction wrappers
    # ------------------------------------------------------------------
    def validateClassifier_Anirudh(self, test_data):
        """Predict labels for ``test_data`` with the Anirudh model."""
        return self._predict('Anirudh', test_data)

    def validateClassifier_Vedant(self, test_data):
        """Predict labels for ``test_data`` with the Vedant model."""
        return self._predict('Vedant', test_data)

    def validateClassifier_Omkar(self, test_data):
        """Predict labels for ``test_data`` with the Omkar model."""
        return self._predict('Omkar', test_data)

    def validateClassifier_Ananth(self, test_data):
        """Predict labels for ``test_data`` with the Ananth model."""
        return self._predict('Ananth', test_data)

    # ------------------------------------------------------------------
    # Loaders
    # ------------------------------------------------------------------
    def loadClassifier_Anirudh(self):
        """Load the Anirudh model from ``self.Anirudh_filename``."""
        self._load('Anirudh')

    def loadClassifier_Vedant(self):
        """Load the Vedant model from ``self.Vedant_filename``."""
        self._load('Vedant')

    def loadClassifier_Omkar(self):
        """Load the Omkar model from ``self.Omkar_filename``."""
        self._load('Omkar')

    def loadClassifier_Ananth(self):
        """Load the Ananth model from ``self.Ananth_filename``."""
        self._load('Ananth')

    def loadAllClassifiers(self):
        """Load all four models, in the order Anirudh, Vedant, Omkar, Ananth."""
        self.loadClassifier_Anirudh()
        self.loadClassifier_Vedant()
        self.loadClassifier_Omkar()
        self.loadClassifier_Ananth()

In [7]:
# Restore the four pickled models and let each one label the transformed data.
models = Classifiers()
models.loadAllClassifiers()
labels_Anirudh = models.validateClassifier_Anirudh(finalData)
labels_Vedant = models.validateClassifier_Vedant(finalData)
labels_Omkar = models.validateClassifier_Omkar(finalData)
labels_Ananth = models.validateClassifier_Ananth(finalData)

# Stack the predictions as rows (one per model), then flip so that every
# sample becomes a row and every model a column.
labels_matrix = np.transpose(
    np.vstack((labels_Anirudh, labels_Vedant, labels_Omkar, labels_Ananth))
)

labels_df = pd.DataFrame(
    labels_matrix,
    columns=['Anirudh', 'Vedant', 'Omkar', 'Ananth'],
)
display(labels_df)
labels_df.to_csv(output_file_name, index=False)

Unnamed: 0,Anirudh,Vedant,Omkar,Ananth
0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0
...,...,...,...,...
86,1.0,0.0,1.0,1.0
87,1.0,1.0,1.0,1.0
88,0.0,1.0,0.0,1.0
89,0.0,0.0,0.0,1.0
