In [92]:
import pandas as pd
import pickle
import statistics
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate, fftpack, stats, signal
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import tree
import os
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

# Module to interpolate values.
def interpolate_missing_vals(i, identifier):
    s1 = []
    store_val = []
    x = 0
    if identifier == 'meal':
        for elems in meal_df.iloc[i]:
            s1.append(elems)
    else:
        for elems in no_meal_df.iloc[i]:
            s1.append(elems)        
    
    data = {'vals' : s1}
    df = pd.DataFrame(data = data)

    # Interpolate the values.
    #df['vals'].interpolate(method = 'polynomial', order = 3, inplace = True)
    df['vals'].interpolate(method = 'pad', limit = 2, inplace = True)
    store_val = list(df['vals'])
    
    if identifier == 'meal':
        for cols in meal_df.columns:
            meal_df.at[i, cols] = store_val[x]
            x += 1
    else:
        for cols in no_meal_df.columns:
            no_meal_df.at[i, cols] = store_val[x]
            x += 1

# Module to perform polynomial fit
# to get the coefficient values.
def perform_polyfit(i):
    colmns = ['coeff_0', 'coeff_1', 'coeff_2']
    co_eff = []
    itr = 0
    vals = list(new_training.iloc[i])
    interval = [j * 5 for j in range(0, len(new_training.iloc[i]))]
    p_fit = list(np.polyfit(interval, vals, 2))
    co_eff.extend(p_fit)
    # Plot chart
    # Uncomment the below lines to
    # plot the curve.
    #plt.plot(p_fit)
    #plt.show()
    for cols in colmns:
        feature_df.at[i, cols] = co_eff[itr]
        itr += 1

# Module to perform fft.
def perform_fft(i):
    itr = 0
    vals = list(new_training.iloc[i])
    fft_plot = abs(fftpack.fft(vals))
    fft_vals = sorted(set(fft_plot), reverse = True)
    feature_df.at[i, 'high_1'] = fft_vals[1]
    feature_df.at[i, 'high_2'] = fft_vals[2]
    feature_df.at[i, 'high_3'] = fft_vals[3]
    # Plot chart
    # Uncomment the below lines to
    # plot the curve.
    #print('FFT', [fft_vals[1], fft_vals[2], fft_vals[2]])
    #plt.plot(fft_vals[1:])
    #plt.show()

# Module to perform CGM velocity method.
def cgm_velocity(i):
    window_size = 3
    time_line = 15
    velocity = []
    val_store = new_training.iloc[i]
    for j in range(0, len(new_training.iloc[i]) - window_size):
        interim = (val_store[j] - val_store[j + window_size]) / time_line
        velocity.append(interim)
    # Find standard deviation of the series.
    s_dev = pd.Series(velocity).std()
    mean_val = pd.Series(velocity).mean()
    median_val = pd.Series(velocity).median()
    feature_df.at[i, 'cgm_velocity_stdv'] = s_dev
    feature_df.at[i, 'cgm_velocity_mean'] = mean_val
    feature_df.at[i, 'cgm_velocity_median'] = median_val
    #plt.plot(velocity)
    #plt.show()

# Module to perform Welch method.
def perform_welch(i):
    store_interim = new_training.iloc[i]
    hz, welch_values  = np.array((signal.welch(store_interim)))
    welch_std = pd.Series(welch_values).std()
    welch_mean = pd.Series(welch_values).mean()
    welch_median = pd.Series(welch_values).median()
    feature_df.at[i, 'max_welch'] = max(welch_values)
    feature_df.at[i, 'std_welch'] = welch_std
    feature_df.at[i, 'mean_welch'] = welch_mean
    feature_df.at[i, 'median_welch'] = welch_median
    #plt.plot(hz, welch_values)
    #plt.show()

# Module to perform PCA.
def performPCA():
    pc_features = feature_df.columns
    feature_matrix = feature_df.loc[:, pc_features].values
    # Normalize the feature values.
    feature_matrix = StandardScaler().fit_transform(feature_matrix)
    pca_cons = PCA(n_components = 10)
    principal_components = pca_cons.fit_transform(feature_matrix)
    final_component = pd.DataFrame(data = principal_components, 
                                   columns = ['component_1', 'component_2', 
                                              'component_3', 'component_4', 
                                              'component_5', 'component_6', 
                                              'component_7', 'component_8', 
                                              'component_9', 'component_10'])
    pca_var = pca_cons.explained_variance_ratio_
    pc_comps = (abs(pca_cons.components_))
    #print(abs(pca_cons.components_))
    pca_var = ['{:f}'.format(item) for item in pca_var]
    #print(pca_var)
    return final_component

def modelDecisionTree(X,y):
    scores = []
    accuracy = []
    model = tree.DecisionTreeClassifier()
    cv = KFold(n_splits=10, random_state=42, shuffle=True)
    for train_index, test_index in cv.split(X):
        X_train, X_test, y_train, y_test = X.iloc[train_index], X.iloc[test_index], y.iloc[train_index], y.iloc[
            test_index]
        model.fit(X_train, y_train)
        predicted = model.predict(X_test)
        accuracy.append(metrics.accuracy_score(y_test, predicted))
        scores.append(model.score(X_test, y_test))

    print("DecisionTree")
    print("Max- " + str(np.min(accuracy)))
    print("Mean- "+ str(np.mean(accuracy)))
    print("Min- "+ str( np.max(accuracy)))

    return model

def execute_classifiers(model, X_train, X_test, Y_train, Y_test):
    model.fit(X_train,Y_train)
    score = model.score(X_test, Y_test)
    predict = model.predict(X_test)
    accuracy_score = metrics.accuracy_score(Y_test, predict)
    return accuracy_score

def execute_fit(model, X_train, X_test, Y_train, Y_test):
    model.fit(X_train,Y_train)
    return model

if __name__ == '__main__':
    #Store accuracy scores
    rforest = []
    svc = []
    lregrr = []
    mlp = []
    check_max = {}
    checker = 0
    algos = ''
    
    # Module to read all the csv files
    col_names = []
    for i in range(1, 32):
        col_name_val = 'c' + str(i)
        col_names.append(col_name_val)
    #col_names.append('class')
    # Read the meal data
    meal1 = pd.read_csv('Resources/MealNoMealData/mealData1.csv', names = col_names)
    meal2 = pd.read_csv('Resources/MealNoMealData/mealData2.csv', names = col_names)
    meal3 = pd.read_csv('Resources/MealNoMealData/mealData3.csv', names = col_names)
    meal4 = pd.read_csv('Resources/MealNoMealData/mealData4.csv', names = col_names)
    meal5 = pd.read_csv('Resources/MealNoMealData/mealData5.csv', names = col_names)
    meal_df = pd.concat([meal1, meal2, meal3, meal4, meal5], ignore_index = True)

    # Read no meal data
    no_meal1 = pd.read_csv('Resources/MealNoMealData/Nomeal1.csv', names = col_names)
    no_meal2 = pd.read_csv('Resources/MealNoMealData/Nomeal2.csv', names = col_names)
    no_meal3 = pd.read_csv('Resources/MealNoMealData/Nomeal3.csv', names = col_names)
    no_meal4 = pd.read_csv('Resources/MealNoMealData/Nomeal4.csv', names = col_names)
    no_meal5 = pd.read_csv('Resources/MealNoMealData/Nomeal5.csv', names = col_names)
    no_meal_df = pd.concat([no_meal1, no_meal2, no_meal3, no_meal4, no_meal5], ignore_index = True)
    
    # Create the feature data frame.
    feature_df = pd.DataFrame(columns = ['coeff_0', 'coeff_1', 'coeff_2', 'high_1', 'high_2', 'high_3', 'cgm_velocity_stdv', 'cgm_velocity_mean', 'cgm_velocity_median', 'max_welch', 'std_welch', 'mean_welch', 'median_welch'])
    # Interpolate the missing values in
    # meal data.
    for i in range(len(meal_df)):
        interpolate_missing_vals(i, 'meal')
    
    # Interpolate the missing values in
    # no meal data.
    for i in range(len(no_meal_df)):
        interpolate_missing_vals(i, 'no_meal')

    # Remove all NA values from the dataframe
    meal_df = meal_df.dropna()
    no_meal_df = no_meal_df.dropna()

    # Add classs
    meal_df['class'] = 1
    no_meal_df['calss'] = 0

    # Create the training dataframe
    traning_interim_df = pd.concat([meal_df, no_meal_df])
    # Add classs 
    meal_df['class'] = '1'
    no_meal_df['class'] = '0'

    # Create the training dataframe
    traning_interim_df = pd.concat([meal_df, no_meal_df], ignore_index = True, sort = False)
    
    # Add Features
    new_training = traning_interim_df.loc[: , 'c1' : 'c31'].copy()

    # Extract class labels
    class_labels['class'] = traning_interim_df['class'].copy()

    # Perform Polyfit
    for i in range(0, len(new_training)):
        perform_polyfit(i)

    # Perform polyfit feature
    for i in range(0, len(new_training)):
        perform_fft(i)
    
    # Perform CGM velocity feature
    for i in range(0, len(new_training)):
        cgm_velocity(i)
    
    # Perform welch feature
    for i in range(0, len(new_training)):
        perform_welch(i)
    
    # Perform PCA
    final_df = performPCA()

    # Perform k fold
    k_cross_ret = KFold(n_splits = 10,random_state = 42, shuffle = True)
    for train_index, test_index in k_cross_ret.split(p_df):
        train_feature, test_feature, train_label, test_label = p_df.iloc[train_index],p_df.iloc[test_index],\
                                       class_labels['class'].iloc[train_index],class_labels['class'].iloc[test_index]
    
        rforest.append(execute_classifiers(RandomForestClassifier(n_estimators = 550), train_feature, test_feature, train_label, test_label))
    
        svc.append(execute_classifiers(SVC(gamma='scale'),train_feature, test_feature, train_label, test_label))
    
        lregrr.append(execute_classifiers(LogisticRegression(solver='lbfgs'),train_feature, test_feature, train_label, test_label))
    
        mlp.append(execute_classifiers(MLPClassifier(hidden_layer_sizes=(300), max_iter=5000,activation = 'relu',solver='adam',random_state=1)\
                             ,train_feature, test_feature, train_label, test_label))
    
    x = modelDecisionTree(p_df, class_labels)
    print('------------')
    check_max['random_forrest'] = statistics.mean(rforest)
    print('Random forest')
    print('Max-', max(rforest))
    print('Mean-', statistics.mean(rforest))
    print('Min-', min(rforest))
    print('------------')
    check_max['svm'] = statistics.mean(svc)
    print('SVM')
    print('Max-', max(svc))
    print('Mean-', statistics.mean(svc))
    print('Min-', min(svc))
    print('------------')
    check_max['logistic'] = statistics.mean(lregrr)
    print('Logistic regression')
    print('Max-', max(lregrr))
    print('Mean-', statistics.mean(lregrr))
    print('Min-', min(lregrr))
    print('------------')
    check_max['mlp'] = statistics.mean(mlp)
    print('mlp')
    print('Max-', max(mlp))
    print('Mean-', statistics.mean(mlp))
    print('Min-', min(mlp))
    print('------------')
    # Find algo with best accuracy and
    # fit the model.
    filename = 'trained.pkl'
    for a, b in check_max.items():
        if b > checker:
            algos = a
            checker = b
    if algos == 'random_forrest':
        m_fit = execute_fit(RandomForestClassifier(n_estimators = 550), train_feature, test_feature, train_label, test_label)
        pickle.dump(m_fit, open(filename, 'wb'))

    elif algos == 'svm':
        m_fit = execute_fit(SVC(gamma='scale'),train_feature, test_feature, train_label, test_label)
        pickle.dump(m_fit, open(filename, 'wb'))
    
    elif algos == 'logistic':
        m_fit = execute_fit(LogisticRegression(solver='lbfgs'),train_feature, test_feature, train_label, test_label)
        pickle.dump(m_fit, open(filename, 'wb'))

    else:
        m_fit = execute_fit(MLPClassifier(hidden_layer_sizes=(300), max_iter=5000,activation = 'relu',solver='adam',random_state=1)\
                             ,train_feature, test_feature, train_label, test_label)
        pickle.dump(m_fit, open(filename, 'wb'))

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  .format(nperseg, input_length))


DecisionTree
Min : 0.4090909090909091
Mean : 0.5643939393939393
Max : 0.7727272727272727
------------
Random forest
Max- 0.75
Mean- 0.6301515151515151
Min- 0.5111111111111111
------------
SVM
Max- 0.7045454545454546
Mean- 0.6301010101010102
Min- 0.5333333333333333
------------
Logistic regression
Max- 0.7272727272727273
Mean- 0.6345959595959596
Min- 0.5333333333333333
------------
mlp
Max- 0.7272727272727273
Mean- 0.6388383838383839
Min- 0.5333333333333333
------------
