# In [25]:  (Jupyter notebook cell prompt kept from the original export)
import pandas as pd
import pickle
import statistics
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate, fftpack, stats, signal
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import os
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

# Module to interpolate values.
def interpolate_missing_vals(i):
    """Forward-fill short gaps in row ``i`` of the module-level ``if_meal_df``.

    Each missing value is replaced by the closest preceding non-missing value
    in the same row, but at most two consecutive missing values are filled.
    Longer gaps keep their remaining NaNs, and leading NaNs are never filled.
    Every column of the row takes part in the fill, not only the CGM columns.

    The frame is modified in place; nothing is returned.

    Parameters
    ----------
    i : int
        Zero-based position of the row to process.
    """
    # ffill(limit = 2) is the direct spelling of the former
    # interpolate(method = 'pad', limit = 2) call. It returns a new Series
    # instead of relying on an in-place update of a temporary column, which
    # is not propagated when pandas Copy-on-Write is active.
    filled = if_meal_df.iloc[i].ffill(limit = 2)

    # Write back by position so reading and writing address the same row
    # whatever the frame's index labels are.
    for position in range(len(if_meal_df.columns)):
        if_meal_df.iat[i, position] = filled.iloc[position]

# Module to perform polynomial fit
# to get the coefficient values.
def perform_polyfit(i):
    """Fit a quadratic to row ``i`` of ``new_training`` and store its coefficients.

    The samples of the row are placed on an x-axis that advances by 5 per
    sample (0, 5, 10, ...). The three coefficients returned by ``np.polyfit``
    are written to ``feature_df`` at row label ``i``. ``np.polyfit`` orders
    coefficients from the highest power down, so ``coeff_0`` receives the
    quadratic term and ``coeff_2`` the constant term.

    Parameters
    ----------
    i : int
        Zero-based position of the row in ``new_training``.
    """
    samples = new_training.iloc[i].tolist()
    x_axis = [5 * step for step in range(len(samples))]
    coefficients = np.polyfit(x_axis, samples, 2).tolist()

    for name, value in zip(('coeff_0', 'coeff_1', 'coeff_2'), coefficients):
        feature_df.at[i, name] = value

# Module to perform fft.
def perform_fft(i):
    """Store the 2nd, 3rd and 4th largest distinct FFT magnitudes of row ``i``.

    The FFT of the row from ``new_training`` is taken, its magnitudes are
    de-duplicated and sorted in descending order, and the entries at
    positions 1, 2 and 3 are written to ``feature_df`` as ``high_1``,
    ``high_2`` and ``high_3``. The largest magnitude (position 0) is skipped
    (presumably because it is the DC term; confirm with the feature design).

    When fewer than four distinct magnitudes exist, all three features are
    set to 0.

    Parameters
    ----------
    i : int
        Zero-based position of the row in ``new_training``.
    """
    samples = new_training.iloc[i].tolist()
    magnitudes = abs(fftpack.fft(samples))
    ranked = sorted(set(magnitudes), reverse = True)

    # Decide up front instead of catching every exception: the only expected
    # failure is running out of distinct magnitudes, and any other error
    # should surface rather than be silently replaced by zeros.
    if len(ranked) >= 4:
        top_three = ranked[1:4]
    else:
        top_three = [0, 0, 0]

    for name, value in zip(('high_1', 'high_2', 'high_3'), top_three):
        feature_df.at[i, name] = value

# Module to perform CGM velocity method.
def cgm_velocity(i):
    """Compute velocity statistics for row ``i`` of ``new_training``.

    The velocity at position ``j`` is
    ``(value[j] - value[j + window_size]) / time_line`` with a window of 3
    samples and a divisor of 15 (presumably 3 samples x 5 minutes; confirm
    against the sampling interval). The sample standard deviation, mean and
    median of these velocities are written to ``feature_df`` at row label
    ``i`` as ``cgm_velocity_stdv``, ``cgm_velocity_mean`` and
    ``cgm_velocity_median``.

    Rows with ``window_size`` or fewer samples yield no velocities, so all
    three statistics are NaN.

    Parameters
    ----------
    i : int
        Zero-based position of the row in ``new_training``.
    """
    window_size = 3
    time_line = 15

    # Work on a plain array: the row is labelled by column name, and integer
    # keys on such a Series rely on a positional fallback that pandas has
    # deprecated.
    samples = new_training.iloc[i].to_numpy(dtype = float)
    velocity = pd.Series(
        (samples[:-window_size] - samples[window_size:]) / time_line,
        dtype = float)

    feature_df.at[i, 'cgm_velocity_stdv'] = velocity.std()
    feature_df.at[i, 'cgm_velocity_mean'] = velocity.mean()
    feature_df.at[i, 'cgm_velocity_median'] = velocity.median()

# Module to perform Welch method.
def perform_welch(i):
    """Store summary statistics of the Welch power spectrum of row ``i``.

    The power spectral density of the row from ``new_training`` is estimated
    with ``scipy.signal.welch``. Its maximum, sample standard deviation, mean
    and median are written to ``feature_df`` at row label ``i`` as
    ``max_welch``, ``std_welch``, ``mean_welch`` and ``median_welch``.

    Parameters
    ----------
    i : int
        Zero-based position of the row in ``new_training``.
    """
    samples = new_training.iloc[i].to_numpy(dtype = float)

    # welch() defaults to 256-sample segments and, for shorter inputs, warns
    # and falls back to the input length. Passing that length explicitly
    # gives the same estimate without a warning on every row.
    segment_length = min(256, len(samples))
    _, welch_values = signal.welch(samples, nperseg = segment_length)

    power = pd.Series(welch_values)
    feature_df.at[i, 'max_welch'] = max(welch_values)
    feature_df.at[i, 'std_welch'] = power.std()
    feature_df.at[i, 'mean_welch'] = power.mean()
    feature_df.at[i, 'median_welch'] = power.median()

# Module to perform PCA.
def performPCA(n_components = 10):
    """Standardise ``feature_df`` and project it onto its principal components.

    All columns of the module-level ``feature_df`` are scaled with
    ``StandardScaler`` and then reduced with ``PCA``.

    NOTE(review): both the scaler and the PCA are fitted on the data held in
    ``feature_df`` at call time. If the downstream models expect the
    projection learned during training, the fitted transformers should be
    loaded instead; confirm this is intended.

    Parameters
    ----------
    n_components : int, optional
        Number of principal components to keep. Defaults to 10, the value
        that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``feature_df`` and one column per component,
        labelled ``component_1``, ``component_2``, ...
    """
    # Normalize the feature values.
    scaled_features = StandardScaler().fit_transform(feature_df.values)
    projected = PCA(n_components = n_components).fit_transform(scaled_features)

    # Derive the labels from the actual output width so they always match.
    labels = ['component_{}'.format(k) for k in range(1, projected.shape[1] + 1)]
    return pd.DataFrame(data = projected, columns = labels)

def add_bins(loc_df):
    """Label each row of ``loc_df`` with the bin of its ``true_amt`` value.

    The ``bins`` column is created or overwritten as follows:

    * ``true_amt == 0``       -> ``'c_0'``
    * ``0 < true_amt <= 20``  -> ``'c_0_20'``
    * ``20 < true_amt <= 40`` -> ``'c_20_40'``
    * ``40 < true_amt <= 60`` -> ``'c_40_60'``
    * ``60 < true_amt <= 80`` -> ``'c_60_80'``
    * anything else           -> ``'c_80_100'``

    "Anything else" covers values above 80 and also negative or missing
    values, because it is the fall-through case.

    Parameters
    ----------
    loc_df : pandas.DataFrame
        Frame with a numeric ``true_amt`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``loc_df`` object, with the ``bins`` column filled in.
    """
    amounts = loc_df['true_amt'].to_numpy()
    conditions = [
        amounts == 0,
        (amounts > 0) & (amounts <= 20),
        (amounts > 20) & (amounts <= 40),
        (amounts > 40) & (amounts <= 60),
        (amounts > 60) & (amounts <= 80),
    ]
    labels = ['c_0', 'c_0_20', 'c_20_40', 'c_40_60', 'c_60_80']

    # Assign the whole column at once. Element-wise chained assignment
    # (loc_df['bins'][k] = ...) writes to a temporary object and is not
    # propagated when pandas Copy-on-Write is active.
    loc_df['bins'] = np.select(conditions, labels, default = 'c_80_100')
    return loc_df

if __name__ == '__main__':
    # Script entry point: read the CGM rows, build the feature table, project
    # it with PCA, run both pickled models and write their predictions to
    # result_output.csv.
    #
    # if_meal_df, feature_df and new_training are deliberately bound at
    # module level: the feature functions above read and write them as
    # globals.

    # Column labels c1..c31, one per value in a CSV row.
    col_names = ['c' + str(i) for i in range(1, 32)]

    # Read the meal amount data.
    meal_amt1 = pd.read_csv('Dataset/mealAmountData1.csv', names = ['Meal_Amount'])

    # Read every file in test_files/ and stack the rows into one frame.
    # os.listdir() returns names in arbitrary order, so sort them to keep the
    # row order (and therefore the output file) reproducible between runs.
    list_files = sorted(os.listdir('test_files'))
    if_meal_df = pd.concat(
        [pd.read_csv('test_files/' + file_name, names = col_names)
         for file_name in list_files],
        ignore_index = True)
    if_meal_df['amount'] = meal_amt1[0 : 51]

    # Create the feature data frame.
    feature_df = pd.DataFrame(columns = ['coeff_0', 'coeff_1', 'coeff_2', 'high_1', 'high_2', 'high_3', 'cgm_velocity_stdv', 'cgm_velocity_mean', 'cgm_velocity_median', 'max_welch', 'std_welch', 'mean_welch', 'median_welch'])

    # Interpolate the missing values in meal data.
    for i in range(len(if_meal_df)):
        interpolate_missing_vals(i)

    # Remove all rows that still contain NA values.
    if_meal_df = if_meal_df.dropna()

    # Create the training dataframe
    traning_interim_df = if_meal_df.copy()

    # Keep only the CGM columns for feature extraction.
    new_training = traning_interim_df.loc[: , 'c1' : 'c31'].copy()

    # Build one feature row per CGM row: polynomial fit, FFT, CGM velocity
    # and Welch features. Each function writes its own feature_df columns.
    for i in range(len(new_training)):
        perform_polyfit(i)
        perform_fft(i)
        cgm_velocity(i)
        perform_welch(i)

    # Perform PCA
    final_df_km = performPCA()
    final_df_db = performPCA()

    # SECURITY NOTE: unpickling can execute arbitrary code, so only load
    # model files from a trusted source.
    with open('pkl_files/kmean_fit.pkl', 'rb') as model_file:
        loaded_model_km = pickle.load(model_file)
    result_km = loaded_model_km.predict(final_df_km)

    with open('pkl_files/dbscan_fit.pkl', 'rb') as model_file:
        loaded_model_db = pickle.load(model_file)
    result_db = loaded_model_db.predict(final_df_db)

    # Translate bin labels to numeric codes 1-5; any other prediction
    # (including 'c_80_100') maps to 6.
    # NOTE(review): if a model predicts integer cluster ids rather than bin
    # label strings, every row falls through to 6. Confirm what the pickled
    # models return.
    bin_codes = {'c_0': 1, 'c_0_20': 2, 'c_20_40': 3, 'c_40_60': 4, 'c_60_80': 5}
    result_db = [bin_codes.get(label, 6) for label in result_db]
    result_km = [bin_codes.get(label, 6) for label in result_km]

    # Create a csv file
    df_op = pd.DataFrame(columns = ['dbscan', 'kmeans'])
    df_op['dbscan'] = result_db
    df_op['kmeans'] = result_km
    df_op.to_csv('result_output.csv', index=False)

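# Notebook output residue (tail of a warning message, apparently the
# scipy.signal nperseg / input-length warning):
#   .format(nperseg, input_length))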
