In [23]:
import pandas as pd
import numpy as np
#get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
#from sklearn.metrics import r2_score
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
#from keras.optimizers import Adam
from keras.layers import LSTM

import seaborn as sns


from scipy import stats

import random
import re
from dateutil.parser import parse

import warnings  
warnings.filterwarnings('ignore')

import os

import math

from statistics import mean

from datetime import timedelta
from datetime import datetime

cwd = os.getcwd()
def_training = pd.read_csv(cwd+'/GlucoCheck/Data/consolidatedDataForPaper.csv')
consolidatedData = pd.read_csv(cwd+'/GlucoCheck/Data/consolidatedDataForPackage.csv')
consolidated_meta = pd.read_csv(cwd+'/GlucoCheck/Data/consolidatedMetadata.csv')



In [24]:
from GlucoCheck.glucoCheck import glucoCheckOps

In [25]:
def createGap(df,start,end):
    """
    Creating a Gap
    input:
        start: seed
        end: seed + gap
    output:
        df: dataframe with index => DisplayTime value => GlucoseValues and a gap from start to end (inputs)
    """

    #df = readData()
    l = len(df.index)
    if end>l:
        end = l
        
    for i in range(start,end):
        df['GlucoseValue'][i]=0
    
    return df

In [26]:
def fullDay(data):
    dates = list()
    data = data.reset_index(drop=True)
    for i in range(0,len(data.index)):
        dates.append(data['Display Time'][i].date())
    data['Dates'] = dates
    end = data['Dates'].iloc[-1]
    start = data['Dates'].iloc[0]

    indexVals = data[ data['Dates'] == start ].index
    # indexVals
    data.drop(indexVals , inplace=True)

    indexVals = data[ data['Dates'] == end ].index
    # indexVals
    data.drop(indexVals , inplace=True)

    data = data.reset_index(drop=True)

    data.drop(['Dates'], axis=1, inplace=True)

    return data


def fullDaysOnly(data):
    data_fullDays = pd.DataFrame()

    for subjectId, df in data.groupby('subjectId'):
        df['Display Time'] = pd.to_datetime(df['Display Time'])
        df = df.reset_index(drop=True)
        temp = fullDay(df)
        data_fullDays = pd.concat([data_fullDays, temp],ignore_index=True)

    return(data_fullDays)

def datePreprocess(data):
    # data = data.reset_index()
    length = data.shape[0]
    for i in range(0,length):
        #print(i)
        s = str(data.iloc[i]['Display Time'])
        k = re.sub("[^0-9]", "", s)
        datetimeObj = parse(k) 
        data = data.replace(to_replace = s, value = datetimeObj)

    data = data.set_index(['Display Time'], drop=True)

    return(data)


In [27]:


def train(data = def_training):

    print("Training Model...\n\n")  
    data = fullDaysOnly(data)     
    data.drop(['subjectId'], axis=1, inplace=True)


    data['Display Time'] = data['Display Time'].apply(lambda x: pd.datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S'))
    data = data.set_index(['Display Time'], drop=True)

    scaler = MinMaxScaler(feature_range=(0, 1))
    train_sc = scaler.fit_transform(data)

    #Reshaping the data to work for an LSTM network

    train_sc_df = pd.DataFrame(train_sc, columns=['Y'], index=data.index)


    for s in range(1,2):
        train_sc_df['X_{}'.format(s)] = train_sc_df['Y'].shift(s)

    X_train = train_sc_df.dropna().drop('Y', axis=1)
    y_train = train_sc_df.dropna().drop('X_1', axis=1)


    X_train = X_train.as_matrix()
    y_train = y_train.as_matrix()


    X_train_lmse = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)


    # print('Train shape: ', X_train_lmse.shape)


    lstm_model = Sequential();
    lstm_model.add(LSTM(7, input_shape=(1, X_train_lmse.shape[1]), activation='relu', kernel_initializer='lecun_uniform', return_sequences=False));
    lstm_model.add(Dense(1));
    lstm_model.compile(loss='mean_squared_error', optimizer='adam');
    early_stop = EarlyStopping(monitor='loss', patience=2, verbose=1);
    history_lstm_model = lstm_model.fit(X_train_lmse, y_train, epochs=1, batch_size=1, verbose=1, shuffle=False, callbacks=[early_stop]);
    print("Model trained successfully!")


def impute(test_data,flag=0):
    
    test_data = datePreprocess(test_data)
    b,e,s,f,gaps = .detectGap(test_data)
    test = test_data.iloc[0:f]
    test.drop(['subjectId'], axis=1, inplace=True)


    scaler = MinMaxScaler(feature_range=(0, 1))
    test_sc = scaler.fit_transform(test)
    X_test = test_sc[:-1]
    y_test = test_sc[1:]
    test_sc_df = pd.DataFrame(test_sc, columns=['Y'], index=test.index)
    for s in range(1,2):
        test_sc_df['X_{}'.format(s)] = test_sc_df['Y'].shift(s)


    X_test = test_sc_df.dropna().drop('Y', axis=1)
    y_test = test_sc_df.dropna().drop('X_1', axis=1)

    X_test = X_test.as_matrix()
    y_test = y_test.as_matrix()

    X_test_lmse = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    #print(X_test_lmse)
    y_pred_test_lstm = lstm_model.predict(X_test_lmse);

    #print("The R2 score on the Test set is:\t{:0.3f}".format(r2_score(y_test, y_pred_test_lstm)))


    #inversing the scaling
    lstm_pred = scaler.inverse_transform(y_pred_test_lstm)
    test_val = scaler.inverse_transform(y_test)

    lstm_pred = lstm_pred.tolist()

    lstm_pred = lstm_pred*200

    x=0
    for i in range(b-1,e):
        test_data['GlucoseValue'][i] = lstm_pred[x][0]
        x+=1


    if flag==1:
        return test_data
    else:
        print("Imputations performed!")
        # test_data['subjectId'] = subj_id
        test_data.to_csv(cwd+"/GlucoCheck/Data/Output/ImputedValues-temp.csv") 
        print("File saved!\nLocation:"+str(cwd+"/GlucoCheck/Data/Output/ImputedValues-temp.csv"))



SyntaxError: invalid syntax (<ipython-input-27-c76ff4f86e15>, line 48)

In [28]:
#Extracting Test Data
data = pd.read_csv("~/Desktop/NCSA_genomics/Python - notebooks/GlucoCheck/Data/Hall/data_hall_raw.csv")
data = data[data['subjectId']=='1636-69-032']
data['Display Time'] = data['Display Time'].apply(lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
data = data.reset_index(drop=True)
data

Unnamed: 0,Display Time,GlucoseValue,subjectId
0,2016-01-13 12:58:17,122,1636-69-032
1,2016-01-13 13:03:17,123,1636-69-032
2,2016-01-13 13:08:17,124,1636-69-032
3,2016-01-13 13:13:17,128,1636-69-032
4,2016-01-13 13:18:17,133,1636-69-032
...,...,...,...
1778,2016-01-19 17:12:49,101,1636-69-032
1779,2016-01-19 17:17:49,98,1636-69-032
1780,2016-01-19 17:22:49,101,1636-69-032
1781,2016-01-19 17:27:49,106,1636-69-032


In [29]:
start = 1100
end = start+500
data_with_missing = data.copy()
data_with_missing = createGap(data_with_missing,start,end)
data_with_missing

Unnamed: 0,Display Time,GlucoseValue,subjectId
0,2016-01-13 12:58:17,122,1636-69-032
1,2016-01-13 13:03:17,123,1636-69-032
2,2016-01-13 13:08:17,124,1636-69-032
3,2016-01-13 13:13:17,128,1636-69-032
4,2016-01-13 13:18:17,133,1636-69-032
...,...,...,...
1778,2016-01-19 17:12:49,101,1636-69-032
1779,2016-01-19 17:17:49,98,1636-69-032
1780,2016-01-19 17:22:49,101,1636-69-032
1781,2016-01-19 17:27:49,106,1636-69-032


In [30]:
model = train()

Training Model...




NameError: name 'self' is not defined