# In [None]:
#import libraries
import os
import matplotlib.pyplot as plt
import numpy as np
from pandas import read_csv
from pandas import concat
from pandas import DataFrame
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten, Dropout
from keras.layers import LSTM
from keras.utils import plot_model
from sklearn.metrics import mean_squared_error
from keras.models import load_model
from scipy import stats
import copy

# Ask TensorFlow's native runtime to log less.
# NOTE(review): this is assigned after the keras imports above; if TensorFlow
# reads the variable at import time it may have no effect here - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# CSV file to load; the functions below iterate over every column of it.
fileName = '1b.csv'

data = read_csv(fileName)
# dataAvg / dataStd are indexed by column position in test() and handed to
# measureRmse(), where the de-standardisation that would use them is
# currently commented out.
dataAvg = [214.177329] #mean of data required to standardize the data
dataStd = [60.59258357] #standard deviation of data required to standardize the data
# Used as the path for model.save()/load_model() and as the prefix of the
# predictions file written by measureRmse().
modelName = "lstm_recurrentdrop_B"   #required to save the model
# Differencing interval used by difference(); train() and test() skip
# differencing when this is 0.
nDiff = 1

#to make your data stationary and perform de-trending
def difference(data, interval=None):
    """Return the differenced sequence ``data[i] - data[i - interval]``.

    Used to de-trend a series before training. Works on any indexable
    sequence whose elements support subtraction (plain lists of numbers,
    or a 2-D NumPy array, in which case whole rows are subtracted).

    Args:
        data: Indexable sequence to difference.
        interval: Distance between the two elements being subtracted.
            When omitted, the module-level ``nDiff`` is used, which is the
            behaviour callers relied on before this parameter existed.

    Returns:
        A list with ``len(data) - interval`` entries (empty when ``data``
        is not longer than ``interval``).
    """
    if interval is None:
        # Fall back to the file-wide setting so existing one-argument
        # calls keep working unchanged.
        interval = nDiff
    return [data[i] - data[i - interval] for i in range(interval, len(data))]

def shiftData(data, lag, nOut = 1):
    """Turn a series into a table of lagged inputs followed by outputs.

    The result has ``lag`` columns holding the values shifted by
    ``lag, lag-1, ..., 1`` steps, followed by ``nOut`` columns holding the
    values shifted by ``0, -1, ..., -(nOut-1)`` steps. Rows that contain
    any NaN (including those introduced by the shifting) are dropped.

    Args:
        data: Anything ``pandas.DataFrame`` accepts (list, Series, ...).
        lag: Number of past steps to include as input columns.
        nOut: Number of output columns, starting at the current step.

    Returns:
        A DataFrame of shape ``(rows, lag + nOut)`` keeping the original
        row index of the surviving rows.
    """
    series = DataFrame(data)
    # Oldest lag first, so columns read left-to-right in time order.
    pastSteps = [series.shift(step) for step in range(lag, 0, -1)]
    futureSteps = [series.shift(-step) for step in range(nOut)]
    return concat(pastSteps + futureSteps, axis = 1).dropna()

def prepareDataset(data, lag, numPoints):
    """Build the training matrix from every column of ``data``.

    Each column is converted to lagged rows with ``shiftData`` and cut to
    its first ``numPoints`` rows; the per-column results are then stacked
    vertically in column order.

    Args:
        data: DataFrame whose columns are the individual series.
        lag: Number of lagged inputs per row (forwarded to ``shiftData``).
        numPoints: Maximum number of rows kept per column.

    Returns:
        A 2-D NumPy array with ``lag + 1`` columns, or an empty list when
        ``data`` has no columns.
    """
    perColumn = [
        np.array(shiftData(data[name], lag))[:numPoints]
        for name in data.columns
    ]
    if not perColumn:
        return list()
    if len(perColumn) == 1:
        return perColumn[0]
    # Stack all column blocks row-wise in one go.
    return np.concatenate(perColumn, axis = 0)

def train(trainingData, config):
    """Fit a one-layer LSTM regressor and plot its loss curves.

    Args:
        trainingData: 2-D array; the last column is the target and the
            preceding columns are the lagged inputs.
        config: Sequence ``(inputShape, numNode, numEpoch, batchSize)``:
            number of time steps per sample, LSTM units, training epochs
            and batch size.

    Returns:
        The fitted Keras ``Sequential`` model.

    Side effects:
        Shows a matplotlib figure with training/validation loss per epoch
        and prints the mean of each loss over all epochs.
    """
    inputShape, numNode, numEpoch, batchSize = config

    # Optionally de-trend by differencing consecutive rows.
    samples = np.array(difference(trainingData)) if nDiff > 0 else trainingData
    features = samples[:, :-1]
    targets = samples[:, -1]
    # The LSTM layer expects (samples, time steps, features per step).
    features = features.reshape((features.shape[0], features.shape[1], 1))

    model = Sequential()
    # Recurrent dropout is used instead of a separate Dropout layer; the
    # original author advises against combining the two.
    model.add(LSTM(numNode, activation = 'relu', input_shape = (inputShape, 1), recurrent_dropout = 0.2))
    model.add(Dense(1))
    model.compile(loss = 'mse', optimizer = 'adam')
    history = model.fit(
        features,
        targets,
        validation_split = 0.2,
        epochs = numEpoch,
        batch_size = batchSize,
        verbose = 0,
        shuffle = False,
    )

    # Summarize the loss history.
    lossCurves = history.history
    for curveName in ('loss', 'val_loss'):
        plt.plot(lossCurves[curveName])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    plt.close()

    print("training loss:" , np.mean(np.array(lossCurves['loss'])))
    print("Validation loss:" , np.mean(np.array(lossCurves['val_loss'])))
    return model

def predict(model, testX, inputShape):
    """Run the model on a single sample and return its prediction.

    Args:
        model: Object exposing ``predict(batch, verbose=...)``.
        testX: The ``inputShape`` input values of one sample.
        inputShape: Number of time steps in the sample.

    Returns:
        The first (and only) row of the model's output for the sample.
    """
    # Wrap the sample as a batch of one: (1, time steps, 1 feature).
    singleBatch = np.reshape(np.array(testX), (1, inputShape, 1))
    return model.predict(singleBatch, verbose = 0)[0]

#function to calculate r-square (value lies between 0 and 1)    
def rsquared(x, y):
    """Return the squared correlation coefficient of a linear fit of y on x.

    Both arguments are 1-D array-likes of equal length.
    """
    fit = stats.linregress(x, y)
    return fit.rvalue ** 2

def measureRmse(actual, predicted, dataAvg, dataStd, columnName):
    """Report error metrics for one column and return the 7-sample RMSE.

    Args:
        actual: Observed values.
        predicted: Model outputs, one per observed value.
        dataAvg: Column mean. Accepted for interface compatibility but
            unused: the de-standardisation step is disabled.
        dataStd: Column standard deviation. Unused, see ``dataAvg``.
        columnName: Column label, used to name the output files.

    Returns:
        RMSE between the means of consecutive 7-sample chunks of
        ``actual`` and ``predicted`` (the last chunk may be shorter).

    Side effects:
        Writes ``<modelName>predicted_<columnName>`` and
        ``actual_<columnName>`` with ``np.savetxt``, shows a plot of the
        chunk means, and prints the per-sample RMSE, the chunked RMSE and
        ``1 - R^2``.
    """
    actual = np.array(actual)
    predicted = np.array(predicted)
    np.savetxt(modelName + "predicted_" + columnName, predicted)
    np.savetxt("actual_" + columnName, actual)

    # Average over consecutive chunks of 7 samples (7 days, per the
    # original author's note) before comparing.
    chunkStarts = range(0, actual.shape[0], 7)
    actualAvg7 = np.array([np.mean(actual[i:i + 7]) for i in chunkStarts])
    predictedAvg7 = np.array([np.mean(predicted[i:i + 7]) for i in chunkStarts])

    plt.plot(actualAvg7)
    plt.plot(predictedAvg7)
    plt.show()

    avg_rmse = np.sqrt(mean_squared_error(actualAvg7, predictedAvg7))
    actual_rmse = np.sqrt(mean_squared_error(actual, predicted))

    print("Actual_rmse",actual_rmse)
    print("Average_rmse",avg_rmse)

    # Flatten both arrays to 1-D, which is what rsquared() expects.
    r_squared = rsquared(actual.reshape(actual.shape[0]), predicted.reshape(predicted.shape[0]))
    # The goal is a high R^2, i.e. a low (1 - R^2).
    print("1-Rsquared",(1-r_squared))
    return avg_rmse

def test(model, data, config, testDataFirstPredictionIndex):
    """Evaluate the model on the tail of every column and sum the errors.

    For each column, lagged samples are built starting just before
    ``testDataFirstPredictionIndex``, the model predicts one value per
    sample, differencing (if enabled via ``nDiff``) is undone, and
    ``measureRmse`` reports the metrics for that column.

    Args:
        model: Fitted model passed on to ``predict``.
        data: DataFrame whose columns are the individual series.
        config: Configuration sequence; only ``config[0]`` (the lag) is
            read here.
        testDataFirstPredictionIndex: Row position of the first value to
            be predicted.

    Returns:
        The sum over all columns of the value returned by
        ``measureRmse`` (0 when ``data`` has no columns). Previously only
        the last column's error was returned and the running total was
        discarded.
    """
    lag = config[0]
    testDataStartingIndex = testDataFirstPredictionIndex - lag
    totalError = 0
    for cnt, column in enumerate(data.columns):
        # NOTE(review): the extra -1 makes room for the row lost to
        # differencing, which matches nDiff == 1; confirm before using
        # another nDiff.
        testData = np.array(shiftData(data[column][testDataStartingIndex-1:], lag))
        # Undifferenced targets; row i is the value that row i of the
        # differenced data was subtracted from.
        correction = testData[:, -1]
        if nDiff > 0:
            testData = np.array(difference(testData))
        testX, testY = testData[:, :-1], testData[:, -1]

        predictions = list()
        for i in range(testX.shape[0]):
            predictedVal = predict(model, testX[i], lag)
            if nDiff > 0:
                # Undo the differencing on both prediction and target.
                predictedVal = predictedVal + correction[i]
                testY[i] = testY[i] + correction[i]
            predictions.append(predictedVal)

        totalError += measureRmse(testY, predictions, dataAvg[cnt], dataStd[cnt], column)
    return totalError

def executeModel(data, config, numPoints):
    """Train, evaluate and save the model; return the best error seen.

    Args:
        data: DataFrame whose columns are the individual series.
        config: Sequence ``(lag, numNode, numEpoch, batchSize)``; the lag
            is taken from ``config[0]`` rather than from a module global,
            consistent with ``test()``.
        numPoints: Number of rows per column used for training; also the
            position of the first value predicted during evaluation.

    Returns:
        The lowest error returned by ``test`` across the runs, or the
        initial threshold (30) if no run did better.
    """
    lag = config[0]
    trainingData = prepareDataset(data, lag, numPoints)
    # Initial threshold: a run is kept (and saved) only if it beats this.
    # NOTE(review): if no run scores below 30, nothing is saved under
    # modelName.
    bestError = 30
    # Single run for now; raise the count to retrain several times and
    # keep the best model despite training stochasticity.
    for _ in range(1):
        model = train(trainingData, config)
        error = test(model, data, config, numPoints)
        if error < bestError:
            bestError = error
            model.save(modelName)
    return bestError

# Experiment settings.
# Number of lagged values per sample (first entry of config).
lag = 2
# Passed as numPoints: rows per column used for training, and the position
# of the first value predicted during evaluation.
trainingDataSize = 1305
# Unpacked by train() as (inputShape, numNode, numEpoch, batchSize).
config = [lag, 4, 8, 20]
# Train, evaluate and (if the error beats the threshold) save the model.
print(executeModel(data, config, trainingDataSize))

# Reload the model from disk and evaluate it once more.
# NOTE(review): executeModel() only saves when the error is below its
# threshold, so this may load an older file or fail if nothing was saved.
model = load_model(modelName)
print("TEST ERROR:",test(model, data, config, trainingDataSize))


