# Train Model

In [3]:
#Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

In [1]:
%store -r ili_h1
%store -r group_df

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import sys

# add the 'src' directory as one where we can import modules
src_dir = os.path.join(os.getcwd(), os.pardir, os.pardir,'src')
sys.path.append(src_dir)

In [3]:
# %load ../../src/models/train_model.py
import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas import concat
from random import randrange
from pandas import Series
from pandas import datetime

from numpy import concatenate
from matplotlib import pyplot

%aimport models.helpers
from models.helpers import load_pandas
from models.helpers import series_to_supervised

from sklearn.metrics import mean_squared_error

ERROR:root:Line magic function `%aimport` not found.


In [9]:
from hyperopt import Trials, STATUS_OK, tpe
from keras.datasets import mnist
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.utils import np_utils
from keras.layers import Dense
from keras.layers import LSTM

from hyperas import optim
from hyperas.distributions import choice, uniform, conditional
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


In [18]:
def data():
    """
    Data providing function handed to hyperas (see ``main``).

    Loads the interim ILI pickle found under ``<cwd>/../../data/interim``,
    reframes it with ``series_to_supervised`` (``n_weeks`` lagged steps, one
    forecast step), splits the rows into a train and a test partition and
    reshapes the inputs to 3D ``[samples, timesteps, features]``.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)``. Inputs have shape
        ``(samples, n_weeks, n_features * n_states)``; targets are the column
        sitting ``n_features`` positions before the end of each reframed row.

    Raises:
        ValueError: if the reframed table is not wider than the input window,
            which would let target columns slip into the inputs.

    NOTE: hyperas re-uses the source of this function, so keep the names of
    the four arrays in sync with the parameters of ``create_model`` and keep
    a single trailing return statement.
    """

    processed_dir = os.path.join(os.getcwd(), os.pardir, os.pardir, 'data')
    input_filepath = processed_dir + \
        "/interim/001-BB-CDC_ILI_2010-2015_US_STATES-DATA_interim.pickle"

    scaled = load_pandas(input_filepath)

    # framing constants
    n_states = 48
    n_years = 3
    n_weeks = 4     # number of lag weeks fed to the network
    n_features = 6  # features (presumably per state -- TODO confirm)

    reframed = series_to_supervised(scaled, n_weeks, 1)

    # split into train and test sets
    values = reframed.values
    # NOTE(review): n_years is multiplied in twice and 48 is hard-coded next
    # to n_states; the value is kept unchanged here -- confirm the intended
    # number of training rows.
    n_train_weeks = n_years * 48 * n_years * n_states
    # The notebook defines no `logger`, so report through print instead.
    print("n_train_weeks : {}".format(n_train_weeks))
    train = values[:n_train_weeks, :]
    test = values[n_train_weeks:, :]

    # split into input and outputs
    n_step_inputs = n_features * n_states  # values per lagged week
    n_obs = n_weeks * n_step_inputs
    if values.shape[1] <= n_obs:
        # A narrower table would make the slices below silently include the
        # forecast columns in the inputs.
        raise ValueError(
            "reframed data has {} columns, expected more than {}".format(
                values.shape[1], n_obs))

    x_train, y_train = train[:, :n_obs], train[:, -n_features]
    x_test, y_test = test[:, :n_obs], test[:, -n_features]

    # reshape input to be 3D [samples, timesteps, features]; the last axis
    # must hold every value of one lagged week so that it matches n_obs.
    x_train = x_train.reshape((x_train.shape[0], n_weeks, n_step_inputs))
    x_test = x_test.reshape((x_test.shape[0], n_weeks, n_step_inputs))

    return x_train, y_train, x_test, y_test

#### [_link to the article_](https://github.com/Kulbear/deep-learning-nano-foundation/wiki/ReLU-and-Softmax-Activation-Functions#rectified-linear-units)


### Rectified Linear Units

A rectified linear unit (ReLU) outputs 0 if its input is less than 0, and the raw input otherwise: $f(x) = \max(0, x)$. That is, if the input is greater than 0, the output is equal to the input. ReLUs' machinery is more like that of a real neuron in your body.

### Adaptive Moment Estimation ([Adam](http://ruder.io/optimizing-gradient-descent/index.html#adam))

Adam is an optimization method, a variant of stochastic gradient descent (SGD), that computes an adaptive learning rate for each parameter.

In [30]:
# Scratch check: build the 'models' path three parent directories above the
# current working directory and display it.
# NOTE(review): create_model() climbs only two parent directories to reach
# 'models'; the value displayed below appears to resolve outside the project
# folder -- confirm which depth is intended.
processed_dir = os.path.join(os.getcwd(),os.pardir,os.pardir,os.pardir, 'models')
processed_dir

'/Users/bbuildman/Documents/Developer/GitHub/001-BB-DL-ILI/notebooks/data/../../../models'

In [27]:
def create_model(x_train, y_train, x_test, y_test):
    """
    Model providing function handed to hyperas (see ``main``).

    Builds a one-layer LSTM regressor, trains it with the optimizer and batch
    size picked by the search (the two double-curly-bracket template
    expressions below), saves the learning curves and reports the result.

    Args:
        x_train: 3D training inputs; axes 1 and 2 define the LSTM input shape.
        y_train: training targets.
        x_test: held-out inputs, used for validation and the final score.
        y_test: held-out targets.

    Returns:
        dict: a valid hyperopt result with
            - loss: numeric value to be minimized (the held-out loss),
            - status: STATUS_OK,
            - model: the trained model, so it can be used again later.

    Side effects:
        Creates ``<cwd>/../../models`` and ``<cwd>/../../reports/figures``
        when missing, writes the checkpoint ``keras_LTSM200_D1.hdf5`` and the
        two learning-curve PNG files. Every trial writes to the same file
        names, so they hold the output of the most recent trial.

    NOTE: hyperas copies the source of this function into a generated module,
    so everything it needs is kept inside the body and the template
    expressions must stay exactly where they are.
    """

    # Resolve every output location from the project root, the same root the
    # data function uses for its 'data' folder.
    project_dir = os.path.join(os.getcwd(), os.pardir, os.pardir)
    models_dir = os.path.join(project_dir, 'models')
    figures_dir = os.path.join(project_dir, 'reports', 'figures')
    for directory in (models_dir, figures_dir):
        # isdir/makedirs pair instead of exist_ok to stay usable on Python 2.
        if not os.path.isdir(directory):
            os.makedirs(directory)
    checkpoint_filepath = models_dir + "/keras_LTSM200_D1.hdf5"

    model = Sequential()
    model.add(LSTM(200, input_shape=(x_train.shape[1], x_train.shape[2])))
    model.add(Dense(1))
    model.add(Activation('relu'))  # rectified linear unit

    # Single compile call; an earlier compile with a fixed optimizer was
    # immediately overridden by this one and has been dropped.
    model.compile(loss='mae', metrics=['accuracy'],
                  optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})

    early_stopping = EarlyStopping(monitor='val_loss', patience=4)
    checkpointer = ModelCheckpoint(filepath=checkpoint_filepath,
                                   verbose=2,
                                   save_best_only=True)

    # The callbacks only take effect when they are passed to fit().
    history = model.fit(x_train, y_train,
                        batch_size={{choice([100, 300, 500])}},
                        epochs=50,
                        verbose=2,
                        validation_data=(x_test, y_test),
                        callbacks=[early_stopping, checkpointer])

    score, acc = model.evaluate(x_test, y_test, verbose=2)

    # list all data in history
    print(history.history.keys())

    # The accuracy entry is named 'acc' or 'accuracy' depending on the Keras
    # release in use.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'
    curves = [
        (acc_key, 'model accuracy', 'accuracy',
         ['train', 'test'], 'upper left', "/model_accuracy.png"),
        ('loss', 'model train vs validation loss', 'loss',
         ['train', 'validation'], 'upper right',
         "/model_train_vs_validation_loss.png"),
    ]
    for key, title, ylabel, legend, legend_loc, filename in curves:
        fig = pyplot.figure()
        pyplot.plot(history.history[key])
        pyplot.plot(history.history['val_' + key])
        pyplot.title(title)
        pyplot.ylabel(ylabel)
        pyplot.xlabel('epoch')
        pyplot.legend(legend, loc=legend_loc)
        fig.savefig(figures_dir + filename)
        # Release the figure; one pair is created per search trial.
        pyplot.close(fig)

    # This is a one-output regression trained on mean absolute error, so the
    # held-out loss is the quantity to minimize; accuracy carries no useful
    # signal for continuous targets.
    return {'loss': score, 'status': STATUS_OK, 'model': model}

In [25]:
def main(notebook_name=None, max_evals=5):
    """Run the hyperas hyper-parameter search and report the best model.

    Args:
        notebook_name: file name of this notebook without the ``.ipynb``
            extension. When hyperas runs inside Jupyter it needs this name to
            find the source of the model and data functions; it is forwarded
            to ``optim.minimize`` only when given, so plain script usage is
            unchanged.
        max_evals: number of search evaluations (defaults to the previously
            hard-coded value of 5).

    Prints the chosen hyper-parameters and the evaluation of the best model
    on the test split, then plots predictions against actual values for test
    samples 40 to 59.
    """
    search_kwargs = {}
    if notebook_name is not None:
        search_kwargs['notebook_name'] = notebook_name

    best_run, best_model = optim.minimize(model=create_model,
                                          data=data,
                                          algo=tpe.suggest,
                                          max_evals=max_evals,
                                          trials=Trials(),
                                          **search_kwargs)
    # Only the held-out split is needed for the report below.
    _, _, X_test, Y_test = data()

    print("Best performing model chosen hyper-parameters:")
    print(best_run)

    print("Evalutation of best performing model:")
    print(best_model.evaluate(X_test, Y_test))
    predicted = best_model.predict(X_test)
    pyplot.plot(pd.DataFrame(predicted)[40:60])
    pyplot.plot(pd.DataFrame(Y_test)[40:60])
    pyplot.legend(['pred', 'actual'])

In [31]:
# Launch the hyper-parameter search.
# NOTE(review): the output recorded below shows this call failing with an
# IOError whose path ends in '<ipython-input-...>', i.e. the search looked for
# the calling source file on disk. The hyperas documentation describes a
# `notebook_name` argument of `optim.minimize` for notebook use -- confirm
# and supply it.
main()

IOError: [Errno 2] No such file or directory: '/Users/bbuildman/Documents/Developer/GitHub/001-BB-DL-ILI/notebooks/data/<ipython-input-25-a592e1f2d29a>'

In [1]:
# TODO: present a ROC curve for the data to show the model's performance.
# TODO: frame the presentation as a question.