# Predict reported number of chickenpox cases in NYC

### Load and Visualize Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Read the series from disk into a NumPy array. np.loadtxt is called with its
# default (whitespace) delimiter, so this presumably expects one numeric value
# per line despite the .csv extension -- TODO confirm against the data file.
dataset = np.loadtxt('datasets/chickenpox.csv')

In [None]:
# Plot the raw series against its sample index to get a first look at the data.
plt.plot(dataset)
# Axis labels: the x-axis is the position in the series (labelled as months),
# the y-axis is the series value (labelled as normalized case counts).
plt.xlabel('month')
plt.ylabel('(normalized) reported number of chickenpox cases')

### Create time series sequences

In [None]:
# partition data into windows for the RNN model
def window_transform_series(series, window_size):
    """Slice a series into fixed-length input windows and next-step targets.

    Window ``i`` holds ``series[i:i + window_size]`` and its target is the
    value immediately following it, ``series[i + window_size]``.

    Args:
        series: Sequence or array of observations, ordered in time.
        window_size: Number of consecutive observations per input window.
            Must be a positive integer.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(len(series) - window_size, window_size)`` and ``y`` has shape
        ``(len(series) - window_size, 1)``. When the series is not longer
        than ``window_size`` both arrays have zero rows but keep their
        column dimension, so 2-D indexing downstream still works.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    series = np.asarray(series)
    n_windows = max(len(series) - window_size, 0)

    if n_windows == 0:
        # Too few observations for even one window: return an explicitly
        # 2-D empty array rather than a shapeless (0,) one.
        X = np.empty((0, window_size), dtype=series.dtype)
    else:
        X = np.asarray([series[i:i + window_size] for i in range(n_windows)])
        # Collapse any trailing singleton axis (e.g. a column-vector series)
        # so X is always [samples, window_size].
        X = X.reshape(X.shape[0:2])

    # Targets are everything after the first window, as a column vector.
    y = series[window_size:]
    y = y.reshape((len(y), 1))

    return X, y

In [None]:
# each input sample is a run of 12 consecutive values from the series
window_size = 12
X, y = window_transform_series(dataset, window_size)

### Split into training and testing sets

In [None]:
# index of the first testing sample: two-thirds of the way through, rounded up
train_test_split = int(np.ceil(2 * len(y) / float(3)))

# everything before the split index trains the model, the rest is held out for testing
X_train, y_train = X[:train_test_split, :], y[:train_test_split]
X_test, y_test = X[train_test_split:, :], y[train_test_split:]

# Keras' RNN LSTM module wants 3-D input: [samples, window size, 1]
X_train = np.asarray(X_train).reshape((X_train.shape[0], window_size, 1))
X_test = np.asarray(X_test).reshape((X_test.shape[0], window_size, 1))

### Build and run an RNN regression model

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import keras

# assemble the two-layer RNN regressor
def build_part1_RNN(window_size):
    """Return an uncompiled Sequential model: LSTM(5) followed by Dense(1).

    The LSTM layer takes inputs of shape ``(window_size, 1)`` and has 5 hidden
    units; the fully connected layer after it has a single output unit.
    """
    layers = [
        LSTM(5, input_shape=(window_size, 1)),
        Dense(1),
    ]
    return Sequential(layers)
model = build_part1_RNN(window_size)

# build the optimizer using the RMSprop settings recommended in the Keras documentation.
# `learning_rate` is the current name of the deprecated `lr` keyword, and the legacy
# `decay` argument is omitted: it was set to 0.0 (no decay, the default) and newer
# Keras optimizers reject or ignore it.
optimizer = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)

# compile the model with a mean-squared-error loss for regression
model.compile(loss='mean_squared_error', optimizer=optimizer)

In [None]:
# train the model on the training windows: 1000 epochs, mini-batches of 50
# samples, with verbose=0 so no per-epoch progress is printed
model.fit(X_train, y_train, epochs=1000, batch_size=50, verbose=0)

###  Check model performance

In [None]:
# generate predictions for both the training inputs and the held-out testing inputs
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

In [None]:
# evaluate the compiled loss on each partition (verbose=0 keeps evaluation silent)
training_error = model.evaluate(X_train, y_train, verbose=0)
testing_error = model.evaluate(X_test, y_test, verbose=0)

# report both errors, training first
print('training error = ' + str(training_error))
print('testing error = ' + str(testing_error))

In [None]:
import matplotlib.pyplot as plt

# original series in black as the reference curve
plt.plot(dataset, color='k')

# x positions for the predictions: training predictions start at index
# window_size and run up to split_pt, where the testing predictions take over
split_pt = train_test_split + window_size
train_axis = np.arange(window_size, split_pt, 1)
test_axis = np.arange(split_pt, split_pt + len(test_predict), 1)

# training fit in blue, testing fit in green
plt.plot(train_axis, train_predict, color='b')
plt.plot(test_axis, test_predict, color='g')

# label the axes and place the legend to the right of the plot area
plt.xlabel('month')
plt.ylabel('(normalized) reported number of chickenpox cases')
plt.legend(
    ['original series', 'training fit', 'testing fit'],
    loc='center left',
    bbox_to_anchor=(1, 0.5),
)
plt.show()