In [None]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
%matplotlib inline 
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D

# load the raw series from disk
df = pd.read_csv('./data/time_series_course_data/daily-total-female-births.csv')
# parse the Date column into datetime values using the year-month-day format
df = df.assign(Date=pd.to_datetime(df['Date'], format='%Y-%m-%d'))

# convert to supervised learning problem
def transform_to_supervised(s, window_size):
    """
    Takes a series as an input and creates a dataframe where the rows correspond to a supervised learning format
    ("rolling window"). Row i holds the window_size + 1 consecutive observations s[i], ..., s[i + window_size]:
    the first window_size entries are the X values and the last entry is the y value.
    INPUT:
        - s: pd.Series() (or any 1-D array-like) with all the values that shall be put in the supervised
             learning format. Time series data.
        - window_size: non-negative integer, the number of X entries per row (Python or NumPy integer).
    OUTPUT:
        - df: pd.DataFrame() with len(s) - window_size rows and window_size + 1 columns labelled
              0..window_size. Integer input is returned as float64. An empty DataFrame is returned
              when the series is too short to form a single window.
    RAISES:
        - TypeError: if window_size is not an integer.
        - ValueError: if window_size is negative.
    """
    # Explicit validation instead of `assert`, which is stripped when Python runs with -O.
    if isinstance(window_size, bool) or not isinstance(window_size, (int, np.integer)):
        raise TypeError("window_size must be an integer, got %s" % type(window_size).__name__)
    if window_size < 0:
        raise ValueError("window_size must be non-negative, got %d" % window_size)

    values = np.asarray(s)
    # The previous shift-based implementation promoted integer data to float
    # (shifting introduces NaN); keep that output dtype for callers.
    if np.issubdtype(values.dtype, np.integer):
        values = values.astype(np.float64)

    width = int(window_size) + 1
    n_rows = len(values) - int(window_size)
    if n_rows <= 0:
        return pd.DataFrame()

    # Build the frame once from all windows: DataFrame.append no longer exists in
    # pandas >= 2.0 and copied the whole frame on every iteration.
    return pd.DataFrame([values[start:start + width] for start in range(n_rows)])

# scaler object
def scale_data(train, test):
    """
    Scales the train and test data with a scaler fitted on the training data only.
    The training data is mapped into the range [-1, 1]; the test data is transformed
    with the same scaler, so test values outside the training range fall outside [-1, 1].
    Input:
        - train: training data as numpy array in supervised learning format.
        - test: test data as numpy array in supervised learning format.
    Output:
        - scaler: the fitted scaler object
        - train_scaled: scaled training data
        - test_scaled: scaled test data
    """
    # NOTE(review): MinMaxScaler is not imported in the visible part of this file
    # (presumably sklearn.preprocessing.MinMaxScaler) - import it before calling.

    # Force both arrays to 2-D (rows, columns) *before* fitting, so that the scaler
    # is fitted on exactly the layout it later transforms.
    train = train.reshape(train.shape[0], train.shape[1])
    test = test.reshape(test.shape[0], test.shape[1])

    # fit scaler on the training data only
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)

    # apply the training-set scaling to both splits
    train_scaled = scaler.transform(train)
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled

# hyperparameters
window_size = 3  # number of past observations fed to the network per sample
batch_size = 4
epochs = 200
neurons = 100  # units in the hidden dense layer

# convert to supervised learning problem:
# every row holds window_size inputs followed by the value to predict
supervised_values = transform_to_supervised(df['Births'], window_size).values

# split into train and test set, keeping the row order:
# the trailing `split` fraction of the rows becomes the test set
split = 0.33
n_windows = len(supervised_values)
train_size = int(n_windows * (1 - split))
test_size = n_windows - train_size
train_supervised = supervised_values[:train_size]
test_supervised = supervised_values[train_size:]

# scaler?
# scaler, train_scaled, test_scaled = scale_data(train_supervised, test_supervised)

# select X and y for training and testing: the last column is the target
X_train, y_train = train_supervised[:, :-1], train_supervised[:, -1]
X_test, y_test = test_supervised[:, :-1], test_supervised[:, -1]

# reshape to (samples, timesteps, 1) by adding a trailing axis
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]

# define model: 1-D convolution -> max pooling -> flatten -> dense hidden layer -> single output
model = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(window_size, 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(neurons, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# fit model (silently)
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0)

# Estimate model performance: the model is compiled with loss='mse' and no extra
# metrics, so the evaluated score is reported as MSE and its square root as RMSE
trainScore = model.evaluate(X_train, y_train, verbose=0)
train_rmse = np.sqrt(trainScore)
print('Train Score: %.2f MSE (%.2f RMSE)' % (trainScore, train_rmse))
testScore = model.evaluate(X_test, y_test, verbose=0)
test_rmse = np.sqrt(testScore)
print('Test Score: %.2f MSE (%.2f RMSE)' % (testScore, test_rmse))

# VISUAL INSPECTION
# generate predictions for training and test
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Shift the predictions back onto the time axis of df.
# Row i of the supervised matrix predicts the observation at position
# i + window_size of the original series, so every prediction has to be
# written window_size steps later than its row number.

# train data: NaN-filled column with the predictions inserted at the shifted position
train_start = window_size
train_end = train_start + len(train_predict)
train_predict_full = np.empty(shape=(len(df), 1))
train_predict_full[:, :] = np.nan
train_predict_full[train_start:train_end, :] = train_predict

# test data: the test rows follow directly after the training rows, so their
# targets start where the training targets end (previously they were written
# window_size steps too early, misaligned with the actual series)
test_start = train_end
test_end = test_start + len(test_predict)
test_predict_full = np.empty(shape=(len(df), 1))
test_predict_full[:, :] = np.nan
test_predict_full[test_start:test_end, :] = test_predict

predictions = pd.DataFrame({'train': train_predict_full.flatten(), 'test': test_predict_full.flatten()})
predictions['Date'] = df['Date']

# plot the observed series against the train and test predictions
plt.plot(df['Date'], df['Births'], label='actual')
plt.plot(predictions['Date'], predictions['train'], label='train prediction')
plt.plot(predictions['Date'], predictions['test'], label='test prediction')
plt.legend()
plt.show()

Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
