# Time Series Forecasting

Starting with the basics.

Main examples are taken from here: https://machinelearningmastery.com/how-to-develop-deep-learning-models-for-univariate-time-series-forecasting/

In [46]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import LSTM

from matplotlib import pyplot

## 1) Format data

In [2]:
df = pd.read_table('data/forecasting_tutorial/daily-total-female-births.txt', sep=',', header=0, 
                   names=('Date', 'Births'), dtype={'A': str, 'B': np.float64})
df.head()

Unnamed: 0,Date,Births
0,1959-01-01,35
1,1959-01-02,32
2,1959-01-03,30
3,1959-01-04,31
4,1959-01-05,44


In [53]:
series

Unnamed: 0,Month,Sales
0,1960-01,6550
1,1960-02,8728
2,1960-03,12026
3,1960-04,14395
4,1960-05,14587
5,1960-06,13791
6,1960-07,9498
7,1960-08,8251
8,1960-09,7049
9,1960-10,9545


In [55]:
series

Unnamed: 0,Month,Sales
0,1960-01,6550
1,1960-02,8728
2,1960-03,12026
3,1960-04,14395
4,1960-05,14587
5,1960-06,13791
6,1960-07,9498
7,1960-08,8251
8,1960-09,7049
9,1960-10,9545


In [3]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
    data: Sequence of observations as a list or NumPy array.
    n_in: Number of lag observations as input (X).
    n_out: Number of observations as output (y).
    dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [4]:
forecasting_data = series_to_supervised(list(df.Births), n_in=3)
forecasting_data.head()

Unnamed: 0,var1(t-3),var1(t-2),var1(t-1),var1(t)
3,35.0,32.0,30.0,31
4,32.0,30.0,31.0,44
5,30.0,31.0,44.0,29
6,31.0,44.0,29.0,45
7,44.0,29.0,45.0,43


## 2) MLP for Time Series Forecasting

In [27]:
def build_model():
    # define model
    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=3))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    
    return model

def fit_model(model, X, y, validation_data=None):
    # fit model
    if validation_data is not None:
        model.fit(X, y, epochs=2000, verbose=0, validation_data=validation_data)
    else:
        model.fit(X, y, epochs=2000, verbose=0)
    
    return model

def predict(model, x_input):
    yhat = model.predict(x_input, verbose=0)
    print(yhat)


In [29]:
# define dataset
X = np.array([[10, 20, 30], [20, 30, 40], [30, 40, 50], [40, 50, 60]])
y = np.array([40, 50, 60, 70])

model = build_model()
model = fit_model(model, X, y)

# demonstrate prediction
x_input = np.array([50, 60, 70])
x_input = x_input.reshape((1, 3))
predict(model, x_input)
model.evaluate(x_input, [80])

[[80.790794]]


0.6253557205200195

In [30]:
# define dataset
X = forecasting_data.drop(columns='var1(t)')
y = forecasting_data[['var1(t)']]

# split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

model = build_model()
model = fit_model(model, X_train, y_train, validation_data=(X_test, y_test))

# prediction
model.evaluate(X_test, y_test)



47.47899805704753

## 3) CNN for Time Series Forecasting

In [33]:
# define dataset
X = np.array([[10, 20, 30], [20, 30, 40], [30, 40, 50], [40, 50, 60]])
y = np.array([40, 50, 60, 70])
# reshape from [samples, timesteps] into [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
# define model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(3, 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# fit model
model.fit(X, y, epochs=1000, verbose=0)
# demonstrate prediction
x_input = np.array([50, 60, 70])
x_input = x_input.reshape((1, 3, 1))
yhat = model.predict(x_input, verbose=0)
print(yhat)

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
[[82.47097]]


## 4) LSTM for Time Series Forecasting

In [42]:
# define dataset
X = np.array([[10, 20, 30], [20, 30, 40], [30, 40, 50], [40, 50, 60]])
y = np.array([40, 50, 60, 70])
# reshape from [samples, timesteps] into [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(3, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# fit model
model.fit(X, y, epochs=1000, verbose=0)
# demonstrate prediction
x_input = np.array([50, 60, 70])
x_input = x_input.reshape((1, 3, 1))
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[83.55582]]
