

# RNN Example for Sine Wave

In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

## Data

Let's use Numpy to create a simple sine wave.

In [None]:
# 501 evenly spaced sample points covering [0, 50] (a spacing of 0.1),
# and the sine of each point.
x = np.linspace(start=0, stop=50, num=501)
y = np.sin(x)

In [None]:
# Inspect the sample points produced by np.linspace.
x

In [None]:
# Inspect the sine value computed for each sample point.
y

In [None]:
# Quick visual check of the generated sine wave.
plt.plot(x,y)

Let's turn this into a DataFrame

In [None]:
# One-column frame: the x positions form the index, 'Sine' holds sin(x).
df = pd.DataFrame({'Sine': y}, index=x)

In [None]:
# Display the frame: index is x, the single 'Sine' column is sin(x).
df

## Train Test Split

Note! This is very different from our usual test/train split methodology!

In [None]:
# Total number of rows (one per sample point).
len(df)

In [None]:
# Fraction of the series to hold out as the test set.
test_percent = 0.1

In [None]:
# Number of rows that fraction corresponds to; not necessarily a whole number.
len(df)*test_percent

In [None]:
# Round to a whole number of test rows (np.round still returns a float).
test_point = np.round(len(df)*test_percent)

In [None]:
# Row position where the test set begins; int() because it is used as an iloc slice bound.
test_ind = int(len(df) - test_point)

In [None]:
# Check the split position.
test_ind

In [None]:
# Chronological split: rows before test_ind are training data, the rest is test.
# .copy() makes each split an independent DataFrame, so adding prediction
# columns to `test` later neither triggers SettingWithCopyWarning nor risks
# touching `df`.
train = df.iloc[:test_ind].copy()
test = df.iloc[test_ind:].copy()

In [None]:
# Display the training portion (the earliest rows of the series).
train

In [None]:
# Display the held-out test portion (the final rows of the series).
test

## Scale Data

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Scaler used for the train/test experiment; it is fit on the training split only.
scaler = MinMaxScaler()

In [None]:
# Any warning raised here is only about converting the data to floats and can be ignored.
# Fit on the training data only: fitting on the test set as well would leak
# information about it into the model.
scaler.fit(train)

In [None]:
# Apply the train-fitted scaling to both splits so they share the same scale.
scaled_train = scaler.transform(train)
scaled_test = scaler.transform(test)

# Time Series Generator

This class takes in a sequence of data-points gathered at
equal intervals, along with time series parameters such as
stride, length of history, etc., to produce batches for
training/validation.

#### Arguments
    data: Indexable generator (such as list or Numpy array)
        containing consecutive data points (timesteps).
        The data should be 2D, and axis 0 is expected
        to be the time dimension.
    targets: Targets corresponding to timesteps in `data`.
        It should have same length as `data`.
    length: Length of the output sequences (in number of timesteps).
    sampling_rate: Period between successive individual timesteps
        within sequences. For rate `r`, timesteps
        `data[i]`, `data[i-r]`, ... `data[i - length]`
        are used to create a sample sequence.
    stride: Period between successive output sequences.
        For stride `s`, consecutive output samples would
        be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
    start_index: Data points earlier than `start_index` will not be used
        in the output sequences. This is useful to reserve part of the
        data for test or validation.
    end_index: Data points later than `end_index` will not be used
        in the output sequences. This is useful to reserve part of the
        data for test or validation.
    shuffle: Whether to shuffle output samples,
        or instead draw them in chronological order.
    reverse: Boolean: if `true`, timesteps in each output sample will be
        in reverse chronological order.
    batch_size: Number of timeseries samples in each batch
        (except maybe the last one).

In [None]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
# scaled_train

In [None]:
# define generator
# The series serves as both input and target: each sample pairs `length`
# consecutive points with the point that follows them.
length = 2  # number of past time steps in each input sequence
batch_size = 1  # number of sequences per batch
generator = TimeseriesGenerator(
    data=scaled_train,
    targets=scaled_train,
    length=length,
    batch_size=batch_size,
)

In [None]:
# Number of rows in the scaled training data.
len(scaled_train)

In [None]:
# Number of batches. With batch_size=1 this should be len(scaled_train) - length,
# because the first `length` rows have no full history window before them.
len(generator) # n_input = 2

In [None]:
# scaled_train

In [None]:
# What does the first batch look like?
# NOTE: this rebinds `y`, which until now held the raw sine values from np.sin(x).
X,y = generator[0]

In [None]:
# X is the input window (flattened for printing); y is the target value paired with it.
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y}')

In [None]:
# Let's redefine to get 10 steps back and then predict the next step out
length = 10  # number of past time steps in each input sequence
generator = TimeseriesGenerator(
    data=scaled_train,
    targets=scaled_train,
    length=length,
    batch_size=1,
)

In [None]:
# What does the first batch look like?
# (Rebinds X and y to the first sample of the redefined generator.)
X,y = generator[0]

In [None]:
# X is the input window (flattened for printing); y is the target value paired with it.
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y}')

In [None]:
# Final choice for training: 50 past time steps per input sequence.
length = 50
generator = TimeseriesGenerator(
    data=scaled_train,
    targets=scaled_train,
    length=length,
    batch_size=1,
)

Now you will be able to edit the length so that it makes sense for your time series!

### Create the Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM,SimpleRNN

In [None]:
# We're only using one feature in our time series (the 'Sine' column),
# so each time step fed to the network is a single value.
n_features = 1

In [None]:
# define model: one recurrent layer feeding a single-value output
model = Sequential([
    # Simple RNN layer: 50 units, input is `length` time steps of `n_features` values
    SimpleRNN(50, input_shape=(length, n_features)),
    # Final Prediction: a single value per input sequence
    Dense(1),
])

# Mean squared error loss, Adam optimizer.
model.compile(optimizer='adam', loss='mse')

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# fit model
# Model.fit accepts the generator directly; fit_generator is deprecated and
# has been removed from newer Keras releases.
model.fit(generator, epochs=5)

In [None]:
# Names of the metrics recorded during training.
model.history.history.keys()

In [None]:
# Put the recorded training history into a DataFrame and plot it.
losses = pd.DataFrame(model.history.history)
losses.plot()

## Evaluate on Test Data

In [None]:
# The last `length` scaled training points: the input window for predicting
# the first point after the training data.
first_eval_batch = scaled_train[-length:]

In [None]:
# Inspect the window before reshaping.
first_eval_batch

In [None]:
# Add a leading batch dimension so the shape matches the model input: (1, length, n_features).
first_eval_batch = first_eval_batch.reshape((1, length, n_features))

In [None]:
# One-step-ahead prediction (still in scaled units) from the last training window.
model.predict(first_eval_batch)

In [None]:
# First scaled test value, i.e. the actual value that immediately follows the training data.
scaled_test[0]

Now let's put this logic in a for loop to predict into the future for the entire test range.

----

In [None]:
# Will collect one prediction per test time step.
test_predictions = []

# Seed window: the last `length` training points, with a batch dimension added.
first_eval_batch = scaled_train[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

In [None]:
# Expect (1, length, n_features) after the reshape above.
current_batch.shape

In [None]:
# Inspect the seed window.
current_batch

In [None]:
# Demo of the sliding-window update: drop the oldest time step and append a
# placeholder value (99) as the newest one. The result is only displayed, not stored.
np.append(current_batch[:,1:,:],[[[99]]],axis=1)

**NOTE: PAY CLOSE ATTENTION HERE TO WHAT IS BEING OUTPUT AND IN WHAT DIMENSIONS. ADD YOUR OWN PRINT() STATEMENTS TO SEE WHAT IS TRULY GOING ON!!**

In [None]:
# Roll the model forward across the test range: start from the last `length`
# training points and feed every prediction back in as the newest input.
test_predictions = []

first_eval_batch = scaled_train[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

for i in range(len(test)):
    # predict() returns one row per input sequence; [0] takes the row for our single sequence
    current_pred = model.predict(current_batch)[0]
    test_predictions.append(current_pred)

    # Slide the window: drop the oldest step, append the prediction as a
    # (1, 1, n_outputs) slice along the time axis.
    remaining_steps = current_batch[:, 1:, :]
    newest_step = current_pred[np.newaxis, np.newaxis, :]
    current_batch = np.concatenate((remaining_steps, newest_step), axis=1)

In [None]:
# Predictions collected by the loop (still in scaled units).
test_predictions

In [None]:
# Scaled true test values, for comparison with the predictions.
scaled_test

## Inverse Transformations and Compare

In [None]:
# Map the predictions from scaled units back to the original units, using the
# same scaler that was fit on the training data.
true_predictions = scaler.inverse_transform(test_predictions)

In [None]:
# Predictions in original units.
true_predictions

In [None]:
# True test values, before the prediction column is added.
test

In [None]:
# pandas may emit a SettingWithCopyWarning here when `test` is a slice of `df`;
# the new column is added to `test` only.
test['Predictions'] = true_predictions

In [None]:
# True values and predictions side by side.
test

In [None]:
# Plot every column of `test` (true values and predictions) on one chart.
test.plot(figsize=(12,8))

## Adding in Early Stopping and Validation Generator

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Stop training once the validation loss has gone 2 epochs without improving.
early_stop = EarlyStopping(monitor='val_loss',patience=2)

In [None]:
# The window is shortened to 49 steps because the same `length` is used for
# the validation generator: it has to be smaller than the number of test rows
# (50 here) for that generator to produce at least one sample.
length = 49

generator = TimeseriesGenerator(
    data=scaled_train,
    targets=scaled_train,
    length=length,
    batch_size=1,
)

validation_generator = TimeseriesGenerator(
    data=scaled_test,
    targets=scaled_test,
    length=length,
    batch_size=1,
)

# LSTMS

In [None]:
# define model: same architecture as before, with an LSTM as the recurrent layer
model = Sequential([
    # LSTM layer: 50 units, input is `length` time steps of `n_features` values
    LSTM(50, input_shape=(length, n_features)),
    # Final Prediction: a single value per input sequence
    Dense(1),
])

# Mean squared error loss, Adam optimizer.
model.compile(optimizer='adam', loss='mse')

In [None]:
# Train for up to 20 epochs, evaluating on the validation generator after each
# epoch; the early_stop callback ends training when val_loss stops improving.
# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from newer Keras releases.
model.fit(generator, epochs=20,
          validation_data=validation_generator,
          callbacks=[early_stop])

In [None]:
# Roll the LSTM forward across the test range: start from the last `length`
# training points and feed every prediction back in as the newest input.
test_predictions = []

first_eval_batch = scaled_train[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

for i in range(len(test)):
    # predict() returns one row per input sequence; [0] takes the row for our single sequence
    current_pred = model.predict(current_batch)[0]
    test_predictions.append(current_pred)

    # Slide the window: drop the oldest step, append the prediction as a
    # (1, 1, n_outputs) slice along the time axis.
    remaining_steps = current_batch[:, 1:, :]
    newest_step = current_pred[np.newaxis, np.newaxis, :]
    current_batch = np.concatenate((remaining_steps, newest_step), axis=1)

In [None]:
# Convert the LSTM predictions back to original units, store them as a new
# column of `test`, and plot all columns together.
# pandas may emit a SettingWithCopyWarning on the assignment when `test` is a slice of `df`.
true_predictions = scaler.inverse_transform(test_predictions)
test['LSTM Predictions'] = true_predictions
test.plot(figsize=(12,8))

# Forecasting

Forecast into unknown range. We should first utilize all our data, since we are now forecasting!

In [None]:
# Forecasting uses the whole series, so fit a separate scaler on all of `df`
# (the earlier `scaler` was fit on the training split only).
full_scaler = MinMaxScaler()
scaled_full_data = full_scaler.fit_transform(df)

In [None]:
# Back to 50 past time steps per input sequence, now drawn from the full scaled series.
length = 50
generator = TimeseriesGenerator(
    data=scaled_full_data,
    targets=scaled_full_data,
    length=length,
    batch_size=1,
)

In [None]:
# Fresh LSTM model trained on the entire scaled series. No validation data is
# passed here, since every point is used for training.
model = Sequential()
model.add(LSTM(50, input_shape=(length, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# Model.fit accepts the generator directly; fit_generator is deprecated and
# has been removed from newer Keras releases.
model.fit(generator, epochs=6)

In [None]:
# Forecast past the end of the data: start from the last `length` points of
# the full scaled series and feed every prediction back in as the newest input.
# The horizon is len(test) steps.
forecast = []

first_eval_batch = scaled_full_data[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

for i in range(len(test)):
    # predict() returns one row per input sequence; [0] takes the row for our single sequence
    current_pred = model.predict(current_batch)[0]
    forecast.append(current_pred)

    # Slide the window: drop the oldest step, append the prediction as a
    # (1, 1, n_outputs) slice along the time axis.
    remaining_steps = current_batch[:, 1:, :]
    newest_step = current_pred[np.newaxis, np.newaxis, :]
    current_batch = np.concatenate((remaining_steps, newest_step), axis=1)

In [None]:
# The forecasting model was trained on data scaled by `full_scaler` (fit on the
# whole series), so its outputs must be inverted with that same scaler, not
# the train-only `scaler`.
forecast = full_scaler.inverse_transform(forecast)

In [None]:
# forecast

In [None]:
# Look at the original data again; its index ends where the forecast should begin.
df

In [None]:
# Number of forecast points (one per iteration of the forecasting loop).
len(forecast)

In [None]:
# Scratch calculation: 50 steps at a spacing of 0.1 span an x-range of 5.
50*0.1

In [None]:
# x positions for the forecast: continue the existing index with the same
# spacing, one position per forecast value. Building the index from an integer
# range avoids the off-by-one length that np.arange can produce with a
# non-integer step, and keeps it in sync with len(forecast).
step = df.index[1] - df.index[0]
forecast_index = df.index[-1] + step * np.arange(1, len(forecast) + 1)

In [None]:
# Must equal len(forecast) so the two can be plotted against each other.
len(forecast_index)

In [None]:
# Original series, followed by the forecast continuing past the end of the data.
plt.plot(df.index,df['Sine'])
plt.plot(forecast_index,forecast)

# Great Job!