# This notebook is identical to RNN_corn_weekly.ipynb, except that it also generates test set predictions after the validation set predictions

### CONTENTS
### 1 Imports
### 2 Arrange datasets
### 3 Define models
### 4 Train models

# 1 Imports

In [1]:
# RNN imports

import tensorflow as tf

from keras.preprocessing.sequence import TimeseriesGenerator

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN

from keras.models import load_model

from tensorflow.keras.callbacks import ModelCheckpoint

In [2]:
#General imports

import numpy as np
import pandas as pd

import scipy.stats as stats

import matplotlib.pyplot as plt
import matplotlib.dates

from datetime import datetime

from google.colab import drive
import json

In [3]:
# Import weekly data
# The 'date' column becomes the index; parse_dates=True asks pandas to parse that index as dates.

data_url = 'https://raw.githubusercontent.com/ptraver/data/main/corn_weekly.csv'
corn_df = pd.read_csv(data_url, parse_dates=True, dayfirst=True, index_col='date')

# Rename the single data column to 'value' (the name in the CSV is unsuitable)
corn_df.columns = ['value']

# Keep only 1974 - 2017 (label-based slice on the date index, both ends inclusive)
corn_df = corn_df['1974-01-01':'2017-12-31']

# corn_df needs to lose its last value because the VMD process loses the last value,
# and the raw series must stay aligned with the decomposed ones
corn_df = corn_df[:-1]

# View the last two rows of the loaded data
corn_df.tail(2)

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2017-12-10,339.3
2017-12-17,338.85


In [4]:
# Import emd data
# Read the same way as the raw series: 'date' is parsed and used as the index.
# Note: data_url is reused for each download, so it always holds the most recent URL.
data_url = 'https://raw.githubusercontent.com/ptraver/data/main/corn_emd_weekly.csv'
corn_emd = pd.read_csv(data_url, parse_dates=True, dayfirst=True, index_col='date')

In [5]:
# Import vmd data
# Read the same way as the raw series: 'date' is parsed and used as the index.
data_url = 'https://raw.githubusercontent.com/ptraver/data/main/corn_vmd_weekly.csv'
corn_vmd = pd.read_csv(data_url, parse_dates=True, dayfirst=True, index_col='date')

In [6]:
# Check indices are aligned
# Both lines must print True: the three frames are split positionally below,
# so the raw, EMD and VMD series must share exactly the same date index.
print(corn_df.index.equals(corn_emd.index))
print(corn_df.index.equals(corn_vmd.index))

True
True


# 2 Arrange datasets

In [7]:
# Split data so that valid and test do not feature in decisions
# shuffle=False keeps chronological order: the first 80% of rows is the training set,
# and the remaining 20% is halved into validation (earlier half) and test (later half).
# out_of_sample is a scratch name that is overwritten for each series.
from sklearn.model_selection import train_test_split

true_train, out_of_sample = train_test_split(corn_df, test_size=0.2, shuffle=False)
true_valid, true_test = train_test_split(out_of_sample, test_size=0.5, shuffle=False)

emd_train, out_of_sample = train_test_split(corn_emd, test_size=0.2, shuffle=False)
emd_valid, emd_test = train_test_split(out_of_sample, test_size=0.5, shuffle=False)

vmd_train, out_of_sample = train_test_split(corn_vmd, test_size=0.2, shuffle=False)
vmd_valid, vmd_test = train_test_split(out_of_sample, test_size=0.5, shuffle=False)

In [8]:
# Scale data between 0 and 1
# Each scaler learns its min/max from the training split only and is then applied
# to all three splits, so validation/test values may fall outside [0, 1].
# This cell rebinds the *_train / *_valid / *_test names to the scaled arrays,
# so it must be run exactly once after the split cell.
from sklearn.preprocessing import MinMaxScaler

emd_scaler = MinMaxScaler().fit(emd_train)
emd_train, emd_valid, emd_test = (
    emd_scaler.transform(split) for split in (emd_train, emd_valid, emd_test)
)

vmd_scaler = MinMaxScaler().fit(vmd_train)
vmd_train, vmd_valid, vmd_test = (
    vmd_scaler.transform(split) for split in (vmd_train, vmd_valid, vmd_test)
)

true_scaler = MinMaxScaler().fit(true_train)
true_train, true_valid, true_test = (
    true_scaler.transform(split) for split in (true_train, true_valid, true_test)
)

In [9]:
# Set input shape variables
# Number of consecutive past time steps fed to each model as one input window
# (used as the TimeseriesGenerator length and the first dimension of input_shape).
emd_n_inputs = 4
vmd_n_inputs = 5
non_decomp_n_inputs = 5

In [10]:
# get emd sets in order
# Prepend the last emd_n_inputs rows of the preceding split so that the first
# validation/test row already has a full look-back window; the generator then
# yields exactly one (window, next value) sample per validation/test row.
emd_extra_valid = emd_train[-emd_n_inputs:]
emd_valid_set = np.concatenate((emd_extra_valid, emd_valid))

emd_extra_test = emd_valid[-emd_n_inputs:]
emd_test_set = np.concatenate((emd_extra_test, emd_test))

# The same array is passed as data and targets: each window of emd_n_inputs rows
# is paired with the row that immediately follows it (one-step-ahead forecasting).
emd_train_shaped = TimeseriesGenerator(emd_train, emd_train, length=emd_n_inputs, batch_size=1)
emd_valid_shaped = TimeseriesGenerator(emd_valid_set, emd_valid_set, length=emd_n_inputs, batch_size=1)
emd_test_shaped = TimeseriesGenerator(emd_test_set, emd_test_set, length=emd_n_inputs, batch_size=1)

In [11]:
# get vmd sets in order
# Prepend the last vmd_n_inputs rows of the preceding split so that the first
# validation/test row already has a full look-back window; the generator then
# yields exactly one (window, next value) sample per validation/test row.
vmd_extra_valid = vmd_train[-vmd_n_inputs:]
vmd_valid_set = np.concatenate((vmd_extra_valid, vmd_valid))

vmd_extra_test = vmd_valid[-vmd_n_inputs:]
vmd_test_set = np.concatenate((vmd_extra_test, vmd_test))

# The same array is passed as data and targets: each window of vmd_n_inputs rows
# is paired with the row that immediately follows it (one-step-ahead forecasting).
vmd_train_shaped = TimeseriesGenerator(vmd_train, vmd_train, length=vmd_n_inputs, batch_size=1)
vmd_valid_shaped = TimeseriesGenerator(vmd_valid_set, vmd_valid_set, length=vmd_n_inputs, batch_size=1)
vmd_test_shaped = TimeseriesGenerator(vmd_test_set, vmd_test_set, length=vmd_n_inputs, batch_size=1)

In [12]:
# get non-decomposed sets in order
# Prepend the last non_decomp_n_inputs rows of the preceding split so that the first
# validation/test row already has a full look-back window; the generator then
# yields exactly one (window, next value) sample per validation/test row.
extra_valid = true_train[-non_decomp_n_inputs:]
valid_set = np.concatenate((extra_valid, true_valid))

extra_test = true_valid[-non_decomp_n_inputs:]
test_set = np.concatenate((extra_test, true_test))

# The same array is passed as data and targets: each window of non_decomp_n_inputs rows
# is paired with the row that immediately follows it (one-step-ahead forecasting).
train_shaped = TimeseriesGenerator(true_train, true_train, length=non_decomp_n_inputs, batch_size=1)
valid_shaped = TimeseriesGenerator(valid_set, valid_set, length=non_decomp_n_inputs, batch_size=1)
test_shaped = TimeseriesGenerator(test_set, test_set, length=non_decomp_n_inputs, batch_size=1)

# 3 Define models

In [None]:
# Set shape variables
# Number of recurrent units per SimpleRNN layer in each model.
emd_units = 7

# The VMD model stacks two recurrent layers.
vmd_L1_units = 7
vmd_L2_units = 4

non_decomp_units = 7

# Set number of features
# Each time step carries a single value (last dimension of input_shape).
n_features = 1

In [None]:
# Set other variables
# Shared by all three model-building functions.
activation = 'relu'
dropout = 0  # passed to SimpleRNN(dropout=...); 0 disables input dropout
optimizer = 'adam'
loss = 'mse'

In [None]:
# Define emd model definition function

def define_emd_model():
  """Build and compile the single-layer SimpleRNN regressor for the EMD series.

  Reads the notebook-level settings emd_units, emd_n_inputs, n_features,
  activation, dropout, optimizer and loss at call time.

  Returns:
    A compiled Sequential model: SimpleRNN -> Dense(1).
  """
  recurrent_layer = SimpleRNN(
      units=emd_units,
      input_shape=(emd_n_inputs, n_features),
      activation=activation,
      dropout=dropout,
  )
  output_layer = Dense(units=1)  # single one-step-ahead value

  model = Sequential([recurrent_layer, output_layer])
  model.compile(optimizer=optimizer, loss=loss)
  return model

In [None]:
# Define vmd model definition function

def define_vmd_model():
  """Build and compile the two-layer stacked SimpleRNN regressor for the VMD series.

  Reads the notebook-level settings vmd_L1_units, vmd_L2_units, vmd_n_inputs,
  n_features, activation, dropout, optimizer and loss at call time.

  Returns:
    A compiled Sequential model: SimpleRNN (sequences) -> SimpleRNN -> Dense(1).
  """
  RNNregressor = Sequential()
  # First layer declares the model input shape and returns the full sequence so
  # that the second recurrent layer receives one vector per time step.
  RNNregressor.add(SimpleRNN(units=vmd_L1_units, input_shape=(vmd_n_inputs, n_features), activation=activation, dropout=dropout, return_sequences=True))
  # No input_shape here: only the first layer of a Sequential model defines the
  # input, and this layer's real input is the first layer's output sequence.
  RNNregressor.add(SimpleRNN(units=vmd_L2_units, activation=activation, dropout=dropout))
  RNNregressor.add(Dense(units=1))  # single one-step-ahead value
  RNNregressor.compile(optimizer=optimizer, loss=loss)
  return RNNregressor

In [None]:
# Define non decomp model definition function

def define_non_decomp_model():
  """Build and compile the single-layer SimpleRNN regressor for the raw (non-decomposed) series.

  Reads the notebook-level settings non_decomp_units, non_decomp_n_inputs,
  n_features, activation, dropout, optimizer and loss at call time.

  Returns:
    A compiled Sequential model: SimpleRNN -> Dense(1).
  """
  window_shape = (non_decomp_n_inputs, n_features)
  layers = [
      SimpleRNN(units=non_decomp_units, input_shape=window_shape, activation=activation, dropout=dropout),
      Dense(units=1),  # single one-step-ahead value
  ]

  model = Sequential(layers)
  model.compile(optimizer=optimizer, loss=loss)
  return model

# 4 Train models

In [None]:
# Set variables
# Number of training epochs: decomp_EPOCHS is used for the EMD and VMD models,
# non_decomp_EPOCHS for the model trained on the raw series.

decomp_EPOCHS = 50
non_decomp_EPOCHS = 8

In [13]:
# Mount Google Drive (Colab only); the selected models and the prediction arrays
# are saved under '/content/gdrive/My Drive/'.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


### on EMD corn weekly

In [None]:
# Set checkpoint callback
# str.format only fills the outer '{}', so the '{epoch:02d}' placeholder survives
# and is filled in by Keras with the epoch number each time a checkpoint is written.
filepath = 'emd_RNN_weekly_valid_{epoch:02d}'
# The monitoring options must be arguments of ModelCheckpoint, not of str.format
# (where they were silently ignored). With save_best_only=True a checkpoint is
# written only when val_loss improves, so the highest-numbered file is the best model.
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
EMDregressor = define_emd_model()

# Train model
with tf.device('/device:GPU:0'):
  EMDregressor.fit(emd_train_shaped,
                   epochs=decomp_EPOCHS,
                   validation_data=emd_valid_shaped,
                   callbacks=[checkpoint])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50

In [14]:
## ACTION REQUIRED ##

# Select best model by lowest validation loss
# Manual step: set this to the checkpoint name of the epoch with the lowest val_loss
# in the training log (the suffix is the two-digit epoch number).
# Note: best_model is reused by the VMD and non-decomposed sections below.
best_model = 'emd_RNN_weekly_valid_46'

In [None]:
# Save best model to Drive
# Load the selected checkpoint from the local 'models/' folder and store a copy
# on Drive as an .h5 file.
emd_RNN_weekly_model = load_model(f'models/{best_model}.model')
emd_RNN_weekly_model.save(f'/content/gdrive/My Drive/models/{best_model}.h5')

In [None]:
# Use best model to generate emd validation set forecasts
# (one one-step-ahead forecast per validation window, still in scaled units)
emd_fc = emd_RNN_weekly_model.predict(emd_valid_shaped)

In [None]:
# Rescale forecasted values
# Map the scaled forecasts back to original units with the scaler fitted on the emd training set.
emd_rescaled = emd_scaler.inverse_transform(emd_fc)

In [None]:
# Save emd validation set predictions
# (flattened to a 1-D array before writing to Drive)
np.save('/content/gdrive/My Drive/outputs/emd_RNN_weekly_valid.npy', emd_rescaled.flatten())

In [None]:
#### TEST SET ####

In [15]:
# Load the model
# Reload the selected emd model from its .h5 copy on Drive.
# Depends on best_model still holding the emd checkpoint name.
emd_RNN_weekly_model = load_model(f'/content/gdrive/My Drive/models/{best_model}.h5')

In [16]:
# Use best model to generate emd test set forecasts
# (overwrites the validation forecasts held in emd_fc)
emd_fc = emd_RNN_weekly_model.predict(emd_test_shaped)

In [17]:
# Rescale forecasted values
# Map the scaled test forecasts back to original units with the scaler fitted on the emd training set.
emd_rescaled = emd_scaler.inverse_transform(emd_fc)

In [19]:
# Save emd test set predictions
np.save('/content/gdrive/My Drive/outputs/emd_RNN_weekly_test.npy', emd_rescaled.flatten())

### on VMD corn weekly

In [None]:
# Set checkpoint callback
# str.format only fills the outer '{}', so the '{epoch:02d}' placeholder survives
# and is filled in by Keras with the epoch number each time a checkpoint is written.
filepath = 'vmd_RNN_weekly_valid_{epoch:02d}'
# The monitoring options must be arguments of ModelCheckpoint, not of str.format
# (where they were silently ignored). With save_best_only=True a checkpoint is
# written only when val_loss improves, so the highest-numbered file is the best model.
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
VMDregressor = define_vmd_model()

# Train model
with tf.device('/device:GPU:0'):
  VMDregressor.fit(vmd_train_shaped,
                   epochs=decomp_EPOCHS,
                   validation_data=vmd_valid_shaped,
                   callbacks=[checkpoint])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [20]:
## ACTION REQUIRED ##

# Select best model by lowest validation loss
# Manual step: set this to the checkpoint name of the epoch with the lowest val_loss
# in the training log (the suffix is the two-digit epoch number).
# Note: this overwrites the best_model value chosen in the EMD section.
best_model = 'vmd_RNN_weekly_valid_38'

In [None]:
# Save best model to Drive
# Load the selected checkpoint from the local 'models/' folder and store a copy
# on Drive as an .h5 file.
vmd_RNN_weekly_model = load_model(f'models/{best_model}.model')
vmd_RNN_weekly_model.save(f'/content/gdrive/My Drive/models/{best_model}.h5')

In [None]:
# Use best model to generate vmd validation set forecasts
# (one one-step-ahead forecast per validation window, still in scaled units)
vmd_fc = vmd_RNN_weekly_model.predict(vmd_valid_shaped)

In [None]:
# Rescale forecasted values
# Map the scaled forecasts back to original units with the scaler fitted on the vmd training set.
vmd_rescaled = vmd_scaler.inverse_transform(vmd_fc)

In [None]:
# Save vmd validation set predictions
np.save('/content/gdrive/My Drive/outputs/vmd_RNN_weekly_valid.npy', vmd_rescaled.flatten())

In [None]:
#### TEST SET ####

In [21]:
# Load the model
# Reload the selected vmd model from its .h5 copy on Drive.
# Depends on best_model still holding the vmd checkpoint name.
vmd_RNN_weekly_model = load_model(f'/content/gdrive/My Drive/models/{best_model}.h5')

In [22]:
# Use best model to generate vmd test set forecasts
# (overwrites the validation forecasts held in vmd_fc)
vmd_fc = vmd_RNN_weekly_model.predict(vmd_test_shaped)

In [23]:
# Rescale forecasted values
# Map the scaled test forecasts back to original units with the scaler fitted on the vmd training set.
vmd_rescaled = vmd_scaler.inverse_transform(vmd_fc)

In [25]:
# Save vmd test set predictions
np.save('/content/gdrive/My Drive/outputs/vmd_RNN_weekly_test.npy', vmd_rescaled.flatten())

## on non-decomposed

In [None]:
# Set checkpoint callback
# str.format only fills the outer '{}', so the '{epoch:02d}' placeholder survives
# and is filled in by Keras with the epoch number each time a checkpoint is written.
filepath = 'true_RNN_weekly_valid_{epoch:02d}'
# The monitoring options must be arguments of ModelCheckpoint, not of str.format
# (where they were silently ignored). With save_best_only=True a checkpoint is
# written only when val_loss improves, so the highest-numbered file is the best model.
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
TRUEregressor = define_non_decomp_model()

# Train model
with tf.device('/device:GPU:0'):
  TRUEregressor.fit(train_shaped,
                    epochs=non_decomp_EPOCHS,
                    validation_data=valid_shaped,
                    callbacks=[checkpoint])

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [26]:
## ACTION REQUIRED ##

# Select best model by lowest validation loss
# Manual step: set this to the checkpoint name of the epoch with the lowest val_loss
# in the training log (the suffix is the two-digit epoch number).
# Note: this overwrites the best_model value chosen in the VMD section.
best_model = 'true_RNN_weekly_valid_05'

In [None]:
# Save best model to Drive
# Load the selected checkpoint from the local 'models/' folder and store a copy
# on Drive as an .h5 file.
true_RNN_weekly_model = load_model(f'models/{best_model}.model')
true_RNN_weekly_model.save(f'/content/gdrive/My Drive/models/{best_model}.h5')

In [None]:
# Use best model to generate non-decomposed validation set forecasts
# (one one-step-ahead forecast per validation window, still in scaled units)
true_fc = true_RNN_weekly_model.predict(valid_shaped)

In [None]:
# Rescale forecasted values
# Map the scaled forecasts back to original units with the scaler fitted on the raw training set.
true_rescaled = true_scaler.inverse_transform(true_fc)

In [None]:
# Save non-decomposed validation set predictions
np.save('/content/gdrive/My Drive/outputs/true_RNN_weekly_valid.npy', true_rescaled.flatten())

In [None]:
#### TEST SET ####

In [27]:
# Load the model
# Reload the selected non-decomposed model from its .h5 copy on Drive.
# Depends on best_model still holding the non-decomposed checkpoint name.
true_RNN_weekly_model = load_model(f'/content/gdrive/My Drive/models/{best_model}.h5')

In [28]:
# Use best model to generate non-decomposed test set forecasts
# (overwrites the validation forecasts held in true_fc)
true_fc = true_RNN_weekly_model.predict(test_shaped)

In [29]:
# Rescale forecasted values
# Map the scaled test forecasts back to original units with the scaler fitted on the raw training set.
true_rescaled = true_scaler.inverse_transform(true_fc)

In [30]:
# Save non-decomposed test set predictions
np.save('/content/gdrive/My Drive/outputs/true_RNN_weekly_test.npy', true_rescaled.flatten())

# End