# This notebook generates an RNN model on daily corn, exactly as described in the Wang paper

## Assumptions, where Wang paper lacks detail:

### -Assume RNN input size is same as Wang's window size for SSA decomposition (60), arbitrarily
### -Assume forecasts begin at the 1st sample of the validation set, not at the (n_inputs+1)th sample (the paper does not say at what point in the validation set forecasts begin)
### -Even though not mentioned in paper, assume data was scaled between 0 and 1 because otherwise training is very slow
### -Assume model has no dropout, and a tanh activation function, mse loss function
### -Assume training not done in batches

### CONTENTS
### 1 Imports
### 2 Arrange datasets
### 3 Define models
### 4 Train models

# 1 Imports

In [90]:
# RNN imports

import tensorflow as tf

from keras.preprocessing.sequence import TimeseriesGenerator

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN

from keras.models import load_model

from tensorflow.keras.callbacks import ModelCheckpoint

In [91]:
#General imports

import numpy as np
import pandas as pd

import scipy.stats as stats

import matplotlib.pyplot as plt
import matplotlib.dates

from datetime import datetime

from google.colab import drive
import json

In [92]:
# Load the daily corn series, using the parsed 'date' column as the index
data_url = 'https://raw.githubusercontent.com/ptraver/data/main/corn_daily_V2.csv'
corn_df = pd.read_csv(
    data_url,
    index_col='date',
    parse_dates=True,
    dayfirst=True,
)

# The data column is badly named in the file; call it 'value'
corn_df.columns = ['value']

# Keep 1974 - 2017 only, then drop the final row: corn_df needs to lose its
# last value because the last value is lost during the VMD process
corn_df = corn_df.loc['1974-01-01':'2017-12-31'].iloc[:-1]

# Display the last two rows of the loaded data
corn_df.tail(2)

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2017-12-20,349.25
2017-12-21,350.75


In [93]:
# Import emd data
# Read the emd CSV into a DataFrame indexed by its 'date' column, with dates
# parsed day-first.
data_url = 'https://raw.githubusercontent.com/ptraver/data/main/corn_emd_daily.csv'
corn_emd = pd.read_csv(data_url, parse_dates=True, dayfirst=True, index_col='date')

In [94]:
# Import vmd data
# Read the vmd CSV into a DataFrame indexed by its 'date' column, with dates
# parsed day-first.
data_url = 'https://raw.githubusercontent.com/ptraver/data/main/corn_vmd_daily.csv'
corn_vmd = pd.read_csv(data_url, parse_dates=True, dayfirst=True, index_col='date')

# 2 Arrange datasets

In [95]:
# Split data so that valid and test do not feature in decisions
from sklearn.model_selection import train_test_split

# Each series is split chronologically (shuffle=False) in two steps:
# first 80% train / 20% held out, then the held-out part is halved into
# validation and test. `out_of_sample` is only a temporary holder and is
# overwritten for each series.

# Non-decomposed series
true_train, out_of_sample = train_test_split(corn_df, test_size=0.2, shuffle=False)
true_valid, true_test = train_test_split(out_of_sample, test_size=0.5, shuffle=False)

# emd series
emd_train, out_of_sample = train_test_split(corn_emd, test_size=0.2, shuffle=False)
emd_valid, emd_test = train_test_split(out_of_sample, test_size=0.5, shuffle=False)

# vmd series
vmd_train, out_of_sample = train_test_split(corn_vmd, test_size=0.2, shuffle=False)
vmd_valid, vmd_test = train_test_split(out_of_sample, test_size=0.5, shuffle=False)

In [96]:
# Scale data between 0 and 1
from sklearn.preprocessing import MinMaxScaler

# Each scaler is fitted on its training split only and then applied unchanged
# to the validation and test splits, so no out-of-sample information enters
# the scaling. The test splits have to be scaled as well: the windowing cells
# prepend the last n_inputs (already scaled) validation samples to them, and
# mixing scaled and raw values in one sequence would corrupt the test windows.
# After this cell every split is a NumPy array rather than a DataFrame.

emd_scaler = MinMaxScaler()
emd_train = emd_scaler.fit_transform(emd_train)
emd_valid = emd_scaler.transform(emd_valid)
emd_test = emd_scaler.transform(emd_test)

vmd_scaler = MinMaxScaler()
vmd_train = vmd_scaler.fit_transform(vmd_train)
vmd_valid = vmd_scaler.transform(vmd_valid)
vmd_test = vmd_scaler.transform(vmd_test)

true_scaler = MinMaxScaler()
true_train = true_scaler.fit_transform(true_train)
true_valid = true_scaler.transform(true_valid)
true_test = true_scaler.transform(true_test)

In [97]:
# Set input shape variables
# n_inputs: number of consecutive samples in each input window (used as the
# TimeseriesGenerator length and as the first dimension of the RNN input shape)
n_inputs = 60
# n_features: number of values per time step (second dimension of the RNN
# input shape)
n_features = 1

In [98]:
# Build the emd input windows: each generator sample is n_inputs consecutive
# values paired with the value that follows them.

# Training windows come straight from the scaled training data
emd_train_shaped = TimeseriesGenerator(data=emd_train, targets=emd_train,
                                       length=n_inputs, batch_size=1)

# Validation: prepend the last n_inputs training samples so that the first
# target is the 1st sample of the validation set
emd_extra_valid = emd_train[-n_inputs:]
emd_valid_set = np.concatenate([emd_extra_valid, emd_valid], axis=0)
emd_valid_shaped = TimeseriesGenerator(data=emd_valid_set, targets=emd_valid_set,
                                       length=n_inputs, batch_size=1)

# Test: likewise prepend the last n_inputs validation samples
emd_extra_test = emd_valid[-n_inputs:]
emd_test_set = np.concatenate([emd_extra_test, emd_test], axis=0)
emd_test_shaped = TimeseriesGenerator(data=emd_test_set, targets=emd_test_set,
                                      length=n_inputs, batch_size=1)

In [99]:
# Build the vmd input windows: each generator sample is n_inputs consecutive
# values paired with the value that follows them.

# Training windows come straight from the scaled training data
vmd_train_shaped = TimeseriesGenerator(data=vmd_train, targets=vmd_train,
                                       length=n_inputs, batch_size=1)

# Validation: prepend the last n_inputs training samples so that the first
# target is the 1st sample of the validation set
vmd_extra_valid = vmd_train[-n_inputs:]
vmd_valid_set = np.concatenate([vmd_extra_valid, vmd_valid], axis=0)
vmd_valid_shaped = TimeseriesGenerator(data=vmd_valid_set, targets=vmd_valid_set,
                                       length=n_inputs, batch_size=1)

# Test: likewise prepend the last n_inputs validation samples
vmd_extra_test = vmd_valid[-n_inputs:]
vmd_test_set = np.concatenate([vmd_extra_test, vmd_test], axis=0)
vmd_test_shaped = TimeseriesGenerator(data=vmd_test_set, targets=vmd_test_set,
                                      length=n_inputs, batch_size=1)

In [100]:
# Build the non-decomposed input windows: each generator sample is n_inputs
# consecutive values paired with the value that follows them.

# Training windows come straight from the scaled training data
train_shaped = TimeseriesGenerator(data=true_train, targets=true_train,
                                   length=n_inputs, batch_size=1)

# Validation: prepend the last n_inputs training samples so that the first
# target is the 1st sample of the validation set
extra_valid = true_train[-n_inputs:]
valid_set = np.concatenate([extra_valid, true_valid], axis=0)
valid_shaped = TimeseriesGenerator(data=valid_set, targets=valid_set,
                                   length=n_inputs, batch_size=1)

# Test: likewise prepend the last n_inputs validation samples
extra_test = true_valid[-n_inputs:]
test_set = np.concatenate([extra_test, true_test], axis=0)
test_shaped = TimeseriesGenerator(data=test_set, targets=test_set,
                                  length=n_inputs, batch_size=1)

# 3 Define models

In [101]:
# Set variables

# Number of units in the SimpleRNN layer built by define_model()
units = 32

In [102]:
# Model factory shared by the emd, vmd and non-decomposed experiments

def define_model():
  """Build and compile a fresh, untrained RNN regressor.

  The network is one SimpleRNN layer with `units` units that reads windows
  of shape (n_inputs, n_features), followed by a single-unit Dense output
  layer. `units`, `n_inputs` and `n_features` are read from the notebook
  globals at call time.

  Returns:
    A Sequential model compiled with the 'adam' optimizer and 'mse' loss.
  """
  layers = [
      SimpleRNN(units=units, input_shape=(n_inputs, n_features)),
      Dense(units=1),
  ]
  model = Sequential(layers)
  model.compile(optimizer='adam', loss='mse')
  return model

# 4 Train models

In [121]:
# Set variables

# Number of training epochs passed to every fit() call below
EPOCHS = 4

In [14]:
# Mount Google Drive at /content/gdrive; the best models and the forecasts
# are saved under this path later on
drive.mount('/content/gdrive')

Mounted at /content/gdrive


### on EMD corn daily

In [104]:
# Set checkpoint callback
# '{epoch:02d}' is left in the path as a placeholder that Keras fills in with
# the epoch number each time it saves.
filepath = 'emd_RNN_daily_valid_{epoch:02d}'
# Only the path goes through str.format(); monitor / verbose / save_best_only
# / mode are ModelCheckpoint arguments. If they are placed inside the
# format() call they are silently ignored and the callback runs with its
# defaults. With save_best_only=True a checkpoint is written only for epochs
# that improve val_loss, so the epoch with the lowest val_loss is always saved.
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
EMDregressor = define_model()

# Train model on the emd training windows, validating on the emd validation
# windows after every epoch
with tf.device('/device:GPU:0'):
  EMDregressor.fit(emd_train_shaped,
                   epochs=EPOCHS,
                   validation_data=emd_valid_shaped,
                   callbacks=[checkpoint])

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [105]:
## ACTION REQUIRED ##

# Select best model by lowest validation loss
# Set this by hand after training: it is the emd checkpoint name with the
# two-digit number of the chosen epoch filled in.
best_model = 'emd_RNN_daily_valid_03'

In [106]:
# Save best model to Drive
# Reload the selected checkpoint from the local models/ directory, then write
# a copy to the mounted Google Drive as an .h5 file.
emd_RNN_daily_model = load_model(f'models/{best_model}.model')
emd_RNN_daily_model.save(f'/content/gdrive/My Drive/models/{best_model}.h5')

In [107]:
# Use best model to generate emd validation set forecasts
# emd_valid_shaped starts with the last n_inputs training samples, so the
# first forecast is for the 1st sample of the validation set.
emd_fc = emd_RNN_daily_model.predict(emd_valid_shaped)

In [135]:
# Rescale forecasted values
# Undo the 0-1 scaling with the scaler that was fitted on the emd training data
emd_rescaled = emd_scaler.inverse_transform(emd_fc)

In [136]:
# Save emd validation set predictions
# Flattened to a 1-D array and written to Google Drive as .npy
np.save('/content/gdrive/My Drive/outputs/emd_RNN_daily_valid.npy', emd_rescaled.flatten())

### on VMD corn daily

In [114]:
# Set checkpoint callback
# '{epoch:02d}' is left in the path as a placeholder that Keras fills in with
# the epoch number each time it saves.
filepath = 'vmd_RNN_daily_valid_{epoch:02d}'
# Only the path goes through str.format(); monitor / verbose / save_best_only
# / mode are ModelCheckpoint arguments. If they are placed inside the
# format() call they are silently ignored and the callback runs with its
# defaults. With save_best_only=True a checkpoint is written only for epochs
# that improve val_loss, so the epoch with the lowest val_loss is always saved.
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
VMDregressor = define_model()

# Train model on the vmd training windows, validating on the vmd validation
# windows after every epoch
with tf.device('/device:GPU:0'):
  VMDregressor.fit(vmd_train_shaped,
                   epochs=EPOCHS,
                   validation_data=vmd_valid_shaped,
                   callbacks=[checkpoint])

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [115]:
## ACTION REQUIRED ##

# Select best model by lowest validation loss
# Set this by hand after training: it is the vmd checkpoint name with the
# two-digit number of the chosen epoch filled in.
best_model = 'vmd_RNN_daily_valid_02'

In [116]:
# Save best model to Drive
# Reload the selected checkpoint from the local models/ directory, then write
# a copy to the mounted Google Drive as an .h5 file.
vmd_RNN_daily_model = load_model(f'models/{best_model}.model')
vmd_RNN_daily_model.save(f'/content/gdrive/My Drive/models/{best_model}.h5')

In [117]:
# Use best model to generate vmd validation set forecasts
# vmd_valid_shaped starts with the last n_inputs training samples, so the
# first forecast is for the 1st sample of the validation set.
vmd_fc = vmd_RNN_daily_model.predict(vmd_valid_shaped)

In [137]:
# Rescale forecasted values
# Undo the 0-1 scaling with the scaler that was fitted on the vmd training data
vmd_rescaled = vmd_scaler.inverse_transform(vmd_fc)

In [138]:
# Save vmd validation set predictions
# Flattened to a 1-D array and written to Google Drive as .npy
np.save('/content/gdrive/My Drive/outputs/vmd_RNN_daily_valid.npy', vmd_rescaled.flatten())

## on non-decomposed

In [122]:
# Set checkpoint callback
# '{epoch:02d}' is left in the path as a placeholder that Keras fills in with
# the epoch number each time it saves.
filepath = 'true_RNN_daily_valid_{epoch:02d}'
# Only the path goes through str.format(); monitor / verbose / save_best_only
# / mode are ModelCheckpoint arguments. If they are placed inside the
# format() call they are silently ignored and the callback runs with its
# defaults. With save_best_only=True a checkpoint is written only for epochs
# that improve val_loss, so the epoch with the lowest val_loss is always saved.
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
TRUEregressor = define_model()

# Train model on the non-decomposed training windows, validating on the
# non-decomposed validation windows after every epoch
with tf.device('/device:GPU:0'):
  TRUEregressor.fit(train_shaped,
                    epochs=EPOCHS,
                    validation_data=valid_shaped,
                    callbacks=[checkpoint])

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [125]:
## ACTION REQUIRED ##

# Select best model by lowest validation loss
# Set this by hand after training: it is the non-decomposed checkpoint name
# with the two-digit number of the chosen epoch filled in.
best_model = 'true_RNN_daily_valid_02'

In [126]:
# Save best model to Drive
# Reload the selected checkpoint from the local models/ directory, then write
# a copy to the mounted Google Drive as an .h5 file.
true_RNN_daily_model = load_model(f'models/{best_model}.model')
true_RNN_daily_model.save(f'/content/gdrive/My Drive/models/{best_model}.h5')

In [128]:
# Use best model to generate non-decomposed validation set forecasts
# valid_shaped starts with the last n_inputs training samples, so the first
# forecast is for the 1st sample of the validation set.
true_fc = true_RNN_daily_model.predict(valid_shaped)

In [130]:
# Rescale forecasted values
# Undo the 0-1 scaling with the scaler that was fitted on the non-decomposed
# training data
true_rescaled = true_scaler.inverse_transform(true_fc)

In [133]:
# Save non-decomposed validation set predictions
# Flattened to a 1-D array and written to Google Drive as .npy
np.save('/content/gdrive/My Drive/outputs/true_RNN_daily_valid.npy', true_rescaled.flatten())

# End