# This notebook is identical to RNN_soybean_daily_Wang.ipynb, except that test set predictions are generated after the validation set predictions

### CONTENTS
### 1 Imports
### 2 Arrange datasets
### 3 Define models
### 4 Train models

# 1 Imports

In [None]:
# RNN imports

import tensorflow as tf

from keras.preprocessing.sequence import TimeseriesGenerator

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN

from keras.models import load_model

from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
#General imports

import numpy as np
import pandas as pd

import scipy.stats as stats

import matplotlib.pyplot as plt
import matplotlib.dates

from datetime import datetime

from google.colab import drive
import json

In [None]:
# Load the daily soybean series; the 'date' column is parsed (day first) and used as the index
data_url = 'https://raw.githubusercontent.com/ptraver/data/main/bean_daily_V2.csv'
soybean_df = pd.read_csv(data_url, index_col='date', parse_dates=True, dayfirst=True)

# The column name in the source file is unusable, so relabel the single column
soybean_df.columns = ['value']

# Keep 1974 through 2017 only, then drop the final row: the VMD process loses
# the last value, so the raw series is shortened by one row as well
soybean_df = soybean_df.loc['1974-01-01':'2017-12-31'].iloc[:-1]

# Show the last two rows of the loaded data
soybean_df.tail(2)

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2017-12-20,953.0
2017-12-21,947.5


In [None]:
# Import emd data
# The 'date' column is parsed (day first) and used as the index.
# NOTE(review): unlike soybean_df, this frame is not restricted to 1974-2017 here;
# presumably the CSV already covers the same rows -- TODO confirm.
data_url = 'https://raw.githubusercontent.com/kumars8/Deloitte_login_prjct/master/soybean_emd_daily.csv'
soybean_emd = pd.read_csv(data_url, parse_dates=True, dayfirst=True, index_col='date')

In [None]:
# Import vmd data
# The 'date' column is parsed (day first) and used as the index.
# NOTE(review): unlike soybean_df, this frame is not restricted to 1974-2017 here;
# presumably the CSV already covers the same rows -- TODO confirm.
data_url = 'https://raw.githubusercontent.com/kumars8/Deloitte_login_prjct/master/soybean_vmd_daily.csv'
soybean_vmd = pd.read_csv(data_url, parse_dates=True, dayfirst=True, index_col='date')

# 2 Arrange datasets

In [None]:
# Split each series chronologically so that the validation and test portions
# never feature in training decisions
from sklearn.model_selection import train_test_split


def _chronological_split(frame):
  """Cut `frame`, without shuffling, into train (first 80%), valid and test (half of the rest each)."""
  train_part, out_of_sample = train_test_split(frame, test_size=0.2, shuffle=False)
  valid_part, test_part = train_test_split(out_of_sample, test_size=0.5, shuffle=False)
  return train_part, valid_part, test_part


true_train, true_valid, true_test = _chronological_split(soybean_df)
emd_train, emd_valid, emd_test = _chronological_split(soybean_emd)
vmd_train, vmd_valid, vmd_test = _chronological_split(soybean_vmd)

In [None]:
# Min-max scale every series. Each scaler learns its range from the training
# portion only and is then applied unchanged to all three portions, so the
# validation and test values are not guaranteed to stay inside [0, 1].
from sklearn.preprocessing import MinMaxScaler

emd_scaler = MinMaxScaler().fit(emd_train)
emd_train, emd_valid, emd_test = (
    emd_scaler.transform(part) for part in (emd_train, emd_valid, emd_test)
)

vmd_scaler = MinMaxScaler().fit(vmd_train)
vmd_train, vmd_valid, vmd_test = (
    vmd_scaler.transform(part) for part in (vmd_train, vmd_valid, vmd_test)
)

true_scaler = MinMaxScaler().fit(true_train)
true_train, true_valid, true_test = (
    true_scaler.transform(part) for part in (true_train, true_valid, true_test)
)

In [None]:
# Set input shape variables
# n_inputs is the window length handed to every TimeseriesGenerator and the
# first dimension of the model's input_shape; n_features is the second.
n_inputs = 60
n_features = 1

In [None]:
# Build the windowed emd datasets (one sample per batch)
emd_train_shaped = TimeseriesGenerator(data=emd_train, targets=emd_train,
                                       length=n_inputs, batch_size=1)

# Prepend the last n_inputs training rows so that the first validation row
# already has a full input window in front of it
emd_extra_valid = emd_train[-n_inputs:]
emd_valid_set = np.concatenate([emd_extra_valid, emd_valid], axis=0)
emd_valid_shaped = TimeseriesGenerator(data=emd_valid_set, targets=emd_valid_set,
                                       length=n_inputs, batch_size=1)

# Same idea for the test portion, borrowing the tail of the validation rows
emd_extra_test = emd_valid[-n_inputs:]
emd_test_set = np.concatenate([emd_extra_test, emd_test], axis=0)
emd_test_shaped = TimeseriesGenerator(data=emd_test_set, targets=emd_test_set,
                                      length=n_inputs, batch_size=1)

In [None]:
# Build the windowed vmd datasets (one sample per batch)
vmd_train_shaped = TimeseriesGenerator(data=vmd_train, targets=vmd_train,
                                       length=n_inputs, batch_size=1)

# Prepend the last n_inputs training rows so that the first validation row
# already has a full input window in front of it
vmd_extra_valid = vmd_train[-n_inputs:]
vmd_valid_set = np.concatenate([vmd_extra_valid, vmd_valid], axis=0)
vmd_valid_shaped = TimeseriesGenerator(data=vmd_valid_set, targets=vmd_valid_set,
                                       length=n_inputs, batch_size=1)

# Same idea for the test portion, borrowing the tail of the validation rows
vmd_extra_test = vmd_valid[-n_inputs:]
vmd_test_set = np.concatenate([vmd_extra_test, vmd_test], axis=0)
vmd_test_shaped = TimeseriesGenerator(data=vmd_test_set, targets=vmd_test_set,
                                      length=n_inputs, batch_size=1)

In [None]:
# Build the windowed non-decomposed datasets (one sample per batch)
train_shaped = TimeseriesGenerator(data=true_train, targets=true_train,
                                   length=n_inputs, batch_size=1)

# Prepend the last n_inputs training rows so that the first validation row
# already has a full input window in front of it
extra_valid = true_train[-n_inputs:]
valid_set = np.concatenate([extra_valid, true_valid], axis=0)
valid_shaped = TimeseriesGenerator(data=valid_set, targets=valid_set,
                                   length=n_inputs, batch_size=1)

# Same idea for the test portion, borrowing the tail of the validation rows
extra_test = true_valid[-n_inputs:]
test_set = np.concatenate([extra_test, true_test], axis=0)
test_shaped = TimeseriesGenerator(data=test_set, targets=test_set,
                                  length=n_inputs, batch_size=1)

# 3 Define models

In [None]:
# Set variables

# Number of units in the SimpleRNN layer built by define_model()
units = 32

In [None]:
# Define model definition function

def define_model():
  """Build and compile a fresh single-layer SimpleRNN regressor.

  The network takes windows of shape (n_inputs, n_features), feeds them through
  one SimpleRNN layer with `units` units and maps the result to a single output
  through a Dense layer. It is compiled with the 'adam' optimizer and 'mse'
  loss. `units`, `n_inputs` and `n_features` are module-level variables read at
  call time.

  Returns:
    The compiled, untrained Sequential model.
  """
  layer_stack = [
      SimpleRNN(units=units, input_shape=(n_inputs, n_features)),
      Dense(units=1),
  ]
  model = Sequential(layer_stack)
  model.compile(loss='mse', optimizer='adam')
  return model

# 4 Train models

In [None]:
# Set variables

# Number of training epochs passed to fit() for each of the three models
EPOCHS = 4

In [None]:
# Mount Google Drive; models and predictions are later written under /content/gdrive/MyDrive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


###on EMD soybean daily

In [None]:
# Set checkpoint callback
# The {epoch:02d} placeholder stays in the path for Keras to fill in per epoch.
# The monitoring options belong to ModelCheckpoint itself; passing them to
# str.format would silently discard them. With save_best_only=True a checkpoint
# is written only for epochs that improve val_loss, so the epoch with the lowest
# validation loss is always among the saved checkpoints.
filepath = 'emd_RNN_daily_valid_{epoch:02d}'
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
EMDregressor = define_model()

# Train model on the windowed emd training data, validating on the emd validation windows
with tf.device('/device:GPU:0'):
  EMDregressor.fit(emd_train_shaped,
                   epochs=EPOCHS,
                   validation_data=emd_valid_shaped,
                   callbacks=[checkpoint])

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [None]:
## ACTION REQUIRED ##

# Select best model by lowest validation loss
# Must name a checkpoint written by the emd training cell (pattern
# 'emd_RNN_daily_valid_{epoch:02d}'); update the epoch suffix after each retraining run.
best_model = 'emd_RNN_daily_valid_03'

In [None]:
# Save best model to Drive
# Reload the selected local checkpoint, then write an .h5 copy under MyDrive/models
emd_RNN_daily_model = load_model(f'models/{best_model}.model')
emd_RNN_daily_model.save(f'/content/gdrive/MyDrive/models/{best_model}.h5')

In [None]:
# Use best model to generate emd validation set forecasts (still in scaled units)
emd_fc = emd_RNN_daily_model.predict(emd_valid_shaped)

In [None]:
# Rescale forecasted values back to original units with the scaler fitted on the emd training data
emd_rescaled = emd_scaler.inverse_transform(emd_fc)

In [None]:
# Save emd validation set predictions, flattened to 1-D
# NOTE(review): validation predictions are written under outputs/test/ -- confirm this directory is intended
np.save('/content/gdrive/MyDrive/outputs/test/daily/emd_RNN_daily_valid.npy', emd_rescaled.flatten())

In [None]:
#### TEST SET ####

In [None]:
# Load the model
# Reads the .h5 copy saved to Drive; best_model must still hold the emd selection
emd_RNN_daily_model = load_model(f'/content/gdrive/MyDrive/models/{best_model}.h5')

In [None]:
# Use best model to generate emd test set forecasts (still in scaled units)
emd_fc = emd_RNN_daily_model.predict(emd_test_shaped)

In [None]:
# Rescale forecasted values back to original units with the scaler fitted on the emd training data
emd_rescaled = emd_scaler.inverse_transform(emd_fc)

In [None]:
# Save emd test set predictions, flattened to 1-D
np.save('/content/gdrive/MyDrive/outputs/test/daily/emd_RNN_daily_test.npy', emd_rescaled.flatten())

###on VMD soybean daily

In [None]:
# Set checkpoint callback
# The {epoch:02d} placeholder stays in the path for Keras to fill in per epoch.
# The monitoring options belong to ModelCheckpoint itself; passing them to
# str.format would silently discard them. With save_best_only=True a checkpoint
# is written only for epochs that improve val_loss, so the epoch with the lowest
# validation loss is always among the saved checkpoints.
filepath = 'vmd_RNN_daily_valid_{epoch:02d}'
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
VMDregressor = define_model()

# Train model on the windowed vmd training data, validating on the vmd validation windows
with tf.device('/device:GPU:0'):
  VMDregressor.fit(vmd_train_shaped,
                   epochs=EPOCHS,
                   validation_data=vmd_valid_shaped,
                   callbacks=[checkpoint])

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [None]:
## ACTION REQUIRED ##

# Select best model by lowest validation loss
# Must name a checkpoint written by the vmd training cell (pattern
# 'vmd_RNN_daily_valid_{epoch:02d}'); update the epoch suffix after each retraining run.
best_model = 'vmd_RNN_daily_valid_02'

In [None]:
# Save best model to Drive
# Reload the selected local checkpoint, then write an .h5 copy under MyDrive/models
vmd_RNN_daily_model = load_model(f'models/{best_model}.model')
vmd_RNN_daily_model.save(f'/content/gdrive/MyDrive/models/{best_model}.h5')

In [None]:
# Use best model to generate vmd validation set forecasts (still in scaled units)
vmd_fc = vmd_RNN_daily_model.predict(vmd_valid_shaped)

In [None]:
# Rescale forecasted values back to original units with the scaler fitted on the vmd training data
vmd_rescaled = vmd_scaler.inverse_transform(vmd_fc)

In [None]:
# Save vmd validation set predictions, flattened to 1-D
# NOTE(review): validation predictions are written under outputs/test/ -- confirm this directory is intended
np.save('/content/gdrive/MyDrive/outputs/test/daily/vmd_RNN_daily_valid.npy', vmd_rescaled.flatten())

In [None]:
# Use best model to generate vmd test set forecasts (still in scaled units)
# NOTE(review): the same prediction is repeated in the cells following the
# "#### TEST SET ####" marker of this section -- one of the two copies looks redundant
vmd_fc = vmd_RNN_daily_model.predict(vmd_test_shaped)

In [None]:
# Rescale forecasted values back to original units with the scaler fitted on the vmd training data
vmd_rescaled = vmd_scaler.inverse_transform(vmd_fc)

In [None]:
# Save vmd test set predictions, flattened to 1-D
np.save('/content/gdrive/MyDrive/outputs/test/daily/vmd_RNN_daily_test.npy', vmd_rescaled.flatten())

In [None]:
#### TEST SET ####

In [None]:
# Load the model
# Reads the .h5 copy saved to Drive; best_model must still hold the vmd selection
vmd_RNN_daily_model = load_model(f'/content/gdrive/MyDrive/models/{best_model}.h5')

In [None]:
# Use best model to generate vmd test set forecasts (still in scaled units)
vmd_fc = vmd_RNN_daily_model.predict(vmd_test_shaped)

In [None]:
# Rescale forecasted values back to original units with the scaler fitted on the vmd training data
vmd_rescaled = vmd_scaler.inverse_transform(vmd_fc)

In [None]:
# Save vmd test set predictions, flattened to 1-D
np.save('/content/gdrive/MyDrive/outputs/test/daily/vmd_RNN_daily_test.npy', vmd_rescaled.flatten())

## on non-decomposed

In [None]:
# Set checkpoint callback
# The {epoch:02d} placeholder stays in the path for Keras to fill in per epoch.
# The monitoring options belong to ModelCheckpoint itself; passing them to
# str.format would silently discard them. With save_best_only=True a checkpoint
# is written only for epochs that improve val_loss, so the epoch with the lowest
# validation loss is always among the saved checkpoints.
filepath = 'true_RNN_daily_valid_{epoch:02d}'
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
TRUEregressor = define_model()

# Train model on the windowed non-decomposed training data, validating on its validation windows
with tf.device('/device:GPU:0'):
  TRUEregressor.fit(train_shaped,
                    epochs=EPOCHS,
                    validation_data=valid_shaped,
                    callbacks=[checkpoint])

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [None]:
## ACTION REQUIRED ##

# Select best model by lowest validation loss
# Must name a checkpoint written by the non-decomposed training cell (pattern
# 'true_RNN_daily_valid_{epoch:02d}'); update the epoch suffix after each retraining run.
best_model = 'true_RNN_daily_valid_02'

In [None]:
# Save best model to Drive
# Reload the selected local checkpoint, then write an .h5 copy under MyDrive/models
true_RNN_daily_model = load_model(f'models/{best_model}.model')
true_RNN_daily_model.save(f'/content/gdrive/MyDrive/models/{best_model}.h5')

In [None]:
# Use best model to generate non-decomposed validation set forecasts (still in scaled units)
true_fc = true_RNN_daily_model.predict(valid_shaped)

In [None]:
# Rescale forecasted values back to original units with the scaler fitted on the non-decomposed training data
true_rescaled = true_scaler.inverse_transform(true_fc)

In [None]:
# Save non-decomposed validation set predictions, flattened to 1-D
# NOTE(review): validation predictions are written under outputs/test/ -- confirm this directory is intended
np.save('/content/gdrive/MyDrive/outputs/test/daily/true_RNN_daily_valid.npy', true_rescaled.flatten())

In [None]:
# Use best model to generate non-decomposed test set forecasts (still in scaled units)
# NOTE(review): the same prediction is repeated in the cells following the
# "#### TEST SET ####" marker of this section -- one of the two copies looks redundant
true_fc = true_RNN_daily_model.predict(test_shaped)

In [None]:
# Rescale forecasted values back to original units with the scaler fitted on the non-decomposed training data
true_rescaled = true_scaler.inverse_transform(true_fc)

In [None]:
# Save non-decomposed test set predictions, flattened to 1-D
np.save('/content/gdrive/MyDrive/outputs/test/daily/true_RNN_daily_test.npy', true_rescaled.flatten())

In [None]:
#### TEST SET ####

In [None]:
# Load the model
# Reads the .h5 copy saved to Drive; best_model must still hold the non-decomposed selection
true_RNN_daily_model = load_model(f'/content/gdrive/MyDrive/models/{best_model}.h5')

In [None]:
# Use best model to generate non-decomposed test set forecasts (still in scaled units)
true_fc = true_RNN_daily_model.predict(test_shaped)

In [None]:
# Rescale forecasted values back to original units with the scaler fitted on the non-decomposed training data
true_rescaled = true_scaler.inverse_transform(true_fc)

In [None]:
# Save non-decomposed test set predictions, flattened to 1-D
np.save('/content/gdrive/MyDrive/outputs/test/daily/true_RNN_daily_test.npy', true_rescaled.flatten())

# End