# This notebook is identical to RNN_soybean_weekly_weather.ipynb, except that it also generates test set predictions after the validation set predictions

### CONTENTS
### 1 Imports
### 2 Arrange datasets
### 3 Define models
### 4 Train models

# 1 Imports

In [1]:
# RNN imports

import tensorflow as tf

from keras.preprocessing.sequence import TimeseriesGenerator

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN

from keras.models import load_model

from tensorflow.keras.callbacks import ModelCheckpoint

In [2]:
#General imports

import numpy as np
import pandas as pd

import scipy.stats as stats

import matplotlib.pyplot as plt
import matplotlib.dates

from datetime import datetime

from google.colab import drive
import json

In [3]:
# Import weekly soybean price data, indexed by the parsed 'date' column
data_url = 'https://raw.githubusercontent.com/kumars8/Deloitte_login_prjct/master/soybean_weekly.csv'
soybean_df = pd.read_csv(data_url, parse_dates=True, dayfirst=True, index_col='date')

# fix bad name: the single data column is renamed to 'value'
soybean_df.columns = ['value']

# take only 1974 - 2017
# .loc makes it explicit that the date strings select rows by index label,
# instead of relying on DataFrame[...] falling back to row slicing
soybean_df = soybean_df.loc['1974-01-01':'2017-12-31']

# soybean_df needs to lose its last value because, per the original note, the
# VMD process loses the last value (VMD itself is not performed in this notebook)
soybean_df = soybean_df.iloc[:-1]

# view loaded data
soybean_df.tail(2)

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2017-12-10,997.8
2017-12-17,974.3


In [4]:
# Import weather sets
def _read_weather_csv(url):
  """Read one weather CSV into a DataFrame indexed by its parsed 'DATE' column."""
  return pd.read_csv(url, index_col='DATE', dayfirst=True, parse_dates=True)

# Training split
data_url = 'https://raw.githubusercontent.com/ptraver/data/main/iowa_weather_train.csv'
iowa_weather_train = _read_weather_csv(data_url)

# Validation split
data_url = 'https://raw.githubusercontent.com/ptraver/data/main/iowa_weather_valid.csv'
iowa_weather_valid = _read_weather_csv(data_url)

# Test split
data_url = 'https://raw.githubusercontent.com/ptraver/data/main/iowa_weather_test.csv'
iowa_weather_test = _read_weather_csv(data_url)

# 2 Arrange datasets

In [5]:
# Split data so that valid and test do not feature in decisions
from sklearn.model_selection import train_test_split

# shuffle=False keeps row order: the first 80% of rows become the training split
true_train_split, out_of_sample = train_test_split(
    soybean_df,
    shuffle=False,
    test_size=0.2,
)

# The held-out 20% is halved, again in order, into validation then test
true_valid_split, true_test_split = train_test_split(
    out_of_sample,
    shuffle=False,
    test_size=0.5,
)

In [6]:
# Scale data with a MinMaxScaler fitted on the training split
from sklearn.preprocessing import MinMaxScaler

# Fit on the training split only, so validation and test values never
# influence the scaling range
true_scaler = MinMaxScaler().fit(true_train_split)

# Apply the same fitted scaler to all three splits
true_train_scaled, true_valid_scaled, true_test_scaled = (
    true_scaler.transform(split)
    for split in (true_train_split, true_valid_split, true_test_split)
)

In [7]:
# Wrap the scaled arrays back into DataFrames that reuse each split's columns and index
def _as_frame(scaled_values, source_split):
  """Return `scaled_values` as a DataFrame labelled like `source_split`."""
  return pd.DataFrame(scaled_values, index=source_split.index, columns=source_split.columns)

true_train = _as_frame(true_train_scaled, true_train_split)
true_valid = _as_frame(true_valid_scaled, true_valid_split)
true_test = _as_frame(true_test_scaled, true_test_split)

In [8]:
# Double-check that each price split and its weather split share an identical index
# (prints one True/False per split, in train, valid, test order)
for price_split, weather_split in (
    (true_train, iowa_weather_train),
    (true_valid, iowa_weather_valid),
    (true_test, iowa_weather_test),
):
  print(price_split.index.equals(weather_split.index))

True
True
True


In [9]:
# Join weather and price data: the weather columns are appended to the scaled
# price column of the matching split, aligned on the index
train_weather, valid_weather, test_weather = (
    prices.join(weather)
    for prices, weather in (
        (true_train, iowa_weather_train),
        (true_valid, iowa_weather_valid),
        (true_test, iowa_weather_test),
    )
)

In [10]:
# Set input shape variable: number of consecutive time steps per input window.
# It is used as the TimeseriesGenerator `length` and as the first dimension of
# the model's input_shape.
n_inputs = 4

In [11]:
# Get sets in order: wrap each split in a generator of sliding input windows
def _make_windows(features, targets):
  """Build a batch-size-1 TimeseriesGenerator of `n_inputs`-long windows over `features`.

  `features` supplies the input rows and `targets.values` the target values.
  """
  return TimeseriesGenerator(features, targets.values, batch_size=1, length=n_inputs)

train_shaped = _make_windows(train_weather, true_train)
valid_shaped = _make_windows(valid_weather, true_valid)
test_shaped = _make_windows(test_weather, true_test)

# 3 Define models

In [12]:
# Set shape variables: unit counts for the three stacked SimpleRNN layers
L1_units = 8
L2_units = 12
L3_units = 8

# Set number of features per time step (second dimension of the model's input_shape)
# NOTE(review): presumably the price column plus three weather columns - confirm
# against the columns of the joined *_weather frames
n_features = 4

In [13]:
# Set other variables (all read by define_model)
# activation and dropout are passed to every SimpleRNN layer;
# optimizer and loss are passed to compile()
activation = 'relu'
dropout = 0
optimizer = 'adam'
loss = 'mse'

In [14]:
# Define the RNN model definition function

def define_model():
  """Build and compile the stacked SimpleRNN regressor.

  The architecture is three SimpleRNN layers (L1_units, L2_units, L3_units)
  followed by a single-unit Dense output. Layer sizes, activation, dropout,
  optimizer and loss are read from the module-level settings at call time.

  Returns:
    A compiled Sequential model expecting inputs of shape
    (n_inputs, n_features) per sample.
  """
  RNNregressor = Sequential()
  # Only the first layer declares the input shape. Later layers take their
  # input from the previous layer's output, so an input_shape of
  # (n_inputs, n_features) on them would be misleading.
  RNNregressor.add(SimpleRNN(units=L1_units, input_shape=(n_inputs, n_features), activation=activation, dropout=dropout, return_sequences=True))
  # return_sequences=True hands the full sequence to the next recurrent layer
  RNNregressor.add(SimpleRNN(units=L2_units, activation=activation, dropout=dropout, return_sequences=True))
  # The last recurrent layer returns only its final output, which feeds the Dense layer
  RNNregressor.add(SimpleRNN(units=L3_units, activation=activation, dropout=dropout))
  RNNregressor.add(Dense(units=1))
  RNNregressor.compile(optimizer=optimizer, loss=loss)
  return RNNregressor

# 4 Train models

In [15]:
# Set variables: number of training epochs passed to fit()
EPOCHS = 50

In [16]:
# Mount Google Drive at /content/gdrive; the best model and the predictions are saved under this path
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [17]:
# Set checkpoint callback
# '{epoch:02d}' is left in the path for ModelCheckpoint to fill in at save time
filepath = 'true_RNN_weather_valid_{epoch:02d}'
# monitor/verbose/save_best_only/mode are ModelCheckpoint options. They were
# previously inside the str.format() call, which silently ignored them, so the
# callback ran with its defaults and saved every epoch. With save_best_only=True
# a checkpoint is written only when val_loss improves.
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

# Define model
RNNregressor = define_model()

# Train model on the training windows, evaluating on the validation windows
# after each epoch so the checkpoint callback can track val_loss
with tf.device('/device:GPU:0'):
  RNNregressor.fit(train_shaped,
                   epochs=EPOCHS,
                   validation_data=valid_shaped,
                   callbacks=[checkpoint])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [18]:
## ACTION REQUIRED ##
# This value is set by hand after training and must be updated whenever the model is retrained.

# Select best model by lowest validation loss
# The name must match a checkpoint saved under models/ (without the 'models/'
# prefix and '.model' suffix); the trailing number is the epoch.
best_model = 'true_RNN_weather_valid_24'

In [19]:
# Save best model to Drive: reload the selected local checkpoint, then write it
# out again as an .h5 file under the mounted Drive folder
_local_checkpoint_path = f'models/{best_model}.model'
_drive_model_path = f'/content/gdrive/MyDrive/models/{best_model}.h5'

true_RNN_weather_model = load_model(_local_checkpoint_path)
true_RNN_weather_model.save(_drive_model_path)

In [20]:
# Use best model to generate validation set forecasts
# (predictions are on the scaled data; they are rescaled in a later cell)
true_fc = true_RNN_weather_model.predict(valid_shaped)

In [21]:
# Rescale forecasted values: undo the MinMax scaling with the scaler fitted on the training split
true_rescaled = true_scaler.inverse_transform(true_fc)

In [22]:
# Save validation set predictions as a flat 1-D array
# NOTE(review): the output folder is 'outputs/test/daily' although this is
# weekly data and a validation set - confirm the path is intended
np.save('/content/gdrive/MyDrive/outputs/test/daily/true_RNN_weather_valid.npy', true_rescaled.flatten())

In [28]:
#### TEST SET ####

In [24]:
# Load the model: reload the best model from the .h5 copy saved on Drive
true_RNN_weather_model = load_model(f'/content/gdrive/MyDrive/models/{best_model}.h5')

In [25]:
# Use best model to generate test set forecasts
# (this overwrites true_fc, which previously held the validation forecasts)
true_fc = true_RNN_weather_model.predict(test_shaped)

In [26]:
# Rescale forecasted values: undo the MinMax scaling with the scaler fitted on the training split
# (this overwrites true_rescaled, which previously held the validation predictions)
true_rescaled = true_scaler.inverse_transform(true_fc)

In [27]:
# Save test set predictions as a flat 1-D array
np.save('/content/gdrive/MyDrive/outputs/test/daily/true_RNN_weather_test.npy', true_rescaled.flatten())

# End