<a href="https://colab.research.google.com/github/acse-2020/acse2020-acse9-finalreport-acse-jaq15/blob/main/evaluation_notebooks/Model_Evaluation_and_Multi_Plotting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

The cells below handle all the imports needed to run our models, making use of the public repo `feeder_repo`, linked
[here](https://github.com/acse-jaq15/feeder_repo).

In [1]:
# clone the feeder repo to get the data_reader module and the financial time series data
# (the helper modules and saved models used by the later cells are part of the same repo)
!git clone https://github.com/acse-jaq15/feeder_repo.git

Cloning into 'feeder_repo'...
remote: Enumerating objects: 956, done.[K
remote: Counting objects: 100% (220/220), done.[K
remote: Compressing objects: 100% (118/118), done.[K
remote: Total 956 (delta 106), reused 208 (delta 96), pack-reused 736[K
Receiving objects: 100% (956/956), 291.81 MiB | 35.45 MiB/s, done.
Resolving deltas: 100% (442/442), done.
Checking out files: 100% (548/548), done.


In [2]:
# using the '%cd' magic so the change of directory persists for every later cell;
# later cells read files relative to this folder (e.g. './best_configs.txt')
%cd feeder_repo/

/content/feeder_repo


In [3]:
# checking the repo contents are listed correctly
# (the python modules, best_configs.txt, data and saved_models are all used further down)
!ls

base_model.py	  data_reader.py   output_dataframe.pkl  security_plotter.py
best_configs.txt  LICENSE	   README.md
data		  model_loader.py  saved_models


In [4]:
import ast
import os
import sys
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from keras.utils.layer_utils import count_params
from keras import backend as K
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [5]:
# silencing tensorflow info and warning messages via its log level environment variable
# NOTE(review): tensorflow has already been imported by the import cell above, so this
# setting may be read too late to take effect - confirm, or move it ahead of the import
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

In [6]:
# appending the 'feeder_repo' string to the module search path
# NOTE(review): this is a relative entry and the working directory was already changed
# to feeder_repo/ by the earlier %cd cell, so it presumably resolves to
# feeder_repo/feeder_repo - confirm whether this line is still needed
sys.path.append('feeder_repo')

# import Data_Reader class from data_reader module
from feeder_repo.data_reader import Data_Reader
# import Base_Model class from base_model module
from feeder_repo.base_model import Base_Model
# import Security_Plotter class from security_plotter module
from feeder_repo.security_plotter import Security_Plotter
# import Trained_Model class from model_loader module
from feeder_repo.model_loader import Trained_Model
# import Untrained_Model class from model_loader module
from feeder_repo.model_loader import Untrained_Model

In [7]:
# checking if the notebook is running on a GPU
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
    print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
    print('and then re-execute this cell.')
else:
    print(gpu_info)

Mon Aug  9 08:08:43 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   31C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Generating multiplots and saving model metrics
A nested loop generates one grid figure per model, with one subplot per security showing the actual, predicted and dummy prices. Within the same loop each model is evaluated on each security and the resulting metrics are collected into a dataframe.

In [8]:
# test-set configuration:
#   in_yr      - year of the time series used as test data
#   window_len - window length in days
in_yr, window_len = 2019, 30

In [9]:
# y axis label (units) for every security, grouped by asset class, for later plotting
unit_dict = {
    # base metals
    **dict.fromkeys(('Al', 'Cu'), 'Price (USD/mt)'),
    # agricultural
    'Corn': 'Price (USd/bushel)',
    # FX pairs
    **dict.fromkeys(('EURCHF', 'EURUSD', 'GBPUSD'), 'Spot exchange rate'),
    # 10 year government bonds
    **dict.fromkeys(('Bund10y', 'Gilt10y', 'Treasury10y'), 'Yield (%)'),
    # equities
    **dict.fromkeys(('Amazon', 'Google', 'Nvidia'), 'Price (USD)'),
}

# names of the models to evaluate, in evaluation order
model_list = [
    'CNN',
    'CNN_GRU',
    'CNN_LSTM',
    'GRU',
    'GRU_AE',
    'GRU_LSTM',
    'LSTM',
    'LSTM_AE',
    'LSTM_GRU',
    'MLP',
    'MLP_AE',
]

# names of the securities to evaluate, in subplot order
security_list = [
    'Al',
    'Cu',
    'Corn',
    'EURCHF',
    'EURUSD',
    'GBPUSD',
    'Gilt10y',
    'Bund10y',
    'Treasury10y',
    'Amazon',
    'Google',
    'Nvidia',
]

In [10]:
# loading the saved best configurations: the text file holds a python literal,
# which is parsed into a dictionary looked up later by '<model>_<security>' keys
with open('./best_configs.txt') as text_file:
  text_data = text_file.read()
  best_config_dict = ast.literal_eval(text_data)

In [11]:
# folders on google drive that receive the plots and the output dataframe respectively
plot_path = '/content/gdrive/My Drive/multi_plots/'
df_path = '/content/gdrive/My Drive/output_df/'

# mounting google drive for easy storage of plots and output dataframe
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [12]:
# Evaluates every trained model on every security, saves one grid figure per model
# (actual, predicted and dummy prices per security) and collects all metrics,
# together with the dummy model metrics, in the output_df dataframe.

# creating lists to store the model name, security name, model metrics and dummy metrics
m_list = []
s_list = []
mse_list = []
rmse_list = []
mae_list = []
d_mse_list = []
d_rmse_list = []
d_mae_list = []
epoch_list = []
t_param_list = []
activation_list = []
optimizer_list = []
batch_size_list = []
lr_list = []

# shape of the subplot grid, one panel per security
n_rows, n_cols = 4, 3

# making sure the plot folder exists so that saving the figures cannot fail on it
os.makedirs(plot_path, exist_ok=True)

# looping through each model in model_list
for m in model_list:
  # creating a figure with a grid of subplots, one figure per model loop
  fig, ax = plt.subplots(n_rows, n_cols, figsize=(15,15))
  fig.subplots_adjust(wspace=0.25, hspace=0.7)
  fig.suptitle(m+' Actual, Predicted and Dummy Prices', size='xx-large', y=0.92)

  # setting the time_distributed bool depending on the model type
  # in order to ensure input data is of correct dimensions
  time_distributed = m in ('CNN_GRU', 'CNN_LSTM')

  # looping through each security in security_list, the counter selects the subplot
  for sub_plot_ctr, s in enumerate(security_list):
    # converting the running counter into row and column indices of the grid
    row, col = divmod(sub_plot_ctr, n_cols)
    cur_ax = ax[row, col]

    # creating an instance of Data_Reader class
    in_data = Data_Reader(s, in_yr)
    # calling class method extract_train_test to generate training and test datasets
    in_data.extract_train_test()
    # calling class method extract_xy to generate X and y training and test datasets
    in_data.extract_xy(window_len, time_distributed)

    # assigning X_test and y_test
    X_test = in_data.X_test
    y_test = in_data.y_test

    # clearing the keras session on the back end to ease memory usage
    K.clear_session()

    # creating an instance of Trained_Model class
    trained_model = Trained_Model(m, s)
    # evaluating the model
    mse, rmse, mae = trained_model.evaluate(X_test, y_test)

    # creating an instance of Base_Model class using X_test
    base_model = Base_Model(X_test, window_len)
    # calling predict_y method
    base_model.predict_y(in_data.test_len - window_len)

    # using the trained model to predict y from X_test
    y_pred = trained_model.model.predict(X_test)
    # assigning y_dummy variable to .y_pred class attribute
    y_dummy = base_model.y_pred

    # calling class method extract_real_prices to generate unnormalised prices
    in_data.extract_real_prices(y_pred, y_dummy)

    # assigning actual_price, predicted_price and dummy_price
    actual_price = in_data.actual_price
    predicted_price = in_data.predicted_price
    dummy_price = in_data.dummy_price

    # looking up the best configuration of this model/security pair once
    best_config = best_config_dict[m+'_'+s]
    # assigning other metrics to be saved
    n_epochs = best_config['epochs']
    tm_activation = best_config['activation']
    tm_batch_size = best_config['batch_size']
    tm_optimizer = best_config['optimizer']
    tm_lr = best_config['learning_rate']
    tm_t_p = count_params(trained_model.model.trainable_weights)

    # slicing the dates so that they start after the training data plus one window
    date_time = in_data.data.date[in_data.train_len + window_len:]
    # converting the series to datetime using pandas
    series_dates = pd.to_datetime(date_time).dt.date
    # resetting index to 0 based
    series_dates = series_dates.reset_index(drop=True)
    # converting to matplotlib format
    series_dates = mdates.date2num(series_dates)

    # locators and formatter are created per subplot, as each axis needs its own instances
    # setting YearLocator
    years = mdates.YearLocator()
    # setting MonthLocator
    months = mdates.MonthLocator()
    # setting format to give year and verbose month '2019-Jan'
    d_format = mdates.DateFormatter('%Y-%b')

    # plotting values on the current subplot
    cur_ax.plot(series_dates, actual_price, label='Actual Price')
    cur_ax.plot(series_dates, predicted_price, label='Predicted Price')
    cur_ax.plot(series_dates, dummy_price, label='Dummy Price')

    # setting x axis label
    cur_ax.set_xlabel('Date')
    # getting y axis label from unit_dict
    cur_ax.set_ylabel(unit_dict[s])
    # setting title
    cur_ax.set_title(m+' '+s)
    # informing matplotlib that x axis contains dates
    cur_ax.xaxis_date()
    # setting minor and major locators and format
    cur_ax.xaxis.set_major_locator(months)
    cur_ax.xaxis.set_major_formatter(d_format)
    cur_ax.xaxis.set_minor_locator(years)
    # rotating the date labels only after the locator and formatter are in place,
    # so the labels keep the date format instead of being replaced by fixed raw tick values
    plt.setp(cur_ax.get_xticklabels(), rotation=45, ha='right')

    # gathering dummy model metrics only once per security, during the last model loop
    if m == model_list[-1]:
      # assigning y_true variable for metric calculation
      y_true = in_data.y_true
      # calculating dummy metrics and appending to relevant dummy lists
      d_mse = mean_squared_error(y_true, y_dummy)
      d_rmse = mean_squared_error(y_true, y_dummy, squared=False)
      d_mae = mean_absolute_error(y_true, y_dummy)
      d_mse_list.append(d_mse)
      d_rmse_list.append(d_rmse)
      d_mae_list.append(d_mae)

    # printing to keep track of progress
    print(m+' '+s+' complete')

    # appending model metrics to relevant lists
    m_list.append(m)
    s_list.append(s)
    mse_list.append(mse)
    rmse_list.append(rmse)
    mae_list.append(mae)
    epoch_list.append(n_epochs)
    t_param_list.append(tm_t_p)
    activation_list.append(tm_activation)
    optimizer_list.append(tm_optimizer)
    batch_size_list.append(tm_batch_size)
    lr_list.append(tm_lr)

  # generating the figure legend for every figure, all subplots share the same three
  # labels so the handles of the first subplot are used
  handles, labels = ax[0, 0].get_legend_handles_labels()
  fig.legend(handles, labels, loc='lower center', ncol=3, fontsize='large')

  # saving the figure after the security loop is complete
  # (file name kept as is so that previously saved plots keep the same naming)
  fig.savefig(plot_path+m+'_mutli_plot.png', dpi=600)
  # closing the figure to release its memory before the next model loop
  plt.close(fig)

  # print to keep track of progress
  print(m+' all completed successfully')

# appending the dummy metrics to the metrics lists, one row per security,
# the hyperparameter columns do not apply to the dummy model and are left empty
for s, d_mse, d_rmse, d_mae in zip(security_list, d_mse_list, d_rmse_list, d_mae_list):
  m_list.append('Dummy')
  s_list.append(s)
  mse_list.append(d_mse)
  rmse_list.append(d_rmse)
  mae_list.append(d_mae)
  epoch_list.append(None)
  t_param_list.append(None)
  activation_list.append(None)
  optimizer_list.append(None)
  batch_size_list.append(None)
  lr_list.append(None)

# following successful completion of the loop, generating a dictionary of metrics
output_dict = {'Model': m_list,
          'Security': s_list,
          'Epochs': epoch_list,
          'Trainable Parameters': t_param_list,
          'Activation': activation_list,
          'Optimizer': optimizer_list,
          'Batch Size': batch_size_list,
          'Learning Rate': lr_list,
          'MSE': mse_list,
          'RMSE': rmse_list,
          'MAE': mae_list}

# using output_dict to generate a dataframe
output_df = pd.DataFrame(output_dict)

CNN Al complete
CNN Cu complete
CNN Corn complete
CNN EURCHF complete
CNN EURUSD complete
CNN GBPUSD complete
CNN Gilt10y complete
CNN Bund10y complete
CNN Treasury10y complete
CNN Amazon complete
CNN Google complete
CNN Nvidia complete
CNN all completed successfully
CNN_GRU Al complete
CNN_GRU Cu complete
CNN_GRU Corn complete
CNN_GRU EURCHF complete
CNN_GRU EURUSD complete
CNN_GRU GBPUSD complete
CNN_GRU Gilt10y complete
CNN_GRU Bund10y complete
CNN_GRU Treasury10y complete
CNN_GRU Amazon complete
CNN_GRU Google complete
CNN_GRU Nvidia complete
CNN_GRU all completed successfully
CNN_LSTM Al complete
CNN_LSTM Cu complete
CNN_LSTM Corn complete
CNN_LSTM EURCHF complete
CNN_LSTM EURUSD complete
CNN_LSTM GBPUSD complete
CNN_LSTM Gilt10y complete
CNN_LSTM Bund10y complete
CNN_LSTM Treasury10y complete
CNN_LSTM Amazon complete
CNN_LSTM Google complete
CNN_LSTM Nvidia complete
CNN_LSTM all completed successfully
GRU Al complete
GRU Cu complete
GRU Corn complete
GRU EURCHF complete
GRU EURUS

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

In [13]:
# displaying the metrics dataframe (trained model rows first, dummy rows appended last)
output_df

Unnamed: 0,Model,Security,Epochs,Trainable Parameters,Activation,Optimizer,Batch Size,Learning Rate,MSE,RMSE,MAE
0,CNN,Al,200.0,17729.0,relu,adam,32.0,0.0010,0.004282,0.065435,0.051775
1,CNN,Cu,50.0,17729.0,selu,rmsprop,32.0,0.0005,0.003600,0.060004,0.047846
2,CNN,Corn,200.0,17729.0,selu,nadam,32.0,0.0010,0.004581,0.067683,0.048404
3,CNN,EURCHF,200.0,17729.0,selu,adam,64.0,0.0010,0.002924,0.054078,0.043020
4,CNN,EURUSD,100.0,17729.0,relu,adam,32.0,0.0010,0.002823,0.053128,0.041446
...,...,...,...,...,...,...,...,...,...,...,...
139,Dummy,Bund10y,,,,,,,0.011925,0.109202,0.088719
140,Dummy,Treasury10y,,,,,,,0.011908,0.109124,0.082874
141,Dummy,Amazon,,,,,,,0.034129,0.184740,0.154633
142,Dummy,Google,,,,,,,0.017869,0.133676,0.107493


In [14]:
# making sure the output folder exists on the mounted drive, then pickling the dataframe
os.makedirs(df_path, exist_ok=True)
output_df.to_pickle(df_path+'output_dataframe.pkl')

In [15]:
# folder on the mounted drive that receives the excel export of the dataframe
excel_path = '/content/gdrive/My Drive/output_excel/'
# making sure the folder exists so that writing the file cannot fail on it
os.makedirs(excel_path, exist_ok=True)
output_df.to_excel(excel_path+'output_data.xlsx')