## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
import os
import random 
import torch

torch.manual_seed(10)
random.seed(10)

from darts import TimeSeries
from darts.models import NBEATSModel
from darts.dataprocessing.transformers import Scaler, MissingValuesFiller
from darts.metrics import mape, r2_score, rmse
from darts.datasets import EnergyDataset
%matplotlib inline

2022-04-29 00:17:36 prophet.plot ERROR: Importing plotly failed. Interactive plots will not work.
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)


## Set up Data

In [2]:
# Download csv files
train_df = pd.read_csv("../Datasets/train_data.csv")
test_df = pd.read_csv("../Datasets/test_data.csv")

# Get length of train and test
train_len = len(train_df)
test_len = len(test_df)

# Create dataframe with all data
all_df = train_df.append(test_df)

# Get first date of train and test
first_train_date = train_df["Date"][0]
first_test_date = test_df["Date"][0]

# Create new date range which has no gaps and set it as new column in all_df
date_range = pd.date_range(start=first_train_date, periods = len(all_df))
all_df["Date2"] = date_range


# Get the first date of the test data. (2018-12-17)
test_start_date = all_df.iloc[train_len]["Date2"]

  all_df = train_df.append(test_df)


In [3]:
filler = MissingValuesFiller()
scaler = Scaler()

# All Data Series (with padding)
all_series = scaler.fit_transform(
    filler.transform(
        TimeSeries.from_dataframe(
            all_df, "Date2", ["Open"]
        )
    )
).astype(np.float32)

## Set Up Model

In [4]:
# Experiment values
seq_len_list = [5,10,30,50,70] # past
forecast_steps_list = [10,20,30,40,50,60] # future

curr_seq_len = seq_len_list[0] # Before
curr_for_steps = forecast_steps_list[0] # After. Forecast 753 steps

num_epochs = 50

In [5]:
# Create the NBEATS model
model_nbeats = NBEATSModel(
    input_chunk_length = curr_seq_len, # Lookback period
    output_chunk_length = curr_for_steps, # Forecast period
    generic_architecture=True, # Generic version of the model is used
    num_stacks=10,
    num_blocks=1,
    num_layers=4,
    layer_widths=512,
    n_epochs=num_epochs,
    nr_epochs_val_period=1,
    batch_size=150,
    model_name="nbeats_run",
    log_tensorboard=True,
    random_state=1
)

## Train Model

In [6]:
train_series = all_series[:train_len]

# Test series will include the end of the train series and will only be
# for the number of forecast steps
test_series = all_series[(train_len - curr_seq_len):(train_len + curr_for_steps)]

model_nbeats.fit(train_series, 
                  val_series = test_series, 
                  verbose = True)

[2022-04-29 00:17:39,274] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 2996 samples.
[2022-04-29 00:17:39,274] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 2996 samples.
2022-04-29 00:17:39 darts.models.forecasting.torch_forecasting_model INFO: Train dataset contains 2996 samples.
[2022-04-29 00:17:39,357] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 32-bits; casting model to float32.
[2022-04-29 00:17:39,357] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 32-bits; casting model to float32.
2022-04-29 00:17:39 darts.models.forecasting.torch_forecasting_model INFO: Time series values are 32-bits; casting model to float32.
2022-04-29 00:17:39 pytorch_lightning.utilities.rank_zero INFO: GPU available: True, used: False
2022-04-29 00:17:39 pytorch_lightning.utilities.rank_zero INFO: TPU available: False, using: 0 TPU cores
2022-04-29 00:17:39 pytorch

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

<darts.models.forecasting.nbeats.NBEATSModel at 0x7f2b22469c10>

## Test Model

In [7]:
# Get the predictions for the test dates
pred_series = model_nbeats.historical_forecasts(
    test_series,
    start=pd.Timestamp(test_start_date), # The first test date
    forecast_horizon=1,
    stride=1, # Do every test day
    retrain=False,
    verbose=True
)

  0%|          | 0/10 [00:00<?, ?it/s]

Predicting: 20it [00:00, ?it/s]

Predicting: 20it [00:00, ?it/s]

Predicting: 20it [00:00, ?it/s]

Predicting: 20it [00:00, ?it/s]

Predicting: 20it [00:00, ?it/s]

Predicting: 20it [00:00, ?it/s]

Predicting: 20it [00:00, ?it/s]

Predicting: 20it [00:00, ?it/s]

Predicting: 20it [00:00, ?it/s]

Predicting: 20it [00:00, ?it/s]

## Produce a graph

In [8]:
def display_forecast(pred_series, ts_transformed, forecast_type, start_date=None):
    fig,ax = plt.subplots()
    fig.canvas.draw()
    
    if start_date:
        ts_transformed = ts_transformed.drop_before(start_date)

    # Plot lines
    pred_series_inv.plot(label=("Predicted"), color="teal")
    ts_transformed.univariate_component(0).plot(label="True", color="darkorange")


    plt.xlabel("Day", fontsize=14, fontweight="bold")
    plt.ylabel("Stock Price", fontsize=14, fontweight="bold")
    plt.title("Predicted Vs True Stock Prices", fontsize=14, fontweight="bold")

    ax.set_xticklabels(np.arange(0,len(pred_series),90));
    ax.grid(False)
    ax.patch.set_edgecolor("black")
    ax.patch.set_linewidth("1")
    plt.xticks(rotation=0)
    plt.legend(frameon=True)

In [9]:
# Create folder for graphs
folderName = "Graphs"
if not os.path.exists("Graphs"):
    os.makedirs(folderName)
    print("Graphs folder created!")

In [10]:
# Unscale everything
all_series_inv = scaler.inverse_transform(all_series)
test_series_inv = scaler.inverse_transform(test_series)
pred_series_inv = scaler.inverse_transform(pred_series)

if curr_for_steps == test_len:
    # Display a graph of predicted vs. actual
    display_forecast(pred_series_inv, test_series_inv[curr_seq_len:], "7 day")
    
    # Save the file 
    fileName = "Graphs/"+"seqLen_" + str(curr_seq_len) + "_foreSteps_" + str(curr_for_steps)+".png"
    plt.savefig(fileName, bbox_inches="tight")

## RMSE Results

In [11]:
# Record rmse values in csv file
rmse_value = round(rmse(test_series_inv[curr_seq_len:], pred_series_inv),3)
print("The RMSE value is:", rmse_value)

with open("resultsExp2.csv", "a") as res_file:
    res_file.write(str(curr_seq_len) + ", " + str(curr_for_steps)+ ", " + str(rmse_value) + ",epoch_num:"+str(num_epochs)+"\n")

The RMSE value is: 28.649
