## Package Installation

In [4]:
# Install statsmodels (imported below as `sm`) into the running kernel's environment.
%pip install statsmodels

Collecting statsmodels
  Downloading statsmodels-0.13.5-cp310-cp310-win_amd64.whl (9.1 MB)
     ---------------------------------------- 9.1/9.1 MB 32.5 MB/s eta 0:00:00
Collecting patsy>=0.5.2
  Downloading patsy-0.5.3-py2.py3-none-any.whl (233 kB)
     ---------------------------------------- 233.8/233.8 kB ? eta 0:00:00
Installing collected packages: patsy, statsmodels
Successfully installed patsy-0.5.3 statsmodels-0.13.5
Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install neuralprophet
%pip install matplotlib
%pip install seaborn
%pip install pickle

Collecting neuralprophet
  Downloading neuralprophet-0.4.2-py3-none-any.whl (103 kB)
     -------------------------------------- 104.0/104.0 kB 5.9 MB/s eta 0:00:00
Collecting LunarCalendar>=0.0.9
  Using cached LunarCalendar-0.0.9-py2.py3-none-any.whl (18 kB)
Collecting ipywidgets>=7.5.1
  Using cached ipywidgets-8.0.2-py3-none-any.whl (134 kB)
Collecting holidays>=0.11.3.1
  Using cached holidays-0.17.2-py3-none-any.whl (187 kB)
Collecting plotly>=4.14.3
  Using cached plotly-5.11.0-py2.py3-none-any.whl (15.3 MB)
Collecting torch-lr-finder>=0.2.1
  Using cached torch_lr_finder-0.2.1-py3-none-any.whl (11 kB)
Installing collected packages: plotly, LunarCalendar, holidays, torch-lr-finder, ipywidgets, neuralprophet
Successfully installed LunarCalendar-0.0.9 holidays-0.17.2 ipywidgets-8.0.2 neuralprophet-0.4.2 plotly-5.11.0 torch-lr-finder-0.2.1
Note: you may need to restart the kernel to use updated packages.
Collecting git+https://github.com/ourownstory/neural_prophet.git
  Cloning htt

  Running command git clone --filter=blob:none --quiet https://github.com/ourownstory/neural_prophet.git 'C:\Users\sunny\AppData\Local\Temp\pip-req-build-4_wib34z'


Collecting seaborn
  Downloading seaborn-0.12.1-py3-none-any.whl (288 kB)
     -------------------------------------- 288.2/288.2 kB 8.7 MB/s eta 0:00:00
Collecting matplotlib!=3.6.1,>=3.1
  Using cached matplotlib-3.6.2-cp310-cp310-win_amd64.whl (7.2 MB)
Installing collected packages: matplotlib, seaborn
  Attempting uninstall: matplotlib
    Found existing installation: matplotlib 3.6.1
    Uninstalling matplotlib-3.6.1:
      Successfully uninstalled matplotlib-3.6.1
Successfully installed matplotlib-3.6.2 seaborn-0.12.1
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement pickle (from versions: none)
ERROR: No matching distribution found for pickle


## Package Importation

In [5]:
from neuralprophet import NeuralProphet, set_random_seed, set_log_level; set_random_seed(314159); set_log_level("ERROR", "INFO")
from neuralprophet.df_utils import make_future_df, fill_linear_then_rolling_avg, add_missing_dates_nan

import pandas as pd
import numpy as np

import statsmodels.api as sm

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import matplotlib.pyplot as plt
import seaborn as sns; sns.set_theme(color_codes=True); sns.set_style("whitegrid")

import time

import pickle

## Reading Data

Below we read in the train, validation, and test data, as well as the associated historical dataframe (`categorized_history`).

In [9]:
# Load the historical dataframe; the first CSV column is used as the index.
categorized_history = pd.read_csv(
    '../data_processing/categorized_history.csv',
    index_col=0,
)

In [65]:
# Load the three splits in order (train, val, test); each uses its first CSV column as the index.
train, val, test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data_processing/train.csv',
        '../data_processing/val.csv',
        '../data_processing/test.csv',
    )
)

## Creating CSV

In [19]:
# Keep only the sub-industry and symbol columns, then write them (index included)
# to `industry_symbol.csv` in the current working directory.
industry_symbol = categorized_history.loc[:, ['GICS Sub-Industry', 'Symbol']]
industry_symbol.to_csv('industry_symbol.csv')

## Creating Mappings

Below we create mappings for:

- `industry_to_symbol_map`: maps each GICS Sub-Industry to the list of all Symbols belonging to it
- `symbol_to_industry_map`: maps each Symbol to its GICS Sub-Industry
- `index_to_symbol_map`: maps each dataframe `index` value to its Symbol
- `symbol_to_index_map`: maps each Symbol to its dataframe `index` value

In [56]:
# Sub-industry -> list of its symbols (one entry per group of the groupby).
industry_to_symbol_map = {
    sub_industry: list(symbols)
    for sub_industry, symbols in industry_symbol.groupby('GICS Sub-Industry')['Symbol']
}
# Symbol -> sub-industry; if a symbol appears more than once, the last row wins.
symbol_to_industry_map = {
    symbol: sub_industry
    for symbol, sub_industry in zip(industry_symbol['Symbol'], industry_symbol['GICS Sub-Industry'])
}

In [59]:
# Dataframe index value -> symbol, taken straight from the `Symbol` column.
index_to_symbol_map = industry_symbol['Symbol'].to_dict()
# Inverse lookup: symbol -> dataframe index value (later entries overwrite earlier ones).
symbol_to_index_map = {
    symbol: row_index
    for row_index, symbol in index_to_symbol_map.items()
}

## Auxiliary Functions

In [None]:
def plot_forecast(model, data, periods=0, historic_pred=True, highlight_steps_ahead=None, xlabel='Date', ylabel=None, title=None):
    """Predict with `model` on `data` and draw the forecast on a new 14x10 figure.

    Parameters
    ----------
    model : object exposing `make_future_dataframe`, `predict`, `plot`,
        `plot_last_forecast` and `highlight_nth_step_ahead_of_each_forecast`
        (presumably a fitted NeuralProphet model -- confirm against callers).
    data : passed unchanged to `model.make_future_dataframe`.
    periods : forwarded as the `periods` argument of `make_future_dataframe`.
    historic_pred : forwarded as `n_historic_predictions`.
    highlight_steps_ahead : when None, the forecast is drawn with `model.plot`;
        otherwise it is passed to `highlight_nth_step_ahead_of_each_forecast`
        and the result's `plot_last_forecast` is used instead.
    xlabel, ylabel : axis labels forwarded to the plotting call.
    title : text set as the axes title (bold, size 20).

    Returns
    -------
    None. The figure is only created as a side effect.
    """
    future_df = model.make_future_dataframe(
        data,
        periods=periods,
        n_historic_predictions=historic_pred,
    )
    predictions = model.predict(future_df)

    _, axis = plt.subplots(figsize=(14, 10))

    if highlight_steps_ahead is None:
        model.plot(predictions, ax=axis, xlabel=xlabel, ylabel=ylabel)
    else:
        # Plot through whatever object the highlight call hands back.
        highlighted = model.highlight_nth_step_ahead_of_each_forecast(highlight_steps_ahead)
        highlighted.plot_last_forecast(predictions, ax=axis, xlabel=xlabel, ylabel=ylabel)

    # Both plotting paths finish with the same title styling.
    axis.set_title(title, fontsize=20, fontweight="bold")

In [None]:
def plot_validation(metrics_df, metrics = ['SmoothL1Loss', 'MAE', 'RMSE']):
    """Plot training vs. validation curves, one subplot per metric.

    Parameters
    ----------
    metrics_df : DataFrame-like indexable by column name. For every entry `m`
        of `metrics` it must contain a column `m` (training values) and a
        column `f'{m}_val'` (validation values). Values are plotted against
        the frame's index (assumed to be one row per training epoch --
        TODO confirm against the object returned by the model's `fit`).
    metrics : sequence of metric names to plot; must not be empty. The default
        list is never mutated.

    Raises
    ------
    ValueError
        If `metrics` is empty (there would be no subplot to draw).
    KeyError
        If a required column is missing from `metrics_df` (raised by the
        column lookup itself).

    Returns
    -------
    None. The figure is only created as a side effect.
    """
    met_len = len(metrics)
    if met_len == 0:
        raise ValueError("`metrics` must contain at least one metric name")

    # squeeze=False always yields a 2-D array of Axes, so a single metric and
    # several metrics share one code path instead of two copy-pasted branches.
    fig, axs = plt.subplots(ncols=met_len, nrows=1, sharex=True, sharey=True,
                            figsize=(8 * met_len, 8), squeeze=False)
    fig.suptitle('Validation Metrics', fontsize=20, y=0.95)

    for metric, ax in zip(metrics, axs.ravel()):
        ax.plot(metrics_df[metric], 'ob', linewidth=6, label='Training Loss', markersize=3)
        ax.plot(metrics_df[f'{metric}_val'], 'r', linewidth=2, label='Validation Loss')

        # The x-axis is the frame's index and the y-axis carries both curves;
        # the legend (not the axis labels) distinguishes training from validation.
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Metric value')
        ax.set_title(metric)
        ax.legend()