In [None]:
!pip -qq install --upgrade mxnet~=1.7 gluonts
!pip -qq install gluonts

In [None]:
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm # from tqdm import tqdm
import matplotlib.pyplot as plt
from copy import copy
import json

from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.dataset.common import ListDataset
from gluonts.dataset.util import to_pandas
from gluonts.dataset.common import load_datasets, ListDataset
from gluonts.dataset.field_names import FieldName

In [None]:
# Read every file found under the Kaggle input directory and stack them into
# one long frame. The frames are collected first and concatenated once:
# calling pd.concat inside the loop re-copies all previously loaded rows on
# every iteration.
frames = [
    pd.read_csv(os.path.join(dirname, filename))
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
]
if not frames:
    # Fail with an explicit message instead of an opaque error further down.
    raise FileNotFoundError("no input files found under /kaggle/input")
df = pd.concat(frames)

# Parse the date strings so later sorting and pivoting use real timestamps.
df['Date'] = pd.to_datetime(df['Date'])
df.sample(3)

In [None]:
# (rows, columns) of the combined frame.
df.shape

In [None]:
# Number of distinct symbols, followed by the symbols themselves.
df['Symbol'].nunique(), df['Symbol'].unique()

Reshape the data into the wide format DeepAR expects: one row per symbol, one column per date.

In [None]:
# Order rows chronologically before reshaping.
df = df.sort_values('Date')

# Wide table of closing prices: one row per date, one column per symbol.
close_by_date = df.pivot_table(values='Close', index='Date', columns='Symbol')

# Daily log returns, log(1 + r_t) = log(P_t / P_{t-1}). The change must be
# taken along the date axis. Calling pct_change() on a symbol-by-date table
# works down the rows, i.e. it compares each coin with the coin in the
# previous row rather than with its own previous day. Transposing afterwards
# restores the layout the following cells expect: one row per symbol, one
# column per date, with `Symbol` as the first column.
df_new = np.log1p(close_by_date.pct_change()).T.reset_index()

# Encode each symbol as an integer id (its row position) and keep the reverse
# lookup so ids can be turned back into names when reporting results.
c_to_i = {symbol: idx for idx, symbol in enumerate(df_new['Symbol'])}
i_to_c = {idx: symbol for symbol, idx in c_to_i.items()}
print(len(c_to_i))

# Only the Symbol column needs translating; mapping it directly avoids an
# element-wise pass over every return value in the table.
df_new['Symbol'] = df_new['Symbol'].map(c_to_i)
df_new.sample(3)

Extract all target values (every column except `Symbol`) as an array.

In [None]:
# Every column except `Symbol`, as a 2-D array with one row per symbol.
total_values = df_new.drop(columns=['Symbol']).values

Define how far ahead we want to predict (the forecast horizon, in time steps).

In [None]:
# Forecast horizon: number of time steps held out of training and predicted.
prediction_length=14

In [None]:
train_df = df_new.drop(["Symbol"], axis=1)

# The test targets are the complete series; the training targets are the same
# series with the final `prediction_length` steps held out.
test_target_values = total_values.copy()
train_target_values = [ts[:-prediction_length] for ts in total_values]

# One static categorical feature per series: the integer symbol id.
cat = [[x] for x in df_new.Symbol.values]

# Cardinality is derived from the data rather than hard-coded to 23, so it
# stays correct if the set of input files changes.
cat_cardinality = [int(df_new.Symbol.nunique())]

# Every series starts at the first date column of the table. The date is
# derived from the data (normalised to midnight) instead of being hard-coded,
# and a plain Timestamp is used because the `freq` argument of pd.Timestamp
# was deprecated and then removed in pandas 2.0; the frequency is passed to
# ListDataset separately.
first_date = pd.Timestamp(train_df.columns[0]).normalize()
start_date = [first_date for _ in range(len(df_new))]

In [None]:
# Training dataset: one record per series holding the truncated target, the
# start timestamp and the static categorical feature.
train_fields = (FieldName.TARGET, FieldName.START, FieldName.FEAT_STATIC_CAT)
train_records = [
    dict(zip(train_fields, values))
    for values in zip(train_target_values, start_date, cat)
]
train_ds = ListDataset(train_records, freq="D")

In [None]:
# Show the first training record as a sanity check.
next(iter(train_ds))

In [None]:
# Test dataset: same record layout as the training set, but each target is
# the complete series.
test_fields = (FieldName.TARGET, FieldName.START, FieldName.FEAT_STATIC_CAT)
test_records = [
    dict(zip(test_fields, values))
    for values in zip(test_target_values, start_date, cat)
]
test_ds = ListDataset(test_records, freq="D")

In [None]:
# Show the first test record as a sanity check.
next(iter(test_ds))

In [None]:
from gluonts.model.deepar import DeepAREstimator
from gluonts.mx.trainer import Trainer

# Number of training epochs.
n = 500

# Training schedule: `n` epochs, 5 batches per epoch, batch size 100.
trainer = Trainer(epochs=n, num_batches_per_epoch=5, batch_size=100)

# DeepAR with a GRU cell on daily data. The context window is 15 times the
# forecast horizon, and the symbol id is used as a static categorical feature.
estimator = DeepAREstimator(
    freq="D",
    prediction_length=prediction_length,
    context_length=prediction_length * 15,
    cell_type='gru',
    scaling=True,
    use_feat_static_cat=True,
    cardinality=cat_cardinality,
    trainer=trainer,
)
train_output = estimator.train_model(train_ds)

In [None]:
# Persist the trained predictor to /tmp/ so it can be reloaded later.
from pathlib import Path
model_dir = Path("/tmp/")
train_output.predictor.serialize(model_dir)

In [None]:
# Reload the predictor that was serialized to /tmp/.
from gluonts.model.predictor import Predictor
model_dir = Path("/tmp/")
predictor_deserialized = Predictor.deserialize(model_dir)

In [None]:
# Build the forecast and ground-truth iterators for the test set, drawing 10
# sample paths per series, then consume both into lists.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds, predictor=train_output.predictor, num_samples=10
)
n_series = len(test_ds)

print("Obtaining time series conditioning values ...")
tss = [ts for ts in tqdm(ts_it, total=n_series)]

print("Obtaining time series predictions ...")
forecasts = [forecast for forecast in tqdm(forecast_it, total=n_series)]

In [None]:
# Score the forecasts at the 10%, 50% and 90% quantiles and print the
# aggregate metrics as indented JSON.
quantile_levels = [0.1, 0.5, 0.9]
evaluator = Evaluator(quantiles=quantile_levels)
agg_metrics, item_metrics = evaluator(
    iter(tss),
    iter(forecasts),
    num_series=len(test_ds),
)
agg_metrics_json = json.dumps(agg_metrics, indent=4)
print(agg_metrics_json)

In [None]:
# Label each row of the per-series metrics with its symbol name, then list
# the series from lowest to highest absolute error.
# NOTE(review): assumes item_metrics rows are in the same order as the keys
# of c_to_i (dataset row order) - confirm.
item_metrics['item_id'] = list(c_to_i)
item_metrics.sort_values(by='abs_error')

In [None]:
# Directory that receives saved forecast plots. `exist_ok=True` makes the
# creation idempotent and avoids the check-then-create race of testing
# os.path.exists() first.
plot_log_path = "./plots/"
directory = os.path.dirname(plot_log_path)
os.makedirs(directory, exist_ok=True)

def plot_prob_forecasts(ts_entry, forecast_entry, path, sample_id, inline=True):
    """Plot the tail of one observed series together with its forecast.

    Args:
        ts_entry: Observed series; must support slicing, ``.plot(ax=...)``
            and ``.index``.
        forecast_entry: Forecast object exposing
            ``plot(prediction_intervals=..., color=...)``.
        path: Prefix prepended to the file name when the figure is saved.
        sample_id: Identifier inserted into the saved file name.
        inline: If true, show the figure; otherwise save it as
            ``{path}forecast_{sample_id}.pdf``.

    The module-level ``prediction_length`` is read to place the red vertical
    line ``prediction_length`` steps before the end of ``ts_entry``.
    """
    plot_length = 150
    prediction_intervals = (50, 10, 90)
    # Legend entries are listed from the widest interval to the narrowest.
    # The previous code reversed the input tuple, giving 90, 10, 50.
    # NOTE(review): assumes forecast_entry.plot draws the bands widest-first
    # regardless of the order they are passed in - confirm against the
    # installed GluonTS version.
    legend = ["observations", "median prediction"] + [
        f"{k}% prediction interval"
        for k in sorted(prediction_intervals, reverse=True)
    ]

    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    ts_entry[-plot_length:].plot(ax=ax)
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    ax.axvline(ts_entry.index[-prediction_length], color='r')
    plt.legend(legend, loc="upper left")
    if inline:
        plt.show()
    else:
        plt.savefig('{}forecast_{}.pdf'.format(path, sample_id))
    # Close this figure explicitly in both branches. plt.clf() only clears
    # the current figure and does not release it, which accumulates figures
    # when the function is called in a loop.
    plt.close(fig)

print("Plotting time series predictions ...")

# Walk the observed series and their forecasts in parallel, printing the
# symbol name before each plot.
for i, (ts_entry, forecast_entry) in enumerate(
    tqdm(zip(tss, forecasts), total=len(forecasts))
):
    print(i_to_c[i])
    plot_prob_forecasts(ts_entry, forecast_entry, plot_log_path, i)
