# data acquisition and df generation

Get DOT, KSM, BTC and ETH prices over 5 years, one data point per day.

In [None]:
from datetime import datetime
from itertools import islice
from os import getenv

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from ipywidgets import interact

In [None]:
# Load the raw CSV export of each of the 4 cryptos, reading the numeric
# columns as float32.

dtypes_float = dict.fromkeys(
    ('Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'),
    np.float32,
)


def _read_crypto_csv(csv_path):
    """Read one CSV export and drop every row that has a missing value."""
    # NOTE(review): parse_dates=True only asks pandas to parse the index and
    # no index_col is given, so date columns presumably stay strings -- confirm.
    frame = pd.read_csv(csv_path, parse_dates=True, dtype=dtypes_float)
    return frame.dropna(axis=0)


raw_dot = _read_crypto_csv('../data/crypto/polkadot_2019-02-15_2024-02-14.csv')
raw_ksm = _read_crypto_csv('../data/crypto/kusama_2019-02-15_2024-02-14.csv')
raw_btc = _read_crypto_csv('../data/crypto/bitcoin_2019-02-15_2024-02-14.csv')
raw_eth = _read_crypto_csv('../data/crypto/ethereum_2019-02-15_2024-02-14.csv')

def build_train_test(
    df,
    train_start='2021-06-01',
    train_end='2023-12-31',
    test_start='2024-01-01',
):
    """Split a frame with a ``Date`` column into a train and a test part.

    Args:
        df: DataFrame with a ``Date`` column whose values can be compared
            with the bound arguments.
        train_start: Exclusive lower bound of the training window.
        train_end: Inclusive upper bound of the training window.
        test_start: Inclusive lower bound of the test window.

    Returns:
        A ``(df_train, df_test)`` tuple. Rows containing any missing value
        are dropped from both parts; the original index is kept.
    """
    in_train = (df.Date > train_start) & (df.Date <= train_end)
    in_test = df.Date >= test_start
    return df.loc[in_train].dropna(), df.loc[in_test].dropna()
    
# Wide price table: BTC's `Start` column supplies the dates and every coin
# contributes its `Open` price under its ticker symbol.
# NOTE(review): the columns are combined on the raw frames' row index, not on
# the date -- confirm every CSV lists the same days in the same order.
_raw_by_ticker = {'DOT': raw_dot, 'KSM': raw_ksm, 'BTC': raw_btc, 'ETH': raw_eth}
raw_crypto = pd.DataFrame({
    'Date': raw_btc.Start,
    **{ticker: frame.Open for ticker, frame in _raw_by_ticker.items()},
})

raw_crypto_train, raw_crypto_test = build_train_test(raw_crypto)

In [None]:
# Quick sanity check: first rows of both splits and the size of the test set.
print(
    raw_crypto_train.head(),
    raw_crypto_test.head(),
    len(raw_crypto_test),
    sep='\n',
)

In [None]:
raw_crypto_keys = ('DOT', 'KSM', 'BTC', 'ETH')


def build_plot(df_train, df_test, keys):
    """Build a stacked train/test chart with one subplot row per key.

    Every row shows the train series, the test series and a dashed vertical
    line at the last training date. The top x axis carries the range-selector
    buttons and the bottom x axis carries the range slider.

    Args:
        df_train: DataFrame with a ``Date`` column and one column per key.
        df_test: DataFrame with the same columns as ``df_train``.
        keys: Sequence of column names to plot, one per subplot row.

    Returns:
        A ``go.FigureWidget`` wrapping the assembled figure.

    Raises:
        ValueError: If ``keys`` is empty.
    """
    n_rows = len(keys)
    if n_rows == 0:
        raise ValueError('keys must contain at least one column name')

    fig = make_subplots(
        rows=n_rows,
        cols=1,
        shared_xaxes=True,
        shared_yaxes=False,
        vertical_spacing=0.02,
    )
    col = 1
    cutoff = df_train.Date.max()
    for row, key in enumerate(keys, start=1):
        train_min = df_train[key].min()
        train_max = df_train[key].max()
        fig.add_scatter(
            x=list(df_train.Date),
            y=list(df_train[key]),
            mode='lines',
            name='{} (train)'.format(key),
            row=row,
            col=col,
        )
        fig.add_scatter(
            x=list(df_test.Date),
            y=list(df_test[key]),
            mode='lines',
            name='{} (test)'.format(key),
            row=row,
            col=col,
        )
        # Dashed vertical marker at the train/test cutoff date.
        fig.add_scatter(
            x=(cutoff, cutoff),
            y=(train_min, train_max),
            mode='lines',
            line=dict(color='firebrick', width=2, dash='dash'),
            row=row,
            col=col,
        )
        fig.update_yaxes(
            title_text=key,
            row=row,
            col=col,
            range=[
                min(train_min, df_test[key].min()),
                max(train_max, df_test[key].max()),
            ],
        )

    top_axis = dict(
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=2, label="2m", step="month", stepmode="backward"),
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all"),
            ]
        ),
        rangeslider={'visible': False},
        type='date',
    )
    layout_updates = {'height': 800, 'showlegend': False, 'xaxis': top_axis}
    if n_rows == 1:
        # A single row is both the top and the bottom axis.
        top_axis['rangeslider'] = {'visible': True}
    else:
        # The slider goes on the last row's axis, whatever the row count.
        bottom_axis = 'xaxis{}'.format(n_rows)
        layout_updates[bottom_axis] = dict(
            rangeslider=dict(visible=True),
            type='date',
        )
    fig.update_layout(**layout_updates)
    return go.FigureWidget(fig)


fig = build_plot(raw_crypto_train, raw_crypto_test, raw_crypto_keys)

In [None]:
# NOTE(review): the zoom callback below only fires for a figure displayed as a
# live widget; confirm that `fig.show()` renders it that way in this setup.
fig.show()


def zoom1(layout, xrange):
    """Rescale each subplot's y axis to the data inside the visible x range.

    Used as a plotly ``on_change`` callback for the x-axis range. The new
    y range is the min/max of the matching ``raw_crypto`` column within the
    visible dates, padded by 10% on each side.

    Args:
        layout: The layout object whose property changed; its y axes are
            updated in place.
        xrange: New ``(start, end)`` value of the x-axis range.
    """
    if xrange is None or len(xrange) < 2:
        return
    # Keep only the date part of each bound (drop a trailing time, if any).
    start, end = (str(bound).split(' ')[0] for bound in xrange[:2])
    in_window = (raw_crypto.Date >= start) & (raw_crypto.Date <= end)
    for row, key in enumerate(raw_crypto_keys, start=1):
        visible = raw_crypto.loc[in_window, key].dropna()
        if visible.empty:
            # Nothing to scale against for this row; leave its axis alone.
            continue
        # Row 1 uses 'yaxis', later rows use 'yaxis2', 'yaxis3', ...
        axis = 'yaxis{}'.format(row if row > 1 else '')
        layout[axis].range = [
            float(visible.min()) * 0.9,
            float(visible.max()) * 1.1,
        ]


fig.layout.on_change(zoom1, 'xaxis.range')
fig.layout.on_change(zoom1, 'xaxis4.range')

# gluonts + lag-llama
This approach needs more work before it performs well.
Jump to the next section.

In [None]:
import torch
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.dataset.repository.datasets import get_dataset
from gluonts.dataset.pandas import PandasDataset
from lag_llama.gluon.estimator import LagLlamaEstimator

In [None]:
def build_dataset(df_input):
    """Convert a wide price frame into a GluonTS ``PandasDataset``.

    The frame is melted to long format: one row per date and series, with the
    series name in column ``XXX`` and the value, cast to float32, in ``CHF``.

    Args:
        df_input: DataFrame with a ``Date`` column and one column per series.

    Returns:
        A daily-frequency ``PandasDataset`` built from the long frame.
    """
    long_frame = df_input.reset_index().melt(
        id_vars='Date',
        var_name='XXX',
        value_name='CHF',
    )
    # reset_index() can add an 'index' column, which melt() would otherwise
    # treat as one more series.
    long_frame = long_frame[long_frame["XXX"] != "index"]
    long_frame = long_frame.astype({'CHF': np.float32})
    return PandasDataset.from_long_dataframe(
        dataframe=long_frame,
        timestamp='Date',
        freq="1D",
        item_id='XXX',
        target='CHF',
    )


dataset_crypto, dataset_crypto_train, dataset_crypto_test = map(
    build_dataset,
    (raw_crypto, raw_crypto_train, raw_crypto_test),
)

# Forecast horizon and model context window, both in days.
prediction_length = context_length = 30

In [None]:
# Load the checkpoint to recover the model hyper-parameters stored in it.
# Fall back to the CPU when no CUDA device is available, so this cell also
# runs on machines without a GPU.
# NOTE: torch.load unpickles the file -- only open checkpoints from a trusted
# source.
ckpt = torch.load(
    "lag-llama.ckpt",
    map_location=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
)
estimator_args = ckpt["hyper_parameters"]["model_kwargs"]

In [None]:
# Architecture settings are copied from the checkpoint's stored model kwargs.
_ckpt_model_kwargs = {
    name: estimator_args[name]
    for name in (
        "input_size",
        "n_layer",
        "n_embd_per_head",
        "n_head",
        "scaling",
        "time_feat",
    )
}
estimator = LagLlamaEstimator(
    ckpt_path="lag-llama.ckpt",
    prediction_length=prediction_length,
    context_length=context_length,
    **_ckpt_model_kwargs,
)

# Assemble the predictor from the estimator's own module and transformation.
lightning_module = estimator.create_lightning_module()
transformation = estimator.create_transformation()
predictor = estimator.create_predictor(transformation, lightning_module)

In [None]:
# Run the predictor over the test dataset. The two returned iterables
# (forecasts and the matching series) are materialised in the next cell.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=dataset_crypto_test,
    predictor=predictor,
)

In [None]:
# Materialise both iterables: they are used twice below (evaluation, plotting).
forecasts = list(forecast_it)
tss = list(ts_it)

In [None]:
# Evaluator created with its default settings.
evaluator = Evaluator()

In [None]:
# Score the forecasts against the actual series; fresh iterators are passed so
# the lists stay reusable.
agg_metrics, ts_metrics = evaluator(iter(tss), iter(forecasts))

In [None]:
# The aggregated 'mean_wQuantileLoss' metric is reported here as the CRPS.
print("CRPS:", agg_metrics['mean_wQuantileLoss'])

In [None]:
# Plot, for the first four series, the observed values together with a few
# forecast quantiles (one subplot row per series).
fig = make_subplots(rows=4, cols=1, shared_xaxes=True, shared_yaxes=False, vertical_spacing=0.02)

for idx, (forecast, ts) in islice(enumerate(zip(forecasts, tss)), 4):
    row = idx + 1
    recent = ts.loc[ts.index > '2023-12-01']
    fig.add_scatter(
        x=recent.index.to_timestamp(),
        y=[a[0] for a in recent.values],
        mode='lines',
        name=forecast.item_id,
        row=row,
        col=1,
    )
    # First forecast timestamp; each following quantile value is one day later.
    # Assumes forecast.start_date is a pandas Period, like ts.index -- TODO confirm.
    forecast_start = forecast.start_date.to_timestamp()
    for quantile in ("p40", "p50", "p60", "p70"):
        q0 = forecast.quantile(quantile)
        fig.add_scatter(
            x=[forecast_start + pd.Timedelta(days=n) for n in range(len(q0))],
            y=q0,
            name='{} {}'.format(quantile, forecast.item_id),
            row=row,
            col=1,
        )
    fig.update_yaxes(title_text=forecast.item_id, row=row, col=1)
fig.update_layout(
    height=800,
    showlegend=True,
)
fig.show()

# autogluon version

In [None]:
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
from autogluon.timeseries.splitter import ExpandingWindowSplitter

In [None]:
def build_timeserie(df_input):
    """Convert a wide price frame into an AutoGluon ``TimeSeriesDataFrame``.

    The frame is melted to long format (columns ``Date``, ``item_id`` and
    ``target``), the target is cast to float32 and the rows are sorted by
    item and date. The first rows of the long frame are printed.

    Args:
        df_input: DataFrame with a ``Date`` column and one column per series.

    Returns:
        The ``TimeSeriesDataFrame`` built from the long frame.
    """
    long_frame = df_input.reset_index().melt(
        id_vars='Date',
        var_name='item_id',
        value_name='target',
    )
    # reset_index() can add an 'index' column, which melt() would otherwise
    # treat as one more series.
    keep = long_frame["item_id"] != "index"
    long_frame = (
        long_frame[keep]
        .astype({'target': np.float32})
        .sort_values(["item_id", "Date"])
    )
    print(long_frame.head())
    return TimeSeriesDataFrame(
        data=long_frame,
        timestamp_column='Date',
        id_column='item_id',
    )


# NOTE: this rebinds prediction_length (30 in the lag-llama section) to the
# number of rows in the test split.
prediction_length = len(raw_crypto_test)
crypto_data, crypto_train, crypto_test = (
    build_timeserie(frame)
    for frame in (raw_crypto, raw_crypto_train, raw_crypto_test)
)
crypto_train.head()

In [None]:
# Both predictors share every setting except the directory they save to.
_predictor_options = dict(
    prediction_length=prediction_length,
    target="target",
    eval_metric="MASE",
    freq="D",
)

predictor_medium = TimeSeriesPredictor(
    path="../data/model/crypto-medium",
    **_predictor_options,
)

predictor_best = TimeSeriesPredictor(
    path="../data/model/crypto-best",
    **_predictor_options,
)

In [None]:
# Train on the training split with the "best_quality" preset.
# time_limit=1800 caps the training time (seconds, per AutoGluon's docs).
predictor_best.fit(
    crypto_train,
    presets="best_quality",
    time_limit=1800,
    # num_val_windows=5,
)

In [None]:
# Use the best-quality predictor from here on. NOTE: this rebinds `predictor`,
# which referred to the lag-llama predictor in the previous section.
predictor=predictor_best

# Forecast using the training split as the known history.
predictions = predictor.predict(crypto_train)
print(predictions.head())

In [None]:
item_id = "DOT"

# Series for the selected item: history, forecast and held-out future values.
y_past = crypto_train.loc[item_id]["target"]
x_past = y_past.index

y_pred = predictions.loc[item_id]
x_pred = y_pred.index

y_test = crypto_test.loc[item_id]["target"]
x_test = y_test.index


def _quantile_edge(column, label, **fill_options):
    """Return a zero-width line along one forecast column (an edge of the band)."""
    return go.Scatter(
        x=list(x_pred),
        y=list(y_pred[column]),
        name=label,
        mode='lines',
        marker=dict(color="#444"),
        line=dict(width=0),
        showlegend=False,
        **fill_options,
    )


data = [
    go.Scatter(x=list(x_past), y=list(y_past), name="Past TS"),
    go.Scatter(x=list(x_pred), y=list(y_pred["mean"]), name="Mean forecast"),
    go.Scatter(x=list(x_test), y=list(y_test), name="Future TS"),
    _quantile_edge("0.1", "p10"),
    # 'tonexty' shades the area between this trace and the previous one (p10).
    _quantile_edge(
        "0.9",
        "p90",
        fillcolor='rgba(68, 68, 68, 0.3)',
        fill='tonexty',
    ),
]

# Range-selector buttons: 1, 2, 3 and 6 months back, plus "all".
_month_buttons = [
    dict(count=months, label=label, step="month", stepmode="backward")
    for months, label in ((1, "1m"), (2, "2m"), (3, "3m"), (6, "6m"))
]
layout = dict(
    height=600,
    showlegend=True,
    hovermode="x",
    xaxis=dict(
        type="date",
        rangeslider=dict(visible=True),
        rangeselector=dict(buttons=_month_buttons + [dict(step="all")]),
    ),
)

fig = go.FigureWidget(data=data, layout=layout)
fig

In [None]:
def zoomy(layout, xrange):
    """Rescale the y axis to the data visible in the current x range.

    Used as a plotly ``on_change`` callback for ``xaxis.range``. The new
    y range spans the past values, the test values and the "0.1"/"0.9"
    forecast columns inside the window, padded by 10% on each side. Series
    with no point in the window are ignored; if nothing is visible the axis
    is left unchanged.

    Args:
        layout: The layout object whose property changed; its y axis is
            updated in place.
        xrange: New ``(start, end)`` value of the x-axis range.
    """
    if xrange is None or len(xrange) < 2:
        return
    # Keep only the date part of each bound (drop a trailing time, if any).
    start, end = (str(bound).split(' ')[0] for bound in xrange[:2])
    pred_window = y_pred.loc[start:end]
    candidates = (
        y_past.loc[start:end],
        y_test.loc[start:end],
        pred_window["0.1"],
        pred_window["0.9"],
    )
    # Drop NaNs and empty windows so they cannot poison min()/max().
    visible = [series.dropna() for series in candidates]
    visible = [series for series in visible if not series.empty]
    if not visible:
        return
    range_min = min(float(series.min()) for series in visible)
    range_max = max(float(series.max()) for series in visible)

    layout.yaxis.range = [range_min * 0.9, range_max * 1.1]


fig.layout.on_change(zoomy, 'xaxis.range')

In [None]:
# Leaderboard of the trained models, evaluated on the full dataset.
predictor.leaderboard(crypto_data)