In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Data

In [None]:
# Read the training set and turn +/-inf into NaN so that infinite values are
# treated as missing by the NaN handling that follows.
df = (
    pd.read_csv('../input/g-research-crypto-forecasting/train.csv')
    .replace([np.inf, -np.inf], np.nan)
)

In [None]:
# Keep only the rows in which every column holds a value.
df = df.dropna(axis=0, how='any')

In [None]:
# Sanity check: tally the rows by their per-column "is missing" pattern.
df.isnull().value_counts()

* Asset

In [None]:
# Asset metadata table (its Asset_ID / Asset_Name columns are used for labels).
asset = pd.read_csv(
    filepath_or_buffer='../input/g-research-crypto-forecasting/asset_details.csv'
)

In [None]:
# Snapshot the Asset_ID -> Asset_Name mapping once, at definition time.
# Looking names up in this dict (rather than filtering the global `asset`
# frame on every call) keeps `asset_map` working even after the name `asset`
# is rebound to an integer ID further down in the notebook.
# drop_duplicates keeps the first row per ID, matching the old `.tolist()[0]`.
_asset_names = (
    asset.drop_duplicates('Asset_ID')
    .set_index('Asset_ID')['Asset_Name']
    .to_dict()
)


def asset_map(x):
    """Return the Asset_Name registered for Asset_ID `x`.

    Raises:
        KeyError: if `x` is not an Asset_ID present in the asset table.
    """
    return _asset_names[x]

* Group



In [None]:
# One frame per asset: drop the now-constant Asset_ID column and index the
# rows by a datetime derived from the `timestamp` column (read as seconds).
dfs = {
    asset_id: (
        df[df.Asset_ID == asset_id]
        .reset_index(drop=True)
        .drop(columns=['Asset_ID'])
        .assign(date=lambda frame: pd.to_datetime(frame.timestamp, unit='s'))
        .set_index('date')
    )
    for asset_id in np.unique(df.Asset_ID)
}


In [None]:
# First three rows of the frame stored under key 0.
dfs[0].iloc[:3]

In [None]:
# Last three rows of the frame stored under key 0.
dfs[0].iloc[-3:]

* Plot the trend (note that some data is missing)

In [None]:
# Number of rows taken per asset for the trend plot: 3 days of minutes.
num_forecast_steps = 3 * 24 * 60

# Year label used to select rows from each per-asset frame.
year = '2020'

In [None]:
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(1, 1, 1)
for asset_id, asset_frame in dfs.items():
    # Slice once and reuse the result for both the x values and the data.
    window = asset_frame.loc[year][:num_forecast_steps]
    sns.lineplot(x=window.index, y='Target', data=window,
                 ax=ax, label=asset_map(asset_id))
ax.set_ylabel("Return")
ax.set_xlabel("time")

# Derive the title from the window length so the label always matches the
# number of one-minute rows actually plotted (60*24*3 rows -> "3 days").
window_days = num_forecast_steps / (60 * 24)
fig.suptitle(f'{window_days:g} days of returns', fontsize=15)

fig.autofmt_xdate()
ax.legend(loc='upper right')

# Modeling

In [None]:
import tensorflow as tf
import tensorflow_probability as tfp

from tensorflow_probability import distributions as tfd  #distribution
from tensorflow_probability import sts  #Bayesian structural time series models.

## Functions

* Plot a forecast distribution against the 'true' time series

In [None]:
def plot_forecast(x, y,
                  forecast_mean, forecast_scale, forecast_samples,
                  title, x_locator=None, x_formatter=None):
    """Plot a forecast distribution against the 'true' time series.

    The last `forecast_mean.shape[-1]` entries of `x`/`y` are treated as the
    forecast window; everything before that is the training window.

    Args:
        x: x-axis values (dates) for the whole series, same length as `y`.
        y: observed target values for the whole series.
        forecast_mean: forecast mean, one value per forecast step.
        forecast_scale: forecast standard deviation, one value per forecast
            step; a +/- 2*scale band is shaded around the mean.
        forecast_samples: sampled forecast paths. It is transposed before
            plotting, so presumably shaped (num_samples, num_forecast_steps)
            -- confirm against the caller.
        title: axes title.
        x_locator: optional matplotlib tick locator for the x axis.
        x_formatter: optional matplotlib tick formatter; only applied when
            `x_locator` is also given.

    Returns:
        (fig, ax): the created figure and its single axes.
    """
    palette = sns.color_palette()
    truth_color, forecast_color = palette[0], palette[1]

    fig = plt.figure(figsize=(12, 6))
    ax = fig.add_subplot(1, 1, 1)

    # Everything before the trailing forecast window counts as training data.
    num_steps_forecast = forecast_mean.shape[-1]
    num_steps_train = len(y) - num_steps_forecast
    forecast_steps = x[num_steps_train:]

    ax.plot(x, y, lw=2, color=truth_color, label='ground truth')

    # Individual sample paths, drawn faintly behind the mean.
    ax.plot(forecast_steps, forecast_samples.T, lw=1,
            color=forecast_color, alpha=0.1)

    ax.plot(forecast_steps, forecast_mean, lw=2, ls='--',
            color=forecast_color, label='forecast')
    ax.fill_between(forecast_steps,
                    forecast_mean - 2 * forecast_scale,
                    forecast_mean + 2 * forecast_scale,
                    color=forecast_color, alpha=0.2)

    # Pad the y-limits by 10% of the range spanned by samples and observations.
    ymin = min(np.min(forecast_samples), np.min(y))
    ymax = max(np.max(forecast_samples), np.max(y))
    yrange = ymax - ymin
    ax.set_ylim([ymin - yrange * 0.1, ymax + yrange * 0.1])
    ax.set_title("{}".format(title))
    ax.legend()

    if x_locator is not None:
        ax.xaxis.set_major_locator(x_locator)
        # A locator may be supplied without a formatter; do not forward None
        # to matplotlib, which rejects it.
        if x_formatter is not None:
            ax.xaxis.set_major_formatter(x_formatter)
        fig.autofmt_xdate()

    return fig, ax

* Plot the contributions of posterior components in a single figure.

In [None]:
def plot_components(dates,
                    component_means_dict,
                    component_stddevs_dict,
                    x_locator=None,
                    x_formatter=None):
    """Plot the contributions of posterior components in a single figure.

    One subplot is drawn per entry of `component_means_dict`, showing the
    component mean with a shaded +/- 2*stddev band.

    Args:
        dates: x-axis values shared by every component.
        component_means_dict: mapping of component name -> mean series.
        component_stddevs_dict: mapping of component name -> stddev series;
            must contain every key of `component_means_dict`.
        x_locator: optional matplotlib tick locator for the x axes.
        x_formatter: optional matplotlib tick formatter; only applied when
            `x_locator` is also given.

    Returns:
        (fig, axes_dict): the figure and an OrderedDict mapping each
        component name to its axes, in plotting order.
    """
    # Imported locally because the notebook never imports `collections`;
    # without this the function raised NameError on its first call.
    import collections

    band_color = sns.color_palette()[1]

    axes_dict = collections.OrderedDict()
    num_components = len(component_means_dict)
    fig = plt.figure(figsize=(12, 2.5 * num_components))
    for i, (component_name, component_mean) in enumerate(component_means_dict.items()):
        component_stddev = component_stddevs_dict[component_name]

        ax = fig.add_subplot(num_components, 1, 1 + i)
        ax.plot(dates, component_mean, lw=2)
        ax.fill_between(dates,
                        component_mean - 2 * component_stddev,
                        component_mean + 2 * component_stddev,
                        color=band_color, alpha=0.5)
        ax.set_title(component_name)
        if x_locator is not None:
            ax.xaxis.set_major_locator(x_locator)
            # A locator may be supplied without a formatter; do not forward
            # None to matplotlib, which rejects it.
            if x_formatter is not None:
                ax.xaxis.set_major_formatter(x_formatter)
        axes_dict[component_name] = ax
    fig.autofmt_xdate()
    fig.tight_layout()
    return fig, axes_dict

* Plot a time series against a model's one-step predictions

In [None]:
def plot_one_step_predictive(dates, observed_time_series,
                             one_step_mean, one_step_scale,
                             x_locator=None, x_formatter=None):
    """Plot a time series against a model's one-step predictions.

    Args:
        dates: x-axis values shared by the observations and the predictions.
        observed_time_series: observed values.
        one_step_mean: one-step-ahead predictive mean.
        one_step_scale: one-step-ahead predictive scale; a +/- 1*scale band
            is shaded around the mean.
        x_locator: optional matplotlib tick locator for the x axis.
        x_formatter: optional matplotlib tick formatter; only applied when
            `x_locator` is also given.

    Returns:
        (fig, ax): the created figure and its single axes.
    """
    palette = sns.color_palette()
    observed_color, predicted_color = palette[0], palette[1]

    fig = plt.figure(figsize=(12, 6))
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(dates, observed_time_series,
            label="observed time series", color=observed_color)
    ax.plot(dates, one_step_mean,
            label="one-step prediction", color=predicted_color)
    ax.fill_between(dates,
                    one_step_mean - one_step_scale,
                    one_step_mean + one_step_scale,
                    alpha=0.1, color=predicted_color)
    ax.legend()

    if x_locator is not None:
        ax.xaxis.set_major_locator(x_locator)
        # A locator may be supplied without a formatter; do not forward None
        # to matplotlib, which rejects it.
        if x_formatter is not None:
            ax.xaxis.set_major_formatter(x_formatter)
        fig.autofmt_xdate()
    fig.tight_layout()
    return fig, ax

* Build model

    Local-Linear Trend model + minute-of-hour seasonal effect

In [None]:
def build_model(observed_time_series,num_seasons):
    """Assemble a structural time series: local linear trend plus seasonal.

    Args:
        observed_time_series: the series handed to every component and to
            the sum model.
        num_seasons: number of seasons for the seasonal component.

    Returns:
        The `sts.Sum` model combining the trend and seasonal components.
    """
    components = [
        sts.LocalLinearTrend(observed_time_series=observed_time_series),
        sts.Seasonal(num_seasons=num_seasons,
                     observed_time_series=observed_time_series),
    ]
    return sts.Sum(components, observed_time_series=observed_time_series)

## Example: LocalLinearTrend + Seasonal model

   * Forecast using a single variable only (univariate)

   * Given: the observed Target values (2020, first 600 minutes of data)
   
   * Forecast: the next, unseen Target values (2020, next 300 minutes of data)

In [None]:
# The data is minute-by-minute, so one seasonal cycle is taken to be an hour.
# NOTE: `asset` is rebound here, from the asset-details DataFrame loaded
# earlier to an integer Asset_ID.
asset = 1          # bitcoin
year = '2020'
num_seasons = 60   # 60 minutes --> 1 hour
num_obs = 600      # 600 minutes of observations --> 10 hours

model = build_model(dfs[asset].loc[year].Target[:num_obs], num_seasons)

# Build the variational surrogate posteriors `qs`.
variational_posteriors = sts.build_factored_surrogate_posterior(model=model)

In [None]:
num_variational_steps = 100  # number of optimizer updates

# Optimise the surrogate posterior against the model's joint log-probability
# of the observed window (the first `num_obs` rows of `year`).
loss_curve = tfp.vi.fit_surrogate_posterior(
    target_log_prob_fn=model.joint_log_prob(
        observed_time_series=dfs[asset].loc[year][:num_obs].Target),
    surrogate_posterior=variational_posteriors,
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    num_steps=num_variational_steps)

plt.plot(loss_curve)
plt.show()

In [None]:
# Take 1000 draws from the fitted variational posterior.
q_samples = variational_posteriors.sample(sample_shape=1000)

In [None]:
# Summarise each model parameter by the mean and std of its posterior draws.
print("Inferred parameters:")
for param in model.parameters:
    draws = q_samples[param.name]
    print(f"{param.name}: {np.mean(draws, axis=0)} +- {np.std(draws, axis=0)}")

In [None]:
num_forecast_steps = 5 * 60  # 5-hour forecast horizon, in minutes

# Forecast beyond the observed window using the posterior parameter draws.
forecast_dist = sts.forecast(
    model,
    observed_time_series=dfs[asset].loc[year][:num_obs].Target,
    parameter_samples=q_samples,
    num_steps_forecast=num_forecast_steps)

In [None]:
num_samples = 100  # number of forecast sample paths to draw

# `[..., 0]` drops the trailing axis from each result.
forecast_mean = forecast_dist.mean().numpy()[..., 0]
forecast_scale = forecast_dist.stddev().numpy()[..., 0]
forecast_samples = forecast_dist.sample(num_samples).numpy()[..., 0]

* All 100 sampled series, plus the mean and standard deviation at each time step

In [None]:
# Shapes of the per-step forecast mean and standard deviation.
(forecast_mean.shape, forecast_scale.shape)

In [None]:
# Shape of the array of sampled forecast paths.
np.shape(forecast_samples)

In [None]:
# Plot the observed window followed by the forecast horizon.
fig, ax = plot_forecast(
    x=dfs[asset].loc[year][:num_obs + num_forecast_steps].index,
    y=dfs[asset].loc[year][:num_obs + num_forecast_steps].Target,
    forecast_mean=forecast_mean,
    forecast_scale=forecast_scale,
    forecast_samples=forecast_samples,
    title=f"Returns in {year}",
    x_locator=None,
    x_formatter=None)

ax.legend(loc="upper left")
ax.set_ylabel("Returns")
ax.set_xlabel("minutes")
fig.autofmt_xdate()

In [None]:
# Mean squared error of the forecast mean against the targets that follow
# the observed window (rows num_obs .. num_obs + num_forecast_steps).
y_true = dfs[asset].loc[year][num_obs:num_obs + num_forecast_steps].Target
y_pred = forecast_mean

tf.keras.metrics.mean_squared_error(y_true, y_pred)

* I will learn more about the TFP library, try more statistical models, and then update this notebook.