In [1]:
import pandas as pd
import numpy as np

# Загрузка и подготовка данных

In [2]:
# Trading pairs analysed in this notebook; each is looked up as a column in every CSV.
instruments = 'BTCUSDT ETHUSDT XRPUSDT LTCUSDT EOSUSDT'.split()
# One bar field per CSV file: files[i] is the file that provides columns[i].
columns = ['close', 'open', 'low', 'high', 'volume']
files = [f'{column}.csv' for column in columns]

In [3]:
# Collect, for every instrument, one Series per CSV file (in the order of `files`).
series_per_ccys = {k: [] for k in instruments}

for file in files:
    # The first CSV column is used as the index and parsed as dates;
    # every instrument is read from the column carrying its name.
    frame = pd.read_csv('data/' + file, parse_dates=True, index_col=0)
    for instrument in instruments:
        series_per_ccys[instrument].append(frame[instrument])

# Glue the per-field Series together column-wise, labelling them with `columns`
# (which is positionally matched to `files`). Concatenating along axis=1 avoids
# building a row-per-Series frame and transposing it, and lets every field keep
# its own dtype.
df_per_ccys = {
    k: pd.concat(series_per_ccys[k], axis=1, keys=columns)
    for k in instruments
}
df_per_ccys['BTCUSDT'].head()

KeyboardInterrupt: 

Пока что будем работать только с `BTCUSDT`:

In [None]:
# Take an independent copy: columns added to `df` later must not silently
# appear in the frame stored in `df_per_ccys`.
df = df_per_ccys['BTCUSDT'].copy()

Добавляем доходности:

In [None]:
# Simple per-bar return: relative price change from open to close.
# The numerator is close - open so that a rising bar gives a positive return.
df['return'] = (df['close'] - df['open']) / df['open']
df.head()

Объявляем функцию для ресемплирования:

In [None]:
def resample(freq, dataframe: pd.DataFrame) -> pd.DataFrame:
  """Aggregate bars into time buckets of width ``freq``.

  The frame is grouped on its index with ``pd.Grouper(freq=freq)``. Per bucket:
  first ``open``, last ``close``, maximum ``high``, minimum ``low``, summed
  ``volume``, and the standard deviation of ``return``.

  Note that the standard deviation of ``return`` is published under the
  column name ``vol^2``.
  """
  how = {
    'open': 'first',
    'close': 'last',
    'high': 'max',
    'low': 'min',
    'volume': 'sum',
    'return': 'std',
  }
  buckets = dataframe.groupby(pd.Grouper(freq=freq))
  aggregated = buckets.agg(how)
  return aggregated.rename(columns={'return': 'vol^2'})

Пока что будем работать только с часовыми данными, выбрасываем точки, где не было торгов:

In [None]:
# Aggregate to hourly bars, then recompute the return on the hourly open/close.
df_1h = resample('1h', df)
# close - open in the numerator: a rising hour gives a positive return.
df_1h['return'] = (df_1h['close'] - df_1h['open']) / df_1h['open']
# Drop hours with any missing aggregate, then hours with zero traded volume.
df_1h = df_1h.dropna()
df_1h = df_1h[df_1h['volume'] != 0]
df_1h.head()

Объявляем метрику `RMSPE`:

In [None]:
def rmspe(y_true, y_pred):
    """Root mean squared percentage error of ``y_pred`` relative to ``y_true``.

    Computes ``sqrt(mean(((y_true - y_pred) / y_true) ** 2))``.

    Pairs whose relative error is NaN (for example the NaN warm-up values at
    the start of a forecast series) are excluded from the mean. This is done
    explicitly so that pandas Series and plain numpy arrays are treated the
    same way.

    Parameters:
        y_true: observed values (array-like supporting arithmetic, e.g. a
            Series or ndarray). Two Series are aligned on their index by the
            subtraction.
        y_pred: predicted values, same kind as ``y_true``.

    Returns:
        The RMSPE as a float, or NaN when no valid pair exists. A zero in
        ``y_true`` paired with a non-zero error yields an infinite result.
    """
    relative_error = (y_true - y_pred) / y_true
    squared = np.square(np.asarray(relative_error, dtype=float))
    valid = squared[~np.isnan(squared)]
    if valid.size == 0:
        return np.nan
    return np.sqrt(valid.mean())

# Historical Average

In [None]:
def calculate_HA(realized_vol, l_history=1):
  """Historical-average forecast: mean of the previous ``l_history`` values.

  The value at position ``i`` is the mean of the ``l_history`` observations
  strictly before ``i``; NaNs inside that window are ignored. The first
  ``l_history`` positions have no full history and are NaN.

  Parameters:
    realized_vol: pandas Series of observed values.
    l_history: number of past observations to average (positive integer).

  Returns:
    A Series named "historical_average" on the index of ``realized_vol``.

  Raises:
    ValueError: if ``l_history`` is smaller than 1.
  """
  if l_history < 1:
    raise ValueError("l_history must be a positive integer")

  # min_periods=1 makes the rolling mean skip NaNs instead of propagating them;
  # shift(1) turns "window ending at i" into "window ending just before i".
  averaged = realized_vol.rolling(window=l_history, min_periods=1).mean().shift(1)
  # Positions without a complete look-back window carry no forecast.
  averaged.iloc[:l_history] = np.nan
  return averaged.rename("historical_average")

In [None]:
# Score every look-back length from 1 to 29 and keep the one with the lowest RMSPE
# (the first such length when several tie).
ha_scores = {
    window: rmspe(df_1h['vol^2'], calculate_HA(df_1h['vol^2'], window))
    for window in range(1, 30)
}
l_min = min(ha_scores, key=ha_scores.get)
value_min = ha_scores[l_min]
print("Optimal l: ", l_min, " value:", value_min)

# Exponentially Weighted Moving Average

In [None]:
def ewma(data, alpha):
  """Exponentially weighted moving average of ``data`` (oldest value first).

  Uses the recursion ``s = alpha * s + (1 - alpha) * x``, seeded with the
  first observation. Seeding with the first value (rather than zero) makes
  the weights sum to one, so a constant input returns that constant.

  Parameters:
    data: iterable of numbers, ordered from oldest to newest.
    alpha: weight kept on the running average at each step; ``0`` returns
      the most recent value.

  Returns:
    The smoothed value after the last element, or NaN for empty input.
  """
  values = iter(data)
  try:
    smoothed = next(values)
  except StopIteration:
    # No observations: an average is undefined.
    return float("nan")

  for x in values:
    smoothed = smoothed * alpha + x * (1 - alpha)
  return smoothed

In [None]:
def calculate_EWMA(realized_vol, l_history=3, alpha=0.33):
  """Rolling EWMA forecast built from the previous ``l_history`` values.

  For each position ``end`` from ``l_history`` onward, the slice
  ``realized_vol[end - l_history: end]`` is passed to ``ewma`` together with
  ``alpha``. Earlier positions stay NaN.

  Returns a Series named "ewma_average" on the index of ``realized_vol``.
  """
  n_obs = len(realized_vol)
  forecast = np.full(realized_vol.size, np.nan)

  for end in range(l_history, n_obs):
    start = end - l_history
    forecast[end] = ewma(realized_vol[start: end], alpha)

  return pd.Series(data=forecast, index=realized_vol.index, name="ewma_average")


In [None]:
# Exhaustive search over window lengths 2..9 and decay values 0.00..0.95 (step 0.05);
# the first (window, decay) pair reaching the lowest RMSPE wins.
ewma_scores = {
    (window, decay): rmspe(df_1h['vol^2'], calculate_EWMA(df_1h['vol^2'], window, decay))
    for window in range(2, 10)
    for decay in np.arange(0.0, 1.0, 0.05)
}
l_min, alpha_min = min(ewma_scores, key=ewma_scores.get)
value_min = ewma_scores[(l_min, alpha_min)]
print("Optimal l: ", l_min, " alpha,", alpha_min, " value:", value_min)

# GARCH

In [28]:
!pip install arch

Defaulting to user installation because normal site-packages is not writeable


In [None]:
from arch import arch_model
from arch.__future__ import reindexing
from math import sqrt

def garch(returns):
  """One-step-ahead volatility forecast from a GARCH(1,1) fit on ``returns``.

  The series is rescaled to a standard deviation of 100 before fitting
  (presumably to keep the optimizer well-conditioned) and the forecast is
  scaled back afterwards. The model uses a constant mean, p=1, o=0, q=1 and
  a skewed Student's t distribution.

  Parameters:
    returns: pandas Series of returns to fit on.

  Returns:
    The square root of the forecast variance for the next step, expressed in
    the units of ``returns``.
  """
  scaling_const = 100.0 / returns.std()

  am = arch_model(scaling_const * returns,
                  mean='Constant',
                  vol='Garch', p=1, o=0, q=1,
                  dist='skewstudent')

  # Loose tolerance and silent output: this function is meant to be called in a loop.
  res = am.fit(options={'ftol' : 1e-2}, update_freq=0, disp='off')

  forecasts = res.forecast(horizon=1)

  # With horizon=1 the variance frame has a single column; pick the scalar in
  # its last row directly instead of calling float() on a one-element Series.
  next_variance = forecasts.variance.iloc[-1, 0]
  return sqrt(float(next_variance)) / scaling_const

In [None]:
def calculate_GARCH(returns, l_history=3):
  """Rolling one-step-ahead GARCH volatility forecasts.

  For each position ``i`` from ``l_history`` onward, ``garch`` is fitted on the
  ``l_history`` returns strictly before ``i`` and its forecast is stored at
  ``i``. Earlier positions stay NaN.

  NOTE(review): the default window of 3 observations looks very short for
  fitting a GARCH model; consider passing a longer ``l_history``.

  Parameters:
    returns: pandas Series of returns.
    l_history: number of past observations used for each fit.

  Returns:
    A Series named "garch_forecast" on the index of ``returns``.
  """
  data = np.full(returns.size, np.nan)

  for i in range(l_history, len(returns)):
    # Explicitly positional window of the l_history most recent past values.
    history = returns.iloc[i - l_history: i]
    data[i] = garch(history)

  return pd.Series(data=data, index=returns.index, name="garch_forecast")


In [None]:
# Score the rolling GARCH forecast against the realised series on rows without NaNs.
# NOTE(review): 'vol^2' is a within-hour std of finer-grained returns while GARCH is
# fitted on hourly returns — confirm the two are on a comparable scale.
df_1h_valid = df_1h.dropna()
rmspe(df_1h_valid['vol^2'], calculate_GARCH(df_1h_valid['return']))