In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import datetime as dt
import requests
import json
import os

# Prepare data

In [2]:
def get_100_candles(ticker: str, tf: str, timestamp_ms_from: int):
    params = {
        'instId': ticker,
        'bar': tf,
        'after': timestamp_ms_from,
    }
    responce = requests.get('https://www.okx.com/api/v5/market/history-mark-price-candles', params)
    if not responce.ok:
        print('Bad request', responce.text)
        return
    candles = []
    for cs in json.loads(responce.text)['data']:
        candle = {
            'dt': dt.datetime.fromtimestamp(int(cs[0]) // 1000),
            'o': float(cs[1]),
            'h': float(cs[2]),
            'l': float(cs[3]),
            'c': float(cs[4]),
        }
        candles.append(candle)
    return candles

In [3]:
def get_timestamp_ms_range(from_date: dt.datetime, till_date: dt.datetime, delta: dt.timedelta):
    ts_range = []
    t = till_date
    while t > from_date:
        timestamp_ms = int(t.timestamp() * 1000)
        ts_range.append(timestamp_ms)
        t -= 100 * delta
    return ts_range

In [4]:
def get_candles_history(ticker: str, tf: str, from_date: dt.datetime, till_date: dt.datetime):
    """
    Get candlestick history of a ticker from OKX 
    """
    tf_to_timedelta = {
        '1D': dt.timedelta(days=1),
        '4H': dt.timedelta(hours=4),
    }
    candles = []
    ts_range = get_timestamp_ms_range(from_date, till_date, tf_to_timedelta[tf])
    for ts_ms in ts_range:
        new_candles = get_100_candles(ticker, tf, ts_ms)
        if new_candles:
            candles.extend(new_candles)
    if len(candles) == 0:
        return 
    df_candles = pd.DataFrame(candles).sort_values('dt')
    return df_candles[df_candles['dt'].between(from_date, till_date)].drop_duplicates('dt').reset_index(drop=True)

In [5]:
DATA_DIR = 'data'
DATA_FILENAME = 'crypto.csv'

path_to_data = os.path.join(DATA_DIR, DATA_FILENAME)
if not os.path.exists(path_to_data):
    tickers = ['BTC-USDT', 'ETH-USDT', 'DOT-USDT', 'OKB-USDT', 'XRP-USDT',
            'SOL-USDT', 'DOGE-TRX', 'TRX-USDT', 'LTC-USDT', 'TON-USDT',
            ]
    tf = '1D'
    date_from = dt.datetime(2020, 1, 1)
    date_till = dt.datetime(2025, 1, 1)
    data = []
    for ticker in tickers:
        print(ticker)
        df = get_candles_history(ticker, tf, date_from, date_till)
        if df is not None:
            ts = pd.Series(data=df['c'].values, index=df['dt'], name=ticker)
            data.append(ts)
    data = pd.concat(data, axis=1)
    data.to_csv(path_to_data)

data = pd.read_csv(path_to_data, index_col='dt', parse_dates=['dt'])
data.shape

(1826, 9)

In [6]:
data

Unnamed: 0_level_0,BTC-USDT,ETH-USDT,DOT-USDT,OKB-USDT,XRP-USDT,SOL-USDT,TRX-USDT,LTC-USDT,TON-USDT
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-01 19:00:00,7131.9,129.56,,,0.19050,,0.01312,41.17,
2020-01-02 19:00:00,7319.7,132.47,,,0.19203,,0.01337,41.76,
2020-01-03 19:00:00,7318.9,133.29,,,0.19249,,0.01335,42.28,
2020-01-04 19:00:00,7467.4,137.24,,,0.19628,,0.01377,44.01,
2020-01-05 19:00:00,7520.9,140.18,,,0.21275,,0.01405,44.06,
...,...,...,...,...,...,...,...,...,...
2024-12-27 19:00:00,94445.8,3368.12,6.947,53.83,2.17300,190.18,0.25690,99.75,5.784
2024-12-28 19:00:00,94504.1,3373.97,6.987,50.04,2.16450,195.12,0.26232,100.75,5.740
2024-12-29 19:00:00,92119.2,3323.88,6.613,49.57,2.02420,187.31,0.25108,98.38,5.497
2024-12-30 19:00:00,95413.9,3412.67,6.842,50.08,2.12860,198.42,0.25659,104.21,5.594


# Evaluate strategies

In [7]:
t = list(range(15))
min_hist = 5
period = 1
size = len(t)

res = []

for i in range(min_hist, size - period):
    res.append((t[:i], t[i:i + period]))
res

[([0, 1, 2, 3, 4], [5]),
 ([0, 1, 2, 3, 4, 5], [6]),
 ([0, 1, 2, 3, 4, 5, 6], [7]),
 ([0, 1, 2, 3, 4, 5, 6, 7], [8]),
 ([0, 1, 2, 3, 4, 5, 6, 7, 8], [9]),
 ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10]),
 ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [11]),
 ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [12]),
 ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [13])]

In [8]:
def timeseries_split(df, period, min_hist):
    size = df.shape[0]
    for i in range(min_hist, size - period):
        yield df.iloc[:i], df.iloc[i: i + period]

In [9]:
class MarkowitzPortfolio:
    def __init__(self):
        pass
    def build(self):
        pass

In [10]:
period = 7
min_hist = 14

metrics_history = []
for train, test in timeseries_split(data, period, min_hist):
    emty_cols = train.columns[train.isna().sum(axis=0) > 0]
    train = train.drop(emty_cols, axis=1)
    test = test.drop(emty_cols, axis=1)

    n = train.shape[1]
    now = train.iloc[-1].values.reshape((-1, 1))
    # future = test.iloc[-1].values.reshape((-1, 1))
    future = test.values.T

    dt = train.iloc[-1].name
    print(dt)
    print('train history length', train.shape[0])

    # uniform
    # w = np.ones((n, 1)) / n

    # minvar 
    ret = train.diff(period) / train.shift(period)
    mu_hat = ret.mean(axis=1).values.reshape((-1, 1))
    cov_hat = ret.cov().values
    cov_hat_inv = np.linalg.inv(cov_hat)
    e = np.ones((n, 1))
    w = cov_hat_inv @ e / (e.T @ cov_hat_inv @ e)

    # random fractions
    # w = np.random.randn(n, 1) 
    # w = w / w.sum()


    x = w / now
    assert np.isclose(w.sum(), 1)

    actual_return = (future - now) / now
    pnl = x * actual_return * now

    report = pd.DataFrame(index=train.columns)
    report['open'] = now
    report['close'] = future[:, -1, np.newaxis]
    report['diff'] = future[:, -1, np.newaxis] - now
    report['return'] = actual_return[:, -1]
    report['w'] = w
    report['x'] = x
    report['max_dropdown'] = actual_return.min(axis=1)
    report['pnl'] = pnl[:, -1, np.newaxis]
    report['total_pnl'] = pnl[:, -1].sum()
    
    metrics = {
        'dt': dt,
        'total_pnl':pnl[:, -1].sum(),
        'max_dropdown': pnl.sum(axis=0).min(),
    }
    metrics_history.append(metrics)
    print(report)
    print('*'*40 + '\n'*3 )
    # break

2020-01-14 19:00:00
train history length 14
                open       close       diff    return         w          x  \
BTC-USDT  8763.20000  8654.50000 -108.70000 -0.012404  1.025259   0.000117   
ETH-USDT   162.50000   167.43000    4.93000  0.030338 -0.374189  -0.002303   
XRP-USDT     0.23431     0.23528    0.00097  0.004140 -0.389496  -1.662309   
TRX-USDT     0.01692     0.01717    0.00025  0.014775  1.036307  61.247473   
LTC-USDT    56.87000    58.07000    1.20000  0.021101 -0.297882  -0.005238   

          max_dropdown       pnl  total_pnl  
BTC-USDT     -0.014858 -0.012717  -0.016656  
ETH-USDT     -0.000123 -0.011352  -0.016656  
XRP-USDT     -0.028765 -0.001612  -0.016656  
TRX-USDT     -0.028369  0.015312  -0.016656  
LTC-USDT     -0.003868 -0.006286  -0.016656  
****************************************



2020-01-15 19:00:00
train history length 15
                open       close       diff    return         w          x  \
BTC-USDT  8710.30000  8357.50000 -352.80000 -

In [11]:
pd.DataFrame(metrics_history)

Unnamed: 0,dt,total_pnl,max_dropdown
0,2020-01-14 19:00:00,-0.016656,-0.047283
1,2020-01-15 19:00:00,-0.050937,-0.050937
2,2020-01-16 19:00:00,-0.042092,-0.066408
3,2020-01-17 19:00:00,-0.059294,-0.059505
4,2020-01-18 19:00:00,-0.008899,-0.025421
...,...,...,...
1800,2024-12-19 19:00:00,-0.007250,-0.031513
1801,2024-12-20 19:00:00,-0.013326,-0.033624
1802,2024-12-21 19:00:00,0.010543,-0.016038
1803,2024-12-22 19:00:00,-0.006536,-0.006536


In [12]:
pd.DataFrame(metrics_history)['total_pnl'].describe()

count    1805.000000
mean        0.015477
std         0.095067
min        -0.383219
25%        -0.034920
50%         0.006263
75%         0.054212
max         0.838207
Name: total_pnl, dtype: float64