In [1]:
import sys
sys.path.append('..') # for import src

import os
import cloudpickle
import lzma
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_predict
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import lightgbm as lgb
import talib

import src
from src.ml_utils import (
    fetch_ohlcv, 
    visualize_result, 
    normalize_position, 
    calc_position_cv,
    get_feature_columns,
    get_symbols,
    unbiased_rank,
    ewm_finite,
)
cloudpickle.register_pickle_by_value(src) # for model portability

In [29]:
# Symbol universe for this experiment; the alternatives are kept for reference.
# symbols = ['BTC']
# symbols = os.getenv('ALPHAPOOL_SYMBOLS').split(',')  # high trading value, long-listed symbols
symbols = ['ATOM']

# Fetch 300-second bars without a target column.
df = fetch_ohlcv(
    symbols=symbols,
    with_target=False,
    interval_sec=300,
    price_type=None,
)

# Persist the fetched frame so the analysis cell can reload it from disk.
df.to_pickle('/tmp/df.pkl')
display(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,op,hi,lo,cl,volume,execution_start_at
timestamp,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-11-12 00:30:00+00:00,ATOM,4.0580,4.2375,4.0530,4.2375,11876.011855,2019-11-12 00:35:00+00:00
2019-11-12 00:35:00+00:00,ATOM,4.2405,4.3070,4.2405,4.3070,4017.311465,2019-11-12 00:40:00+00:00
2019-11-12 00:40:00+00:00,ATOM,4.3070,4.3180,4.3070,4.3180,0.000000,2019-11-12 00:45:00+00:00
2019-11-12 00:45:00+00:00,ATOM,4.3180,4.3285,4.3180,4.3270,0.000000,2019-11-12 00:50:00+00:00
2019-11-12 00:50:00+00:00,ATOM,4.3270,4.3360,4.3270,4.3345,2202.970665,2019-11-12 00:55:00+00:00
...,...,...,...,...,...,...,...
2022-08-30 04:05:00+00:00,ATOM,11.6000,11.6160,11.5875,11.6070,18442.659740,2022-08-30 04:10:00+00:00
2022-08-30 04:10:00+00:00,ATOM,11.6070,11.6355,11.6070,11.6200,149026.916845,2022-08-30 04:15:00+00:00
2022-08-30 04:15:00+00:00,ATOM,11.6200,11.6710,11.6105,11.6635,206299.359880,2022-08-30 04:20:00+00:00
2022-08-30 04:20:00+00:00,ATOM,11.6635,11.7130,11.6550,11.6960,409023.372635,2022-08-30 04:25:00+00:00


In [40]:
# Reload the frame pickled by the fetch cell so this cell starts from the
# unmodified bars each time it is re-run (columns are added to `df` below).
df = pd.read_pickle('/tmp/df.pkl')

import numba
@numba.njit
def calc_force_entry_price(entry_price=None, lo=None, pips=None):
    """Return, per bar, the quote price of the next bar whose quote is crossed.

    For bar ``i`` the result is ``entry_price[j - 1]`` for the smallest
    ``j > i`` such that ``lo[j]`` is strictly below ``entry_price[j - 1]``
    when both are rounded to whole multiples of ``pips[j]``. Bars with no
    such ``j`` (always including the last bar) are NaN.

    NOTE(review): presumably this models a limit order that is re-quoted at
    ``entry_price`` every bar and fills once the following bar trades through
    it -- confirm against the strategy definition.

    Parameters
    ----------
    entry_price : 1-D float array
        Quote price placed at the close of each bar.
    lo : 1-D float array
        Per-bar extreme compared against the previous bar's quote. Callers
        pass negated highs and negated quotes to handle the opposite side.
    pips : 1-D float array
        Price increment used to compare prices in integer steps.

    Returns
    -------
    1-D array with the dtype and length of ``entry_price``.
    """
    n = entry_price.size
    y = entry_price.copy()
    y[:] = np.nan

    # Whether bar j crosses the quote from bar j - 1 does not depend on the
    # starting bar i, so a single backward pass that carries the most recent
    # hit replaces the nested forward scan (O(n) instead of O(n^2)) and
    # evaluates exactly the same predicate once per j.
    pending = np.nan
    for i in range(n - 2, -1, -1):
        j = i + 1
        if round(lo[j] / pips[j]) < round(entry_price[i] / pips[j]):
            pending = entry_price[i]
        y[i] = pending
    return y


def estimate_pips(x, window=None, reverse=False):
    """Estimate the price increment as a rolling GCD of observed prices.

    Prices are scaled by 1e8 and rounded to integers. For 2-D input the GCD
    is first taken across each row; then each position takes the GCD of
    itself and the preceding ``window - 1`` positions.

    Parameters
    ----------
    x : array-like, 1-D or 2-D
        Prices (ndarray, list, pandas Series or DataFrame). The input is
        converted to a plain ndarray so that slicing is purely positional
        and no pandas label alignment can occur inside the rolling step.
    window : int or None
        Number of positions (current one included) combined per output.
        ``window=1`` applies no lookback. ``None`` uses the entire history
        up to each position (previously this raised ``TypeError``).
    reverse : bool
        When true, look forward instead of backward by running the same
        computation on the reversed input and reversing the result.

    Returns
    -------
    numpy.ndarray of float64, one value per row of ``x``.
    """
    values = np.asarray(x, dtype=np.float64)
    if reverse:
        return estimate_pips(values[::-1], window=window)[::-1]

    scale = 10 ** 8
    # np.long is deprecated since NumPy 1.20; an explicit 64-bit type also
    # keeps the scaled prices independent of the platform's default int width.
    ticks = np.round(values * scale).astype(np.int64)
    if ticks.ndim != 1:
        ticks = np.gcd.reduce(ticks, axis=1)

    if window is None:
        # Unbounded lookback is the running GCD over the whole prefix.
        rolling = np.gcd.accumulate(ticks)
    else:
        rolling = ticks.copy()
        # Lags at or beyond the input length only touch empty slices.
        for lag in range(1, min(window, len(ticks))):
            rolling[lag:] = np.gcd(rolling[lag:], ticks[:-lag])
    return rolling * 1.0 / scale

# --- Tick size and quote prices ---------------------------------------
# Tick size is estimated from the four OHLC columns with a 1000-row window.
df['pips'] = estimate_pips(df[['op', 'hi', 'lo', 'cl']], window=1000)

# Quote one estimated tick below / above the close.
tick_offset = df['pips'] * 1
df['buy_price'] = df['cl'] - tick_offset
df['sell_price'] = df['cl'] + tick_offset
# Alternative quoting rules (disabled):
# df['buy_price'] = df['cl'] - df['cl'].rolling(12, 1).std() * 0.5
# df['sell_price'] = df['cl'] + df['cl'].rolling(12, 1).std() * 0.5
# df['buy_price'] = np.floor(df['buy_price'] / df['pips']) * df['pips']
# df['sell_price'] = np.ceil(df['sell_price'] / df['pips']) * df['pips']

# --- Forced entry prices ----------------------------------------------
pips_values = df['pips'].values
df['buy_fep'] = calc_force_entry_price(
    entry_price=df['buy_price'].values,
    lo=df['lo'].values,
    pips=pips_values,
)
# The sell side reuses the same routine on negated prices (highs stand in
# for lows), and the result is negated back.
df['sell_fep'] = -calc_force_entry_price(
    entry_price=-df['sell_price'].values,
    lo=-df['hi'].values,
    pips=pips_values,
)

# --- Signal, forward return, lagged entry prices ------------------------
# Signal is the negated sign of the latest close-to-close change.
df['signal'] = -np.sign(df['cl'].diff())
next_close = df['cl'].shift(-1)
df['ret'] = next_close / df['cl'] - 1

for side in ('buy', 'sell'):
    df[f'{side}_fep_prev'] = df[f'{side}_fep'].shift(1)

df = df.dropna()

print('corr', df['ret'].corr(df['signal']))

# --- Entry costs relative to the close ----------------------------------
# "early" variants use the entry price computed one row earlier.
for cost_suffix, fep_suffix in (('cost', 'fep'), ('cost_early', 'fep_prev')):
    df[f'buy_{cost_suffix}'] = df[f'buy_{fep_suffix}'] / df['cl'] - 1
    df[f'sell_{cost_suffix}'] = -(df[f'sell_{fep_suffix}'] / df['cl'] - 1)
display(df.describe())

# Same summary split by the sign of the next-bar return.
for label, mask in (('up', df['ret'] > 0), ('down', df['ret'] < 0)):
    print(label)
    display(df.loc[mask].describe())

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  x = np.round(x * scale).astype(np.long)


corr -0.007264933295255931


Unnamed: 0,op,hi,lo,cl,volume,pips,buy_price,sell_price,buy_fep,sell_fep,signal,ret,buy_fep_prev,sell_fep_prev,buy_cost,sell_cost,buy_cost_early,sell_cost_early
count,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0,294364.0
mean,13.88218,13.924415,13.839652,13.882208,157128.9,0.000499,13.881709,13.882707,13.886649,13.877993,-0.002952,1.4e-05,13.886624,13.877968,0.00047,0.000433,0.000474,0.000429
std,11.086261,11.122964,11.049698,11.086254,403594.4,1.5e-05,11.086254,11.086255,11.087847,11.084608,0.982414,0.004657,11.08786,11.084621,0.00211,0.002042,0.00441,0.004408
min,1.1105,1.1225,1.062,1.1035,0.0,0.00025,1.103,1.104,1.1205,1.104,-1.0,-0.250196,1.1205,1.104,-0.00075,-0.000737,-0.153305,-0.333732
25%,4.5245,4.5335,4.516,4.525,3069.36,0.0005,4.5245,4.5255,4.527,4.522375,-1.0,-0.001902,4.527,4.522,-9.5e-05,-9.7e-05,-0.00106,-0.001109
50%,9.7185,9.7625,9.68075,9.7185,38340.44,0.0005,9.718,9.719,9.72225,9.715,-0.0,0.0,9.722,9.715,-3.7e-05,-3.8e-05,4.9e-05,2.5e-05
75%,22.717,22.8005,22.645,22.7175,150692.0,0.0005,22.717,22.718,22.724,22.711125,1.0,0.001909,22.724,22.711125,-1.7e-05,-1.7e-05,0.002058,0.002018
max,44.444,44.8025,44.228,44.444,17998820.0,0.0005,44.4435,44.4445,44.4435,44.4445,1.0,0.180636,44.4435,44.4445,0.16357,0.111544,0.333632,0.152693


up


Unnamed: 0,op,hi,lo,cl,volume,pips,buy_price,sell_price,buy_fep,sell_fep,signal,ret,buy_fep_prev,sell_fep_prev,buy_cost,sell_cost,buy_cost_early,sell_cost_early
count,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0,142485.0
mean,14.118189,14.161166,14.074093,14.117396,161285.5,0.000499,14.116896,14.117895,14.126928,14.117881,4.9e-05,0.003039,14.123311,14.114383,0.000992,-6.8e-05,0.000638,0.000277
std,11.051289,11.087743,11.014691,11.050863,418731.5,1.4e-05,11.050862,11.050863,11.054113,11.050874,0.987396,0.003643,11.052737,11.049598,0.002872,0.000176,0.004664,0.00449
min,1.1105,1.1225,1.062,1.1035,0.0,0.00025,1.103,1.104,1.284,1.104,-1.0,1.2e-05,1.1205,1.104,-0.0005,-0.000737,-0.147304,-0.333732
25%,4.683,4.692,4.672,4.6825,3949.92,0.0005,4.682,4.683,4.6895,4.683,-1.0,0.000871,4.687,4.68,-6.1e-05,-0.000106,-0.001081,-0.001188
50%,10.266,10.2935,10.2325,10.2625,41526.68,0.0005,10.262,10.263,10.2695,10.263,0.0,0.001997,10.268,10.262,-2.2e-05,-4.9e-05,0.000254,-2.3e-05
75%,22.9625,23.0275,22.878,22.956,155094.0,0.0005,22.9555,22.9565,22.975,22.9565,1.0,0.003951,22.974,22.9575,0.000235,-2.2e-05,0.002329,0.001853
max,44.2425,44.5445,44.2,44.3525,17998820.0,0.0005,44.352,44.353,44.352,44.353,1.0,0.180636,44.242,44.243,0.16357,0.024881,0.333632,0.147219


down


Unnamed: 0,op,hi,lo,cl,volume,pips,buy_price,sell_price,buy_fep,sell_fep,signal,ret,buy_fep_prev,sell_fep_prev,buy_cost,sell_cost,buy_cost_early,sell_cost_early
count,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0,141618.0
mean,14.351654,14.395769,14.308038,14.352494,163468.9,0.000499,14.351994,14.352993,14.352006,14.343361,-0.004357,-0.003028,14.355561,14.346872,-6.6e-05,0.000927,0.000297,0.000565
std,11.144655,11.181481,11.108238,11.145033,400174.4,1.4e-05,11.145032,11.145033,11.145021,11.141876,0.987499,0.003643,11.146355,11.143114,0.000164,0.002786,0.004241,0.00442
min,1.1635,1.1635,1.0625,1.121,0.0,0.00025,1.1205,1.1215,1.1205,1.1215,-1.0,-0.250196,1.163,1.1215,-0.00075,-0.000446,-0.153305,-0.176825
25%,4.746625,4.755,4.7365,4.747,4200.944,0.0005,4.7465,4.7475,4.7465,4.7405,-1.0,-0.003973,4.748,4.743,-0.000104,-6.2e-05,-0.001156,-0.001156
50%,10.48,10.5135,10.4445,10.48225,44069.91,0.0005,10.48175,10.48275,10.48175,10.473,-0.0,-0.002009,10.4825,10.47425,-4.7e-05,-2.3e-05,-2.3e-05,0.000223
75%,23.227375,23.303375,23.158,23.226,158992.1,0.0005,23.2255,23.2265,23.2255,23.211875,1.0,-0.000868,23.23775,23.22,-2.1e-05,-1.2e-05,0.001846,0.002247
max,44.444,44.8025,44.228,44.444,16999000.0,0.0005,44.4435,44.4445,44.4435,44.4445,1.0,-1.2e-05,44.4435,44.4445,0.021177,0.111544,0.176769,0.152693
