In [None]:
!cp ../input/my-talibinstall/ta-lib-0.4.0-src.tar.gzh  ./ta-lib-0.4.0-src.tar.gz
!tar -xzvf ta-lib-0.4.0-src.tar.gz > null
!cd ta-lib && ./configure --prefix=/usr > null && make  > null && make install > null
!cp ../input/my-talibinstall/TA-Lib-0.4.21.tar.gzh TA-Lib-0.4.21.tar.gz
!pip install TA-Lib-0.4.21.tar.gz > null
!pip install ../input/my-talibinstall/numpy-1.21.4-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl >null
import talib as ta

In [None]:
import traceback
import numpy as np
import pandas as pd
import gc
import time
import os
import xgboost as xgb
import gresearch_crypto

In [None]:
# Run configuration read by the cells below.
param_version=58  # version number embedded in the model folder and feature-file names
tuned=True #True: load models from new-xgbcrypto-tune; False: from mytrainedxgb
alldata = True  #True: load the "_alldata.json" model files; False: the plain ".json" ones
pre_minute= 200 #number of most recent minutes handed to get_features for each test batch

In [None]:
# Read the per-asset metadata table and order its rows by Asset_ID.
ASSET_DETAILS_CSV = '../input/c/c/g-research-crypto-forecasting/asset_details.csv'
df_asset_details = pd.read_csv(ASSET_DETAILS_CSV)
df_asset_details = df_asset_details.sort_values("Asset_ID")

## Feature engineering

- TA-Lib indicator reference: [https://mrjbq7.github.io/ta-lib/doc_index.html](https://mrjbq7.github.io/ta-lib/doc_index.html)

import pickle

# Load the feature-parameter dictionary stored next to the models and show
# the values of the parameters listed in `fparam_str`.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load this from a dataset you trust.
with open(
    f"../input/new-xgbcrypto-tune/model_nof_{param_version}/feature_best{param_version}",
    "rb",
) as f:
    fdict = pickle.load(f)
print(fdict)

fparam_str = [
    'beta_s', 'beta_l', 'lrtn',
    'fastk1', 'fastk2', 'adx',
    'macd_s', 'macd_l', 'macd_sig',
    'vol_sum', 'rsi',
    'std_Crypto_Index', 'std_lr_15', 'std_Mkt_lrt_15',
]
[fdict[name] for name in fparam_str]

In [None]:
#### Feature parameters (hard-coded values, one per line)
beta_s = '6h'            # short beta window label, parsed by beta_window
beta_l = '2d'            # long beta window label, parsed by beta_window
lrtn = 30                # period of the Crypto_Index log return
fastk1 = 15              # fastk_period passed to ta.STOCH
fastk2 = 30              # fastk_period passed to ta.STOCHF
adx = 40                 # timeperiod passed to ta.ADX
macd_s = 15              # MACD fast period
macd_l = 40              # MACD slow period
macd_sig = 15            # MACD signal period
vol_sum = 15             # window of the rolling Volume sum
rsi = 40                 # timeperiod passed to ta.RSI
std_Crypto_Index = 30    # window of the Crypto_Index standard deviation
std_lr_15 = 240          # window of the lr_15 standard deviation
std_Mkt_lrt_15 = 10      # window of the Mkt_lrt_15 standard deviation

adx, std_lr_15

In [None]:

def beta_window(beta):
    """Convert a window label such as '6h' or '2d' into a width in minutes.

    Parameters
    ----------
    beta : str
        An integer count followed by a one-character unit:
        'm' (minutes), 'h' (hours) or 'd' (days).

    Returns
    -------
    int
        The window width in minutes.

    Raises
    ------
    ValueError
        If the count is not an integer or the unit is not recognised.
    """
    minutes_per_unit = {'m': 1, 'h': 60, 'd': 60*24}
    num, unit = int(beta[:-1]), beta[-1]
    if unit not in minutes_per_unit:
        # Fail with a clear message instead of an unbound local variable.
        raise ValueError(
            f"unknown window unit {unit!r} in {beta!r}; expected one of {sorted(minutes_per_unit)}"
        )
    return minutes_per_unit[unit] * num

# Widths, in minutes, of the short and long beta windows.
beta_sw, beta_lw = (beta_window(label) for label in (beta_s, beta_l))
beta_sw, beta_lw


In [None]:
def log_return(series, periods=5):
    """Return the difference of the natural log of `series` over `periods` rows.

    The first `periods` entries of the result are NaN.
    """
    log_values = np.log(series)
    return log_values.diff(periods)

def lag_features(df):
    """Add technical-indicator and volatility columns to `df` in place.

    Reads the High, Low, Close, Volume, Mkt_lrt_15 and Crypto_Index columns of
    `df` and the module-level periods (fastk1, fastk2, rsi, macd_s, macd_l,
    macd_sig, adx, vol_sum, std_Mkt_lrt_15, std_Crypto_Index).
    Returns None; the new columns are written directly into `df`.
    """
    high, low, close = df.High, df.Low, df.Close

    #### stochastic oscillators (slow and fast)
    slow_period = int(3*fastk1/5)
    slow_k, slow_d = ta.STOCH(high, low, close,
                              fastk_period=fastk1,
                              slowk_period=slow_period,
                              slowd_period=slow_period,
                              slowk_matype=0, slowd_matype=0)
    df['slowK'] = slow_k
    df['slowD'] = slow_d

    fast_k, fast_d = ta.STOCHF(high, low, close,
                               fastk_period=fastk2,
                               fastd_period=int(3*fastk2/5),
                               fastd_matype=0)
    df['fastK'] = fast_k
    df['fastD'] = fast_d

    #### momentum / trend indicators on Close
    df[f'rsi_{rsi}'] = ta.RSI(close, timeperiod=rsi)
    macd_line, macd_signal, macd_hist = ta.MACD(close,
                                                fastperiod=macd_s,
                                                slowperiod=macd_l,
                                                signalperiod=macd_sig)
    df[f'macd_{macd_s}_{macd_l}'] = macd_line
    df[f'macd_signal_{macd_sig}'] = macd_signal
    df['macd_hist'] = macd_hist
    # Average Directional Movement Index
    df[f'adx_{adx}'] = ta.ADX(high, low, close, timeperiod=adx)

    #### rolling volume sum: moving average times the window length
    volume_sma = ta.SMA(df['Volume'], vol_sum)
    df[f'vol_sum_{vol_sum}'] = volume_sma*vol_sum

    #### rolling standard deviations (the std of lr_15 is not computed here)
    df[f'std_Mkt_lrt_15_{std_Mkt_lrt_15}'] = ta.STDDEV(
        df.Mkt_lrt_15, timeperiod=std_Mkt_lrt_15, nbdev=1)
    df[f'std_Crypto_Index_{std_Crypto_Index}'] = ta.STDDEV(
        df.Crypto_Index, timeperiod=std_Crypto_Index, nbdev=1)


def make_std(df,width):
    """Add a `std_lr_15_{width}` column holding ta.STDDEV of lr_15 and return `df`.

    `df` is modified in place; `width` is passed to ta.STDDEV as `timeperiod`.
    """
    rolling_std = ta.STDDEV(df.lr_15, timeperiod=width, nbdev=1)
    df[f'std_lr_15_{width}'] = rolling_std
    return df

def beta_resid(df):
    """Return the beta of `lr_15` against `Mkt_lrt_15` for the rows of `df`.

    The beta is mean(Mkt_lrt_15 * lr_15) / mean(Mkt_lrt_15 * Mkt_lrt_15),
    where each mean skips NaN products (pandas default).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `Mkt_lrt_15` and `lr_15`.

    Returns
    -------
    float or int
        The beta, or 0 when the ratio is NaN or infinite (for example when
        every market return in the window is zero or missing).
    """
    # Plain element-wise products; no TA-Lib call is needed for a multiply.
    cross_mean = (df.Mkt_lrt_15 * df.lr_15).mean()
    market_sq_mean = (df.Mkt_lrt_15 * df.Mkt_lrt_15).mean()
    # Force numpy division semantics so 0/0 yields NaN instead of raising.
    with np.errstate(divide='ignore', invalid='ignore'):
        b = np.float64(cross_mean) / np.float64(market_sq_mean)
    # `b in [np.nan, ...]` can never match a computed NaN (NaN != NaN and it is
    # not the same object as np.nan), so test for finiteness explicitly.
    if not np.isfinite(b):
        b = 0
    return b


def get_features(df_feat):
    """Add all model features to `df_feat` (in place) and return it.

    For each of the two beta windows, writes the residual of lr_15 after
    subtracting beta * Mkt_lrt_15; then adds the `lrtn`-period log return of
    Crypto_Index and the indicator columns produced by lag_features.
    Expects the `beta_{beta_s}` and `beta_{beta_l}` columns to exist already.
    """
    pd.options.mode.chained_assignment = None  # default='warn'
    market_return = df_feat.Mkt_lrt_15
    for window in (beta_s, beta_l):
        resid_col = f"lr_15_resid_{window}"
        explained = ta.MULT(df_feat[f"beta_{window}"], market_return)
        df_feat[resid_col] = ta.SUB(df_feat.lr_15, explained).rename(resid_col)
    df_feat[f"lrtn_index_{lrtn}"] = log_return(df_feat.Crypto_Index, lrtn)
    lag_features(df_feat)
    return df_feat

### Load tuned models

In [None]:
from os.path import exists

# Loaded boosters keyed by Asset_ID; filled by model_reload_train.
models = {}


def model_reload_train(type: str):
    """Load the saved XGBoost booster of each asset into the global `models`.

    The folder is selected by `tuned` and the file suffix by `alldata`.
    Assets whose model file does not exist are skipped silently.
    `type` is accepted but not used.
    """
    mod_suffix = "_alldata.json" if alldata else ".json"

    if tuned:
        print('use model folder new-xgbcrypto-tune')
        mod_folder = f"../input/new-xgbcrypto-tune/model_nof_{param_version}"
    else:
        print('use model folder mytrainedxgb')
        mod_folder = f"../input/mytrainedxgb/model_nof_{param_version}"

    asset_pairs = zip(df_asset_details['Asset_ID'], df_asset_details['Asset_Name'])
    for asset_id, asset_name in asset_pairs:
        model_file = f"{mod_folder}/model_{asset_id}{mod_suffix}"
        if not exists(model_file):
            continue
        print(f"{model_file} for {asset_name} exists")
        booster = xgb.Booster()
        booster.load_model(model_file)
        models[asset_id] = booster


model_reload_train(type='xgb')


# Submit To Kaggle

Keep the most recent contiguous minutes of `supplemental_train`, immediately before the API test set begins, as the history needed to compute the lag features.

In [None]:
######################################################
# Number of timestamps of history kept per asset: the long beta window plus 15
# extra rows (presumably to cover the 15-row lookback of lr_15 -- TODO confirm).
pre_minute_beta = beta_lw + 15
# Asset_ID -> asset weight, normalised so that all weights sum to 1.
add_weight_map = dict(zip(df_asset_details.Asset_ID, 
                        df_asset_details.Weight/df_asset_details.Weight.sum()))

###load sup_train
sup_train = pd.read_csv('../input/c/c/g-research-crypto-forecasting/supplemental_train.csv')
sup_train = sup_train.set_index("timestamp")
# Distinct timestamps in order of first appearance; reindex() below uses the
# first and last entries as grid bounds, so it assumes the file is time-sorted.
ind = sup_train.index.unique()
###consistent timestamp for all 14 assets
def reindex(df):
    """Put one asset's rows on a regular timestamp grid and fill the gaps.

    The grid runs from ind[0] to ind[-1] inclusive in steps of 60 (assumes
    timestamps are in seconds, i.e. one row per minute -- TODO confirm).
    Missing grid points take the nearest existing row; any remaining NaNs are
    forward-filled and then back-filled.
    """
    df = df.reindex(range(ind[0],ind[-1]+60,60),method='nearest')
    df = df.fillna(method="ffill").fillna(method="bfill")
    return df
# Regularise every asset separately, drop the group level added by apply and
# order all rows by timestamp.
sup_train = sup_train.groupby('Asset_ID').apply(reindex).reset_index(0, drop=True).sort_index()
# Keep only the last pre_minute_beta timestamps (14 rows per timestamp, one per asset).
sup_train = sup_train.iloc[(-14*pre_minute_beta):,:]
#add weight
sup_train['Weight'] = sup_train['Asset_ID'].map(add_weight_map)
sup_train.drop('Target',axis=1, inplace=True)
# Index becomes (timestamp, Asset_ID).
sup_train.set_index('Asset_ID',append=True, inplace=True)
#######################################add lr_15,mkt_lr_15,crypto_index,beta_s,beta_l
# 15-row log return of Close, computed separately for each asset.
lr_15 = sup_train.groupby('Asset_ID').apply( 
        lambda x: log_return(x[['Close']],15)
        )
# Assigned by index alignment: this relies on the groupby result keeping the
# same (timestamp, Asset_ID) index as sup_train.
# NOTE(review): verify this holds for the installed pandas version.
sup_train['lr_15'] = lr_15['Close']

# Per timestamp: weighted sum over assets of lr_15 and of Close.
mkt_lr_15 = sup_train.groupby('timestamp').apply( 
    lambda x: x[["lr_15", "Close"]].multiply(x["Weight"], axis="index").sum()
    )
mkt_lr_15.columns = ['Mkt_lrt_15','Crypto_Index']
# Broadcast the per-timestamp values back onto every row by looking up each
# row's timestamp; .values makes the assignment positional.
firsts = sup_train.index.get_level_values('timestamp')
sup_train[['Mkt_lrt_15','Crypto_Index']] = mkt_lr_15.loc[firsts].values
#####placeholder for long window features
# Initialised to 0 here; the prediction loop fills these columns for each
# test timestamp.
sup_train[f"beta_{beta_s}"] = 0
sup_train[f"beta_{beta_l}"] = 0
sup_train[f"std_lr_15_{std_lr_15}"] = 0

In [None]:
from datetime import datetime 

# Prediction loop: for each batch served by the competition API, append it to
# the rolling history in sup_train, compute the features of the new timestamp
# and submit one prediction per requested row.
env = gresearch_crypto.make_env()
iter_test = env.iter_test()
pd.options.mode.chained_assignment = None  # default='warn'

start_time = datetime.now()

for i, (df_test, df_pred) in enumerate(iter_test):
    # Number of rows (assets) actually present in this batch.
    num_asset_test = df_test.shape[0]
    # row_id -> Asset_ID, used at the end to order predictions like df_pred.
    row_asset_id_map = dict(zip(df_test.row_id, df_test.Asset_ID))
    # Timestamp of the first row; assumes the batch holds a single timestamp.
    test_timestamp = df_test.timestamp.values[0]
    
    # Timestamps currently in the history, followed by the new test timestamp.
    timestamp_list = sup_train.index.get_level_values('timestamp').unique().values
    timestamp_list = np.append(timestamp_list,test_timestamp)
    #######################################format df_test
    ###add weight and index
    df_test['Weight'] = df_test['Asset_ID'].map(add_weight_map)
    ###fillin missing assets as nan
    df_test.set_index(['timestamp','Asset_ID'],inplace=True)
    # One row per Asset_ID 0..13 at the test timestamp; absent assets become all-NaN rows.
    df_test = df_test.reindex(list(zip([test_timestamp]*14,range(14))))
    ########################################concat to sup_train, add lr_15,mkt_lr_15,crypto_index
    sup_train = pd.concat([sup_train,df_test.drop('row_id',axis=1)],join='outer')
    #########################################fill in missing assets as forward
    # In both branches the first 14 rows are dropped so the history keeps a
    # constant length.
    if num_asset_test <14:
        #ffill in missing
        sup_train = sup_train.groupby('Asset_ID').apply(lambda x: x.fillna(method="ffill")).iloc[14:,:]
    else:
        sup_train = sup_train.iloc[14:,:]
    
    # lr_15 of the new rows: difference of log(Close) between the timestamp 15
    # positions earlier in timestamp_list and the test timestamp, per asset.
    test_lr_15 = sup_train.loc[timestamp_list[[-16,-1]]].groupby('Asset_ID').apply(
        lambda x: np.log(x[['Close']]).diff()
    )
    # Positional assignment (.values): relies on both sides listing the assets
    # in the same order.
    sup_train.loc[test_timestamp, 'lr_15'] = test_lr_15.loc[test_timestamp,'Close'].values
    # Weighted sums over assets of lr_15 and Close at the test timestamp.
    sup_train.loc[test_timestamp, ['Mkt_lrt_15','Crypto_Index']] = \
        sup_train.loc[test_timestamp, ["lr_15", "Close"]].multiply(sup_train.loc[test_timestamp,"Weight"], axis="index").sum().values
    ########################################beta_sl
    # Per-asset betas over the last beta_sw / beta_lw timestamps (14 rows each).
    beta_short = sup_train[['lr_15','Mkt_lrt_15']].iloc[-14*(beta_sw):,:].groupby('Asset_ID').apply(
        lambda x: beta_resid(x)).rename(f"beta_{beta_s}")
    beta_long = sup_train[['lr_15','Mkt_lrt_15']].iloc[-14*(beta_lw):,:].groupby('Asset_ID').apply(
        lambda x: beta_resid(x)).rename(f"beta_{beta_l}")
    sup_train.loc[test_timestamp, [f"beta_{beta_s}",f"beta_{beta_l}"]] = \
        pd.concat([beta_short,beta_long],axis=1).values
    #####################################long std
    # Per-asset std of lr_15 over the last std_lr_15 timestamps. Series.std()
    # uses ddof=1 by default; the sqrt((n-1)/n) factor rescales it to the
    # ddof=0 value (presumably to match ta.STDDEV -- TODO confirm).
    long_std = sup_train.iloc[-14*std_lr_15:,:].groupby('Asset_ID').apply(lambda x: x.lr_15.std())
    sup_train.loc[test_timestamp, f"std_lr_15_{std_lr_15}"] = long_std.values * np.sqrt((std_lr_15-1)/std_lr_15)
    #######################################add features to test timestamp
    # Features are computed on a copy of the last pre_minute timestamps, so the
    # indicator columns are never written back into sup_train.
    sup_train2 = sup_train.iloc[(-14*pre_minute):,:].copy()
    xx_test=sup_train2.groupby('Asset_ID').apply(
        lambda x: get_features(x)
    ).loc[test_timestamp]
    #rdy for prediction
    # One prediction per row of df_test (all 14 assets after the reindex):
    # row.name[1] is the Asset_ID; the asset's model scores a one-row frame
    # restricted to that model's feature_names. A KeyError is raised here if no
    # model was loaded for an asset.
    y_pred=df_test.apply(lambda row: models[row.name[1]].predict(
                            xgb.DMatrix(pd.DataFrame([xx_test.loc[row.name[1],models[row.name[1]].feature_names]]))
                                                                )[-1]
                         ,axis =1)
    #match with row_id
    # Index y_pred by Asset_ID only, then pick predictions in df_pred row order.
    y_pred.reset_index('timestamp',drop=True,inplace=True)
    df_pred['Target']= y_pred.loc[df_pred['row_id'].map(row_asset_id_map)].values
    env.predict(df_pred)

# Report total and per-iteration wall-clock time of the prediction loop.
time_elapsed = datetime.now() - start_time
# `i` is the last index produced by enumerate(iter_test), so i + 1 is the
# number of iterations actually run (instead of assuming exactly 4).
n_iterations = i + 1
print('Time elapsed total (hh:mm:ss.ms) {}'.format(time_elapsed))
print(f'time elapsed per iteration {time_elapsed/n_iterations}')
# 129600 = 90 days * 1440 minutes; presumably the expected number of
# iterations in a full submission run -- TODO confirm.
print(f'Submission time estimate {129600*time_elapsed/n_iterations}')