In [None]:
import os
import random
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
import lightgbm as lgb
import gresearch_crypto
import datetime
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_error as MSE
import warnings
warnings.filterwarnings('ignore')

# Input files under /kaggle/input (absolute paths, so they only resolve inside
# a runtime that mounts the competition data there).
TRAIN_CSV = '/kaggle/input/g-research-crypto-forecasting/train.csv'
ASSET_DETAILS_CSV = '/kaggle/input/g-research-crypto-forecasting/asset_details.csv'

# Seed passed to fix_all_seeds() below.
SEED = 2021


In [None]:
def fix_all_seeds(seed):
    """Seed the random sources used by this notebook with one value.

    Seeds NumPy's global generator and the standard-library ``random``
    module, then stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment
    variable.
    """
    for seed_fn in (np.random.seed, random.seed):
        seed_fn(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

# Apply the notebook-wide seed once, at import/setup time.
fix_all_seeds(SEED)

In [None]:
#df_asset_details = pd.read_csv(ASSET_DETAILS_CSV).sort_values("Asset_ID")
#asset_weight_dict =df_asset_details[['Asset_ID','Weight']].to_dict()

In [None]:
def log_return(series, periods=5):
    """Return the change in ``log(series)`` over ``periods`` steps.

    The natural log is taken element-wise and each value is compared with
    the one ``periods`` positions earlier via ``.diff``; positions without
    such a predecessor come back as NaN.
    """
    log_values = np.log(series)
    return log_values.diff(periods=periods)

def get_features(df, n_assets=14):
    """Append calendar and per-asset candlestick features to a wide frame.

    ``df`` is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds Unix timestamps in seconds. For every asset
        id ``i`` in ``range(n_assets)`` it must contain the columns
        ``Open_{i}``, ``High_{i}``, ``Low_{i}`` and ``Close_{i}``.
    n_assets : int, default 14
        Number of asset ids (``0 .. n_assets - 1``) to build features for.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``. New columns are appended in a fixed order:
        ``hour``, ``dayofweek``, ``day``, then for each asset
        ``Upper_Shadow_``, ``hlco_ratio_``, ``Lower_Shadow_``,
        ``diff_upper`` (no underscore before the id), ``diff_ratio_``,
        ``diff_Shadow_``, ``open2close_`` and ``high2low_``.

    Raises
    ------
    KeyError
        If one of the required OHLC columns is missing.

    Notes
    -----
    ``hlco_ratio_{i}`` divides by ``Close - Open``; a candle that closes where
    it opened therefore yields ``inf``/``NaN``. That behaviour is kept as-is
    so the features stay identical to what was computed before.
    """
    # `unit="s"` fully determines how the index is parsed, so no format
    # inference is requested (the old `infer_datetime_format` flag had no
    # effect here and is deprecated in pandas 2.x).
    times = pd.to_datetime(df.index, unit="s")
    df["hour"] = times.hour
    df["dayofweek"] = times.dayofweek
    df["day"] = times.day

    for i in range(n_assets):
        open_ = df[f'Open_{i}']
        high = df[f'High_{i}']
        low = df[f'Low_{i}']
        close = df[f'Close_{i}']

        # Column insertion order is part of the contract: downstream models
        # receive the frame as-is, so keep this sequence unchanged.
        df[f'Upper_Shadow_{i}'] = high - np.maximum(close, open_)
        df[f'hlco_ratio_{i}'] = (high - low) / (close - open_)
        df[f'Lower_Shadow_{i}'] = np.minimum(close, open_) - low

        # Row-to-row change of the three features above; the first row has no
        # predecessor and is filled with 0.
        df[f'diff_upper{i}'] = df[f'Upper_Shadow_{i}'].diff().fillna(0)
        df[f'diff_ratio_{i}'] = df[f'hlco_ratio_{i}'].diff().fillna(0)
        df[f'diff_Shadow_{i}'] = df[f'Lower_Shadow_{i}'].diff().fillna(0)

        df[f'open2close_{i}'] = close / open_
        df[f'high2low_{i}'] = high / low
    return df

In [None]:
import pickle
# Load the pickled `models` object from the attached input directory. Later
# code indexes it by Asset_ID and calls `.predict` on each entry.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts whose origin you trust.
with open('../input/greserchmodel/models','rb') as web:
    models = pickle.load(web)

In [None]:
print( 'start',datetime.datetime.now().strftime('%Y年%m月%d日 %H:%M:%S'))

env = gresearch_crypto.make_env()
iter_test = env.iter_test()

# Columns that identify a row rather than describe the candle; they are not
# pivoted into the wide feature frame.
NON_FEATURE_COLUMNS = ('Asset_ID', 'group_num', 'row_id')

for df_test, df_pred in iter_test:
    # Missing values become -1; infinite VWAP values become 0.
    df_test = df_test.fillna(-1)
    df_test['VWAP'] = df_test['VWAP'].replace([np.inf, -np.inf], 0)

    # Standardise the columns at positions 2..-2 with the mean/std of the rows
    # in this batch.
    # NOTE(review): assumes the first two columns and the last one are
    # identifiers (timestamp, Asset_ID, row_id) — confirm against the API schema.
    scaled = df_test.iloc[:, 2:-1].apply(lambda col: (col - col.mean()) / col.std(), axis=0)
    df_test.iloc[:, 2:-1] = scaled

    # Pivot the long batch into one wide row per timestamp, with columns named
    # "<field>_<asset_id>".
    timestamps = df_test['timestamp'].unique()
    test = pd.DataFrame(index=timestamps)
    test['timestamp'] = timestamps
    for asset_id in range(14):  # get_features() expects columns for ids 0..13
        # Index by timestamp up front so the assignment below aligns on
        # timestamp no matter where that column sits in df_test.
        asset_rows = df_test[df_test.Asset_ID == asset_id].set_index("timestamp")
        for col in asset_rows.columns:
            if col not in NON_FEATURE_COLUMNS:
                test[f"{col}_{asset_id}"] = asset_rows[col]

    df_proc = get_features(test)

    # Predict asset by asset and write each value into the matching row of the
    # submission frame.
    for asset_id in df_test.Asset_ID.unique():
        model = models[asset_id]
        asset_rows = df_test[df_test.Asset_ID == asset_id]

        # .copy() makes x_test an independent frame, so adding the helper
        # columns below is not a chained assignment on a slice of df_proc.
        x_test = df_proc[df_proc.index.isin(asset_rows.timestamp.values)].copy()
        x_test['pred'] = model.predict(x_test)
        # Raises ValueError if the asset has a different number of rows than
        # feature rows, exactly as before.
        x_test['row_id'] = asset_rows.row_id.values

        # Iterate the two columns directly: row_id keeps its integer dtype
        # instead of being upcast to float by iterrows().
        for row_id, pred in zip(x_test['row_id'].values, x_test['pred'].values):
            df_pred.loc[df_pred['row_id'] == row_id, 'Target'] = pred

    env.predict(df_pred)

print( 'end',datetime.datetime.now().strftime('%Y年%m月%d日 %H:%M:%S'))

In [None]:
# Display the prediction frame from the last batch passed to env.predict().
df_pred