In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load (they are imported in the next cell)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import traceback
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
import gresearch_crypto

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [None]:
# Read the three competition CSVs in a fixed order:
# training data, asset details, supplemental training data.
df_train, df_asset, df_supplemental = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/g-research-crypto-forecasting/train.csv",
        "../input/g-research-crypto-forecasting/asset_details.csv",
        "../input/g-research-crypto-forecasting/supplemental_train.csv",
    )
)

df_train.head()

In [None]:
# Preview the asset table; its Asset_ID and Asset_Name columns drive the per-asset training loop.
df_asset.head()

In [None]:
def upper_shadow(df):
    """Return the upper shadow: ``High`` minus the larger of ``Close`` and ``Open``.

    ``df`` is anything indexable by the keys 'High', 'Close' and 'Open'
    (the notebook passes both DataFrames and single-row Series).
    """
    body_top = np.maximum(df['Close'], df['Open'])
    return df['High'] - body_top

def lower_shadow(df):
    """Return the lower shadow: the smaller of ``Close`` and ``Open`` minus ``Low``.

    ``df`` is anything indexable by the keys 'Low', 'Close' and 'Open'
    (the notebook passes both DataFrames and single-row Series).
    """
    body_bottom = np.minimum(df['Close'], df['Open'])
    return body_bottom - df['Low']
    
def get_features(df):
    """Build the model input features from raw candle data.

    Copies the seven raw columns (Count, Open, High, Low, Close, Volume, VWAP)
    and appends the derived 'Upper_Shadow' and 'Lower_Shadow' values.
    The input is not modified. Works on a DataFrame or on a single-row Series,
    since the notebook calls it with both.
    """
    raw_columns = ['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP']
    df_feat = df[raw_columns].copy()
    # The shadows only need Open/High/Low/Close, all of which are in the copy.
    df_feat['Upper_Shadow'] = upper_shadow(df_feat)
    df_feat['Lower_Shadow'] = lower_shadow(df_feat)
    return df_feat


def get_Xy_and_model_for_asset(df_train, asset_id):
    """Fit an XGBoost regressor on the rows of one asset.

    Parameters
    ----------
    df_train : DataFrame with an 'Asset_ID' column, a 'Target' column and the
        raw columns that ``get_features`` reads.
    asset_id : value compared against ``df_train.Asset_ID`` to select rows.

    Returns
    -------
    (X, y, model) : the feature frame, the target Series and the fitted
        ``xgb.XGBRegressor``. Rows with a missing feature or target are dropped.
    """
    asset_rows = df_train[df_train.Asset_ID == asset_id]

    df_proc = get_features(asset_rows)
    df_proc['y'] = asset_rows['Target']
    # Drop every row that has a NaN in any feature or in the target.
    df_proc = df_proc.dropna(how='any')

    y = df_proc['y']
    X = df_proc.drop(columns='y')

    model = xgb.XGBRegressor()
    model.fit(X, y)
    return X, y, model

In [None]:
# Per-asset training artefacts, keyed by Asset_ID.
# A value of None marks an asset whose training failed.
Xs = {}
ys = {}
models = {}

for asset_id, asset_name in zip(df_asset['Asset_ID'], df_asset['Asset_Name']):
    print(f"Training model for {asset_name:<16} (ID={asset_id:<2})")
    try:
        # get_Xy_and_model_for_asset filters on df_train.Asset_ID, so it must be
        # given the ID. Passing the name matches no rows and training always fails.
        X, y, model = get_Xy_and_model_for_asset(df_train, asset_id)
    except Exception as exc:
        # Best effort: keep training the remaining assets, but report the failure
        # so an asset without a model is not a silent surprise at inference time.
        print(f"  -> training failed for {asset_name} (ID={asset_id}): {exc!r}")
        X, y, model = None, None, None
    Xs[asset_id], ys[asset_id], models[asset_id] = X, y, model


In [None]:
# Smoke test: build the features for one training row and run them through the model stored under key 0.
# NOTE(review): assumes df_train.iloc[1] belongs to Asset_ID 0 and that models[0] is not None — confirm.
x = get_features(df_train.iloc[1])
# get_features returns a Series for a single row; wrap it in a list to get a one-row DataFrame for predict().
y_pred = models[0].predict(pd.DataFrame([x]))
y_pred[0]

In [None]:
# Submission loop: for every batch served by the competition API, fill in the
# 'Target' column of df_pred row by row and hand the frame back to the API.
env = gresearch_crypto.make_env()
iter_test = env.iter_test()

for i, (df_test, df_pred) in enumerate(iter_test):
    for j, row in df_test.iterrows():
        # Rows of df_pred that correspond to this test row.
        target_rows = df_pred['row_id'] == row['row_id']

        # .get() instead of [] so an Asset_ID with no entry in `models`
        # falls back to 0 rather than raising KeyError and aborting the loop.
        model = models.get(row['Asset_ID'])

        # Fallback prediction when there is no model or when predicting fails.
        y_pred = 0
        if model is not None:
            try:
                x_test = get_features(row)
                y_pred = model.predict(pd.DataFrame([x_test]))[0]
            except Exception:
                # Log the failure and keep the fallback; one bad row must not
                # stop the whole submission.
                traceback.print_exc()

        df_pred.loc[target_rows, 'Target'] = y_pred

    env.predict(df_pred)

In [None]:
#df = df_train.copy()
#df = df[df.Asset_ID==1].drop(['timestamp','Asset_ID'],axis=1)
#df.dropna(how='any', inplace=True)
#X = df.drop('Target', axis=1)
#y = df.Target


#X_train,X_valid, y_train, y_valid = train_test_split(X,y ,test_size=0.25, random_state=7)
#model = xgb.XGBRegressor()
#model.fit(X_train,y_train)