In [None]:
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression
import gresearch_crypto


# Input file locations, relative to the notebook's working directory.
# TRAIN_CSV is the same path that read_csv_strict() uses as its default.
TRAIN_CSV = '../input/g-research-crypto-forecasting/train.csv'
ASSET_DETAILS_CSV = '../input/g-research-crypto-forecasting/asset_details.csv'

def read_csv_strict(file_name='../input/g-research-crypto-forecasting/train.csv',
                    start='2020-06-13 00:00:00',
                    end='2021-06-13 00:00:00'):
    """Load a CSV with a ``timestamp`` column and keep only rows inside a date window.

    A ``datetime`` column is added by interpreting ``timestamp`` as seconds
    since the Unix epoch. Rows are kept when ``start < datetime < end``;
    both bounds are exclusive.

    Parameters
    ----------
    file_name : str
        Path of the CSV file to read.
    start : str, datetime-like or None
        Exclusive lower bound of the window. ``None`` disables the lower bound.
    end : str, datetime-like or None
        Exclusive upper bound of the window. ``None`` disables the upper bound.

    Returns
    -------
    pandas.DataFrame
        The filtered rows (original index preserved) including the new
        ``datetime`` column.
    """
    df = pd.read_csv(file_name)
    df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')

    # Build one boolean mask instead of filtering the frame repeatedly.
    in_window = pd.Series(True, index=df.index)
    if start is not None:
        in_window &= df['datetime'] > pd.Timestamp(start)
    if end is not None:
        in_window &= df['datetime'] < pd.Timestamp(end)
    return df[in_window]

In [None]:
# Training rows, restricted to the date window applied by read_csv_strict.
df_train = read_csv_strict(file_name=TRAIN_CSV)

# Asset metadata, ordered by Asset_ID.
asset_details_raw = pd.read_csv(ASSET_DETAILS_CSV)
df_asset_details = asset_details_raw.sort_values(by="Asset_ID")

In [None]:

def upper_shadow(df):
    """Return ``High`` minus the larger of ``Close`` and ``Open``.

    ``df`` is anything indexable by the keys 'High', 'Close' and 'Open'
    (a DataFrame gives a per-row Series, a single row gives a scalar).
    """
    body_top = np.maximum(df['Close'], df['Open'])
    return df['High'] - body_top

def lower_shadow(df):
    """Return the smaller of ``Close`` and ``Open`` minus ``Low``.

    ``df`` is anything indexable by the keys 'Low', 'Close' and 'Open'
    (a DataFrame gives a per-row Series, a single row gives a scalar).
    """
    body_bottom = np.minimum(df['Close'], df['Open'])
    return body_bottom - df['Low']

def get_features(df):
    """Build the model feature table from a frame of raw candle data.

    ``df`` must be a DataFrame providing the columns 'Count', 'Volume',
    'Asset_ID', 'Open', 'High', 'Low', 'Close' and a datetime-typed
    'datetime' column (the ``.dt`` accessor is used on it).

    Returns a new DataFrame with, in this order: Count, Volume, Asset_ID,
    day, week, weekday, Upper_Shadow, Lower_Shadow, high_div_low,
    open_sub_close. The input frame is not modified.
    """
    when = df['datetime'].dt
    return df[['Count', 'Volume', 'Asset_ID']].assign(
        day=when.day,
        # NOTE: despite its name, "week" holds the calendar month. The name
        # is kept so the feature columns stay the same wherever they are used.
        week=when.month,
        weekday=when.weekday,
        Upper_Shadow=upper_shadow(df),
        Lower_Shadow=lower_shadow(df),
        high_div_low=df["High"] / df["Low"],
        open_sub_close=df["Open"] - df["Close"],
    )

In [None]:
# Keyword arguments unpacked into LGBMRegressor(**best_params) for every asset model.
best_params = {
    "objective": "regression",
    "n_estimators" : 5000,     # NOTE(review): an earlier note here said "change from 200 to 500", but the value is 5000 — confirm which is intended
    "num_leaves" : 800,       # <-- (10) Added parameter
    "learning_rate" : 0.07,   # <-- (10) Added parameter
    "random_seed" : 1234}

In [None]:
def get_Xy_and_model_for_asset(df_train, asset_id):
    """Fit one LGBMRegressor on the rows of ``df_train`` belonging to ``asset_id``.

    Features come from ``get_features`` and the label is the 'Target' column.
    Rows containing NaN or +/-inf in any feature or in the label are dropped
    before fitting.

    Returns
    -------
    (X, y, model)
        The cleaned feature frame, the matching label Series and the fitted
        model built from ``best_params``.
    """
    asset_rows = df_train[df_train["Asset_ID"] == asset_id]

    # TODO: Try different features here!
    table = get_features(asset_rows).assign(y=asset_rows['Target'])
    # Treat infinities as missing so a single dropna removes every bad row.
    table = table.replace([np.inf, -np.inf], np.nan).dropna(how="any")

    y = table["y"]
    X = table.drop(columns="y")

    # TODO: Try different models here!
    model = LGBMRegressor(**best_params)
    model.fit(X, y)
    return X, y, model

In [None]:
# Per-asset training artefacts, each keyed by Asset_ID.
Xs, ys, models = {}, {}, {}

asset_pairs = zip(df_asset_details['Asset_ID'], df_asset_details['Asset_Name'])
for asset_id, asset_name in asset_pairs:
    print(f"Training model for {asset_name:<16} (ID={asset_id:<2})")
    X, y, model = get_Xy_and_model_for_asset(df_train, asset_id)
    Xs[asset_id] = X
    ys[asset_id] = y
    models[asset_id] = model

In [None]:
# Create the competition environment object; it supplies iter_test() and predict() below.
# NOTE(review): presumably this may only be created once per kernel session — confirm against the competition API docs.
env = gresearch_crypto.make_env()

In [None]:
# Iterator over test batches; each item unpacks into (df_test, df_pred) in the prediction loop.
iter_test = env.iter_test()

In [None]:
def predict_batch(df_test, df_pred, models):
    """Fill ``df_pred['Target']`` with model predictions for one test batch.

    Features are built with ``get_features`` — the same function used for
    training — so the columns and their order match what the models were
    fitted on. Prediction is done once per asset present in the batch rather
    than once per row, and results are written back by matching ``row_id``.

    Parameters
    ----------
    df_test : pandas.DataFrame
        Raw test rows with 'timestamp' (epoch seconds), 'row_id' and the
        columns required by ``get_features``. It is not modified.
    df_pred : pandas.DataFrame
        Submission frame with a 'row_id' column; its 'Target' column is
        overwritten for every row_id present in ``df_test``. Other rows keep
        their existing 'Target' value.
    models : dict
        Maps integer Asset_ID to a fitted model exposing ``predict``.

    Returns
    -------
    pandas.DataFrame
        ``df_pred`` itself (updated in place), for convenient chaining.

    Raises
    ------
    KeyError
        If the batch contains an Asset_ID with no entry in ``models``.
    """
    # Work on a derived frame so the caller's df_test is left untouched.
    batch = df_test.assign(datetime=pd.to_datetime(df_test['timestamp'], unit='s'))
    features = get_features(batch)

    # One vectorised predict() call per asset; positional masks avoid any
    # dependence on the frame's index labels.
    asset_ids = features['Asset_ID'].to_numpy()
    predictions = np.full(len(features), np.nan)
    for asset_id in np.unique(asset_ids):
        in_asset = asset_ids == asset_id
        predictions[in_asset] = models[int(asset_id)].predict(features[in_asset])

    by_row_id = pd.Series(predictions, index=df_test['row_id'].to_numpy())
    # If a row_id repeats, the last occurrence wins (Series.map needs a unique index).
    by_row_id = by_row_id[~by_row_id.index.duplicated(keep='last')]

    mapped = df_pred['row_id'].map(by_row_id)
    if 'Target' in df_pred.columns:
        # Keep the existing value for any row_id that is absent from df_test.
        mapped = mapped.fillna(df_pred['Target'])
    df_pred['Target'] = mapped
    return df_pred


# Serve predictions batch by batch; each batch must be answered via
# env.predict before the iterator yields the next one.
for df_test, df_pred in iter_test:
    env.predict(predict_batch(df_test, df_pred, models))