# Imports

In [None]:
import time 
import traceback
from datetime import datetime

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

from lightgbm import LGBMRegressor

# Loading Datasets

In [None]:
# Folder that holds the competition CSV files (relative path); every
# `pd.read_csv` call below prepends it to the file name.
data_folder = "../input/time-series-crypto-forecasting/"
# IPython shell escape: list the contents of that folder.
!ls  $data_folder

In [None]:
# Asset metadata table. Its 'Asset_ID' and 'Asset_Name' columns drive the
# per-asset training loops further down.
asset_details = pd.read_csv(data_folder + 'asset_details.csv')
# Show the first rows as the cell output.
asset_details.head()

In [None]:
# Training data. The code below reads its 'Asset_ID', 'Target' and the
# Count/Open/High/Low/Close/Volume/VWAP columns.
df_train = pd.read_csv(data_folder + 'train.csv')
# Show the first rows as the cell output.
df_train.head()

In [None]:
# Test data to predict on. It must provide 'Asset_ID' plus the same
# Count/Open/High/Low/Close/Volume/VWAP columns that `get_features` selects.
df_test = pd.read_csv(data_folder + 'test.csv')
# Show the first rows as the cell output.
df_test.head()

# Feature Extraction

In [None]:
def upper_shadow(df):
    """Return High minus the larger of Close and Open (element-wise)."""
    body_top = np.maximum(df['Close'], df['Open'])
    return df['High'] - body_top
def lower_shadow(df):
    """Return the smaller of Close and Open minus Low (element-wise)."""
    body_bottom = np.minimum(df['Close'], df['Open'])
    return body_bottom - df['Low']

def get_features(df):
    """Build the model input: seven raw columns plus the two shadow features.

    `df` is a DataFrame holding the columns listed below (a single row
    given as a Series with the same labels also works). The input is not
    modified; a copy with 'Upper_Shadow' and 'Lower_Shadow' appended, in
    that order, is returned.
    """
    raw_columns = ['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP']
    features = df[raw_columns].copy()
    features['Upper_Shadow'] = upper_shadow(features)
    features['Lower_Shadow'] = lower_shadow(features)
    return features

# Prediction



## Without tuning

In [None]:
def get_Xy_and_model_for_asset(df_train, asset_id):
    """Fit a default-parameter LGBMRegressor on one asset's training rows.

    Rows of `df_train` whose 'Asset_ID' equals `asset_id` are turned into
    features with `get_features`; 'Target' becomes the label. Infinite
    values are treated as missing, and every row with a missing value in
    any feature or in the label is dropped before fitting.

    Returns the tuple `(X, y, model)`: the cleaned feature frame, the
    matching label Series and the fitted model.
    """
    asset_rows = df_train[df_train["Asset_ID"] == asset_id]

    frame = get_features(asset_rows)
    frame['y'] = asset_rows['Target']
    # +/-inf become NaN so that a single dropna removes all unusable rows.
    frame = frame.replace([np.inf, -np.inf], np.nan).dropna(how="any")

    y = frame["y"]
    X = frame.drop(columns="y")

    model = LGBMRegressor()
    model.fit(X, y)

    return X, y, model

### Fitting

In [None]:
# Per-asset training features, labels and fitted default-parameter models,
# all keyed by Asset_ID. An asset whose training fails gets None in all three.
Xs = {}
ys = {}
non_tuned_models = {}

for asset_id, asset_name in zip(asset_details['Asset_ID'], asset_details['Asset_Name']):
    print(f"Training model for {asset_name:<16} (ID={asset_id:<2})")
    try:
        # The training call is the step that can raise, so it has to be
        # inside the try for the None fallback below to ever be used.
        X, y, model = get_Xy_and_model_for_asset(df_train, asset_id)
    except Exception:
        # Report the failure instead of hiding it, then carry on with the
        # remaining assets; the prediction step treats None as "no model".
        traceback.print_exc()
        X, y, model = None, None, None

    Xs[asset_id], ys[asset_id], non_tuned_models[asset_id] = X, y, model

### Predicting

In [None]:
# Predict the whole test set with the default-parameter models.
#
# Rows are scored in one batch per asset rather than one `predict` call per
# row: iterating with `iterrows` and wrapping every row in its own DataFrame
# is very slow on large inputs and does not keep the column dtypes the
# models were trained on. Predictions stay in `df_test` row order.
test_asset_ids = df_test['Asset_ID'].to_numpy()
test_features = get_features(df_test)

# Start from 0 everywhere; rows whose asset has no fitted model keep it.
non_tuned_predictions = np.zeros(len(df_test), dtype=float)

for asset_id in pd.unique(test_asset_ids):
    # `.get` also covers asset ids that never appeared in the training loop.
    model = non_tuned_models.get(asset_id)
    if model is None:
        continue
    asset_mask = test_asset_ids == asset_id
    non_tuned_predictions[asset_mask] = model.predict(test_features[asset_mask])
    print(f"Predicted {int(asset_mask.sum())} rows for Asset_ID={asset_id}")

# Kept as a plain list, one prediction per test row, as before.
df_non_tuned_pred = non_tuned_predictions.tolist()

pd.DataFrame(df_non_tuned_pred).to_csv('non_tuned_predicted.csv')

## With Tuning

In [None]:
# Per-asset LightGBM settings, one [learning_rate, num_leaves] pair per entry.
# A value of -1 means "do not pass this parameter" (the LightGBM default is
# used instead). Entries are looked up by position: the fitting loop passes a
# running counter, so entry i belongs to the i-th row of `asset_details`.
# NOTE(review): presumably these values came from the grid-search scratch
# code at the end of the notebook — confirm.
hyperparams = [
    [0.01, 111],
    [0.01, -1],
    [0.05, 151],
    [0.01, 141],
    [0.01, 41],
    [-1, 51],
    [0.05, 21],
    [0.05, -1],
    [0.01, 61],
    [0.05, 41],
    [0.05, 21],
    [0.01, 71],
    [0.01, 61],
    [0.01, 21]
]

def get_tuned_Xy_and_model_for_asset(df_train, asset_id, idx):
    """Fit an LGBMRegressor for one asset using its entry in `hyperparams`.

    Args:
        df_train: training frame with 'Asset_ID', 'Target' and the columns
            `get_features` selects.
        asset_id: value of 'Asset_ID' whose rows are used for training.
        idx: position of this asset's `[learning_rate, num_leaves]` pair in
            the module-level `hyperparams` list.

    Returns:
        `(X, y, model)`: the cleaned feature frame, the matching label
        Series and the fitted model.

    Raises:
        IndexError: if `idx` is outside `hyperparams`.
    """
    df = df_train[df_train["Asset_ID"] == asset_id]

    df_proc = get_features(df)
    df_proc['y'] = df['Target']
    # +/-inf become NaN so that a single dropna removes all unusable rows.
    df_proc.replace([np.inf, -np.inf], np.nan, inplace=True)
    df_proc = df_proc.dropna(how="any")

    X = df_proc.drop("y", axis=1)
    y = df_proc["y"]

    # -1 is the "use the LightGBM default" sentinel. Only real values are
    # forwarded, so a pair where both entries are -1 gives a default model
    # instead of passing num_leaves=-1 to LightGBM.
    learning_rate, num_leaves = hyperparams[idx]
    tuned_params = {}
    if learning_rate != -1:
        tuned_params['learning_rate'] = learning_rate
    if num_leaves != -1:
        tuned_params['num_leaves'] = num_leaves

    model = LGBMRegressor(**tuned_params)
    model.fit(X, y)
    return X, y, model


### Fitting

In [None]:
# Per-asset training features, labels and fitted tuned models, all keyed by
# Asset_ID. An asset whose training fails gets None in all three.
Xs = {}
ys = {}
models = {}

# `hyperparam_idx` is the asset's position in `asset_details`, which is how
# the `hyperparams` list is ordered.
asset_rows = zip(asset_details['Asset_ID'], asset_details['Asset_Name'])
for hyperparam_idx, (asset_id, asset_name) in enumerate(asset_rows):
    print(f"Training model for {asset_name:<16} (ID={asset_id:<2})")
    try:
        # The training call is the step that can raise, so it has to be
        # inside the try for the None fallback below to ever be used.
        X, y, model = get_tuned_Xy_and_model_for_asset(df_train, asset_id, hyperparam_idx)
    except Exception:
        # Report the failure instead of hiding it, then carry on with the
        # remaining assets; the prediction step treats None as "no model".
        traceback.print_exc()
        X, y, model = None, None, None

    Xs[asset_id], ys[asset_id], models[asset_id] = X, y, model

### Predicting

In [None]:
# Predict the whole test set with the tuned models.
#
# Rows are scored in one batch per asset rather than one `predict` call per
# row: iterating with `iterrows` and wrapping every row in its own DataFrame
# is very slow on large inputs and does not keep the column dtypes the
# models were trained on. Predictions stay in `df_test` row order.
test_asset_ids = df_test['Asset_ID'].to_numpy()
test_features = get_features(df_test)

# Start from 0 everywhere; rows whose asset has no fitted model keep it.
tuned_predictions = np.zeros(len(df_test), dtype=float)

for asset_id in pd.unique(test_asset_ids):
    # `.get` also covers asset ids that never appeared in the training loop.
    model = models.get(asset_id)
    if model is None:
        continue
    asset_mask = test_asset_ids == asset_id
    tuned_predictions[asset_mask] = model.predict(test_features[asset_mask])
    print(f"Predicted {int(asset_mask.sum())} rows for Asset_ID={asset_id}")

# Kept as a plain list, one prediction per test row, as before.
df_tuned_pred = tuned_predictions.tolist()

pd.DataFrame(df_tuned_pred).to_csv('tuned_predicted.csv')

# Scratch: script for fine tuning using GRID search

In [None]:
# parameters = {
#     'max_depth': range (2, 10, 1),
#     'num_leaves': range(21, 161, 10),
#     'learning_rate': [0.1, 0.01, 0.05]
# }

# new_models = {}

# for asset_id, asset_name in zip(asset_details['Asset_ID'], asset_details['Asset_Name']):
#     print("GridSearchCV for: " + asset_name)
    
#     grid_search = GridSearchCV(
#         estimator=get_Xy_and_model_for_asset(df_train, asset_id)[2], 
#         param_grid=parameters,
#         n_jobs = -1,
#         cv = 5,
#         verbose=True
#     )
    
#     grid_search.fit(Xs[asset_id], ys[asset_id])
#     new_models[asset_id] = grid_search.best_estimator_
#     grid_search.best_estimator_

In [None]:
# for asset_id, asset_name in zip(asset_details['Asset_ID'], asset_details['Asset_Name']):
#     print(f"Tuned model for {asset_name:<1} (ID={asset_id:})")
#     print(new_models[asset_id])