In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import traceback
import gresearch_crypto

#Preprocessing
from sklearn.preprocessing import MinMaxScaler

#Models
from mlxtend.regressor import StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
import xgboost as xgb

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/g-research-crypto-forecasting/example_sample_submission.csv
/kaggle/input/g-research-crypto-forecasting/asset_details.csv
/kaggle/input/g-research-crypto-forecasting/example_test.csv
/kaggle/input/g-research-crypto-forecasting/train.csv
/kaggle/input/g-research-crypto-forecasting/supplemental_train.csv
/kaggle/input/g-research-crypto-forecasting/gresearch_crypto/competition.cpython-37m-x86_64-linux-gnu.so
/kaggle/input/g-research-crypto-forecasting/gresearch_crypto/__init__.py


In [2]:
df_train = pd.read_csv('/kaggle/input/g-research-crypto-forecasting/train.csv')
df_train.head()

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target
0,1514764860,2,40.0,2376.58,2399.5,2357.14,2374.59,19.233005,2373.116392,-0.004218
1,1514764860,0,5.0,8.53,8.53,8.53,8.53,78.38,8.53,-0.014399
2,1514764860,1,229.0,13835.194,14013.8,13666.11,13850.176,31.550062,13827.062093,-0.014643
3,1514764860,5,32.0,7.6596,7.6596,7.6567,7.6576,6626.71337,7.657713,-0.013922
4,1514764860,7,5.0,25.92,25.92,25.874,25.877,121.08731,25.891363,-0.008264


In [3]:
df_asset_details = pd.read_csv('/kaggle/input/g-research-crypto-forecasting/asset_details.csv').sort_values("Asset_ID")
df_asset_details

Unnamed: 0,Asset_ID,Weight,Asset_Name
1,0,4.304065,Binance Coin
2,1,6.779922,Bitcoin
0,2,2.397895,Bitcoin Cash
10,3,4.406719,Cardano
13,4,3.555348,Dogecoin
3,5,1.386294,EOS.IO
5,6,5.894403,Ethereum
4,7,2.079442,Ethereum Classic
11,8,1.098612,IOTA
6,9,2.397895,Litecoin


In [4]:
#Init MinMaxScaler
scaler = MinMaxScaler()

# Two new features from the competition tutorial
def upper_shadow(df):
    return df['High'] - np.maximum(df['Close'], df['Open'])

def lower_shadow(df):
    return np.minimum(df['Close'], df['Open']) - df['Low']

# A utility function to build features from the original df
# It works for rows to, so we can reutilize it.
def get_features(df):
    df_feat = df[['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP']].copy()
    df_feat['Upper_Shadow'] = upper_shadow(df_feat)
    df_feat['Lower_Shadow'] = lower_shadow(df_feat)
    return df_feat

def get_Xy_and_model_for_asset(df_train, asset_id):
    df = df_train[df_train["Asset_ID"] == asset_id]
    
    # TODO: Try different features here!
    df_proc = get_features(df)
    df_proc['y'] = df['Target']
    df_proc = df_proc.dropna(how="any")
    
    X = df_proc.drop("y", axis=1)
    X = scaler.fit_transform(X)
    y = df_proc["y"]

    # Initializing models
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf')
    xgboost = xgb.XGBRegressor(tree_method='gpu_hist')
    regressors = [xgboost, svr_rbf, lr]
    model = StackingRegressor(regressors=regressors, 
                               meta_regressor=lr)

    model.fit(X, y)

    return X, y, model

In [5]:
Xs = {}
ys = {}
models = {}

for asset_id, asset_name in zip(df_asset_details['Asset_ID'], df_asset_details['Asset_Name']):
    print(f"Training model for {asset_name:<16} (ID={asset_id:<2})")
    try:
        X, y, model = get_Xy_and_model_for_asset(df_train, asset_id)
        print(model.score(X,y))
        Xs[asset_id], ys[asset_id], models[asset_id] = X, y, model
    except: 
        traceback.print_exc()
        Xs[asset_id], ys[asset_id], models[asset_id] = None, None, None

Training model for Binance Coin     (ID=0 )
0.07241653155096284
Training model for Bitcoin          (ID=1 )
0.05660678432197752
Training model for Bitcoin Cash     (ID=2 )
0.20072030700134036
Training model for Cardano          (ID=3 )
0.041507277130102826
Training model for Dogecoin         (ID=4 )
0.14741934892797437
Training model for EOS.IO           (ID=5 )
0.05764942651981941
Training model for Ethereum         (ID=6 )
0.060252879973171525
Training model for Ethereum Classic (ID=7 )
0.06168988297504818
Training model for IOTA             (ID=8 )
0.051108322573599985
Training model for Litecoin         (ID=9 )
0.04906264708478003
Training model for Maker            (ID=10)


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
Traceback (most recent call last):
  File "/tmp/ipykernel_34/4128875888.py", line 8, in <module>
    X, y, model = get_Xy_and_model_for_asset(df_train, asset_id)
  File "/tmp/ipykernel_34/1362256623.py", line 28, in get_Xy_and_model_for_asset
    X = scaler.fit_transform(X)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/base.py", line 847, in fit_transform
    return self.fit(X, **fit_params).transform(X)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/preprocessing/_data.py", line 416, in fit
    return self.partial_fit(X, y)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/preprocessing/_data.py", line 458, in partial_fit
    force_all_finite="allow-nan",
  File "/opt/conda/lib/python3.7/site-packages/sklearn/base.py", line 561, in _validate_data
    X = check_array(X, **check_params)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/utils/validation.py", line 792, in check_array
    _assert_all_fini

Training model for Monero           (ID=11)
0.06608120008350948
Training model for Stellar          (ID=12)
0.06302274356728721
Training model for TRON             (ID=13)
0.05748008916239333


In [6]:
#Submitting results to Kaggle
env = gresearch_crypto.make_env()
iter_test = env.iter_test()

for i, (df_test, df_pred) in enumerate(iter_test):
    for j , row in df_test.iterrows():
        
        if models[row['Asset_ID']] is not None:
            try:
                model = models[row['Asset_ID']]
                x_test = get_features(row)
                y_pred = model.predict(pd.DataFrame([x_test]))[0]
                df_pred.loc[df_pred['row_id'] == row['row_id'], 'Target'] = y_pred
            except:
                df_pred.loc[df_pred['row_id'] == row['row_id'], 'Target'] = 0
                traceback.print_exc()
        else: 
            df_pred.loc[df_pred['row_id'] == row['row_id'], 'Target'] = 0
        

        env.predict(df_pred)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, but {self.__class__.__name__} was fitted without"
  f"X has feature names, 

PermissionError: [Errno 1] Operation not permitted: 'submission.csv'

In [7]:
#Submission is not permetted
#Saving results to a different file name

df_pred.to_csv('./submissionn.csv')