In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor

In [None]:
# Read the competition's train.csv into a single DataFrame and preview its first rows.
crypto_df = pd.read_csv(
    filepath_or_buffer='../input/g-research-crypto-forecasting/train.csv'
)
crypto_df.head(n=5)

In [None]:
# Read asset_details.csv and display the whole table as the cell output.
asset_details = pd.read_csv(
    filepath_or_buffer='../input/g-research-crypto-forecasting/asset_details.csv'
)
asset_details

In [None]:
# Rows with Asset_ID 1 become `btc` and rows with Asset_ID 6 become `eth`;
# each subset is re-indexed by its 'timestamp' column.
is_btc_row = crypto_df['Asset_ID'] == 1
is_eth_row = crypto_df['Asset_ID'] == 6
btc = crypto_df.loc[is_btc_row].set_index('timestamp')
eth = crypto_df.loc[is_eth_row].set_index('timestamp')

# Print dtype and non-null count summaries (ETH first, then BTC).
eth.info(show_counts=True)
btc.info(show_counts=True)

In [None]:
# Per-column count of missing values for both assets.
# A notebook cell only displays its last expression, so two bare expressions
# would silently discard the ETH counts; combine them into one table instead.
pd.concat([eth.isna().sum(), btc.isna().sum()], axis=1, keys=['ETH', 'BTC'])

In [None]:
# Reinterpret the first and last index values of each asset as datetime64[s]
# so the covered period can be printed in human-readable form.
beg_btc, end_btc = (btc.index[pos].astype('datetime64[s]') for pos in (0, -1))
beg_eth, end_eth = (eth.index[pos].astype('datetime64[s]') for pos in (0, -1))

print('BTC Data are from ', beg_btc,'to ',end_btc)
print('ETH data are from ', beg_eth,' to ',end_eth)

In [None]:
# Differences between consecutive ETH index values, tallied by frequency;
# anything other than a single dominant gap means rows are missing.
eth_gaps = eth.index[1:] - eth.index[:-1]
eth_gaps.value_counts().head()

In [None]:
# Put each asset on an evenly spaced index (step 60) running from its first to
# its last original index value. Grid points absent from the data are filled
# from the preceding row (method='pad').
eth_grid = range(eth.index[0], eth.index[-1] + 60, 60)
btc_grid = range(btc.index[0], btc.index[-1] + 60, 60)

eth = eth.reindex(eth_grid, method='pad')
btc = btc.reindex(btc_grid, method='pad')

In [None]:
# Side-by-side plots of the Close column for BTC (left) and ETH (right).
# Every call goes through the explicit Axes objects rather than the implicit
# pyplot "current axes", so each label/legend is visibly tied to its panel.
f = plt.figure(figsize=(15,4))

ax = f.add_subplot(121)
ax.plot(btc['Close'], color='green', label='BTC')
ax.legend()
ax.set_xlabel('Time')
ax.set_ylabel('Bitcoin')

ax2 = f.add_subplot(122)
ax2.plot(eth['Close'], color='red', label='ETH')
ax2.legend()
ax2.set_xlabel('Time')
ax2.set_ylabel('Ethereum')  # spelling corrected (was 'Etherium')

f.tight_layout()
plt.show()

In [None]:
def log_return(series,periods=1):
    """Return the change in the natural log of ``series`` over ``periods`` steps.

    Parameters
    ----------
    series : object accepted by ``np.log`` whose result has a ``.diff`` method
        (in this notebook, a pandas Series).
    periods : int, default 1
        Number of steps between the two values being differenced.

    Returns
    -------
    Same kind of object as ``np.log(series)``; for a pandas Series the first
    ``periods`` entries are NaN because they have no earlier value to compare to.
    """
    log_values = np.log(series)
    return log_values.diff(periods=periods)

In [None]:
import scipy.stats as stats

# One-step log returns of Close for each asset. The first element is dropped
# because differencing leaves it as NaN. Each series gets a distinct name so
# the two can sit side by side as columns.
lret_btc = log_return(btc.Close).iloc[1:].rename('BTC_lret')
lret_eth = log_return(eth.Close).iloc[1:].rename('ETH_lret')

# Column-wise concatenation: rows are aligned on the shared index.
two_assets = pd.concat([lret_btc, lret_eth], axis=1)
two_assets

In [None]:
# Bucket rows by integer-dividing the index by 10000*60, compute the
# correlation matrix inside every bucket, then keep only the BTC-vs-ETH entry
# so the result is one correlation value per bucket.
window_id = two_assets.index // (10000 * 60)
corr_time = (
    two_assets.groupby(window_id)
    .corr()
    .loc[:, 'BTC_lret']
    .loc[:, 'ETH_lret']
)

# A bare plt.xticks() only *returns* the current ticks (cluttering the cell
# output) and changes nothing, so it is replaced by labels that make the
# figure readable on its own.
corr_ax = corr_time.plot()
corr_ax.set_xlabel('Window (index // 600000)')
corr_ax.set_ylabel('Correlation')
corr_ax.set_title('BTC vs ETH log-return correlation per window')
plt.show()

In [None]:
def upper_shadow(asset):
    """Return ``High`` minus the larger of ``Close`` and ``Open`` (element-wise)."""
    candle_top = np.maximum(asset.Close, asset.Open)
    return asset.High - candle_top


def lower_shadow(asset):
    """Return the smaller of ``Close`` and ``Open`` minus ``Low`` (element-wise)."""
    candle_bottom = np.minimum(asset.Close, asset.Open)
    return candle_bottom - asset.Low

# Feature columns per asset, in order:
#   1. 5-step log return of VWAP
#   2. absolute 1-step log return of VWAP
#   3. upper shadow
#   4. lower shadow
# The regression target is the asset's own `Target` column.
btc_feature_columns = [
    log_return(btc.VWAP, periods=5),
    log_return(btc.VWAP, periods=1).abs(),
    upper_shadow(btc),
    lower_shadow(btc),
]
X_btc = pd.concat(btc_feature_columns, axis=1)
y_btc = btc.Target

eth_feature_columns = [
    log_return(eth.VWAP, periods=5),
    log_return(eth.VWAP, periods=1).abs(),
    upper_shadow(eth),
    lower_shadow(eth),
]
X_eth = pd.concat(eth_feature_columns, axis=1)
y_eth = eth.Target

In [None]:
import calendar
import time  # no longer used in this cell; kept in case later cells rely on it


def totimestamp(s):
    """Convert a ``DD/MM/YYYY`` date string to epoch seconds at 00:00 UTC.

    ``time.mktime`` interprets the parsed date in the machine's *local*
    timezone, so the same string produced different window bounds on
    different machines. ``calendar.timegm`` treats it as UTC, which matches
    how the notebook reads the index elsewhere (as plain seconds since the
    Unix epoch, via ``datetime64[s]``).

    Parameters
    ----------
    s : str
        Date formatted as ``"%d/%m/%Y"``.

    Returns
    -------
    numpy.int64
        Seconds since 1970-01-01 00:00:00 UTC (64-bit so dates after 2038
        do not overflow).

    Raises
    ------
    ValueError
        If ``s`` does not match the ``"%d/%m/%Y"`` format.
    """
    parsed = datetime.strptime(s, "%d/%m/%Y")
    return np.int64(calendar.timegm(parsed.timetuple()))


# [start, end] bounds used to slice the index with .loc.
# NOTE(review): each end bound is midnight at the *start* of the 30th, so the
# rest of that day (and 31 May) falls outside the window - confirm intended.
train_window = [totimestamp('01/05/2021'), totimestamp('30/05/2021')]
test_window = [totimestamp('01/06/2021'), totimestamp('30/06/2021')]
test_window

In [None]:
# Label-based row selections for the two windows; .loc slicing keeps both
# endpoints when they are present in the index.
train_rows = slice(train_window[0], train_window[1])
test_rows = slice(test_window[0], test_window[1])

# Missing values are replaced by 0 before converting to plain NumPy arrays.
# NOTE(review): NaN targets are zero-filled too rather than dropped - confirm
# that is the intended treatment.

# --- BTC ---
X_btc_train = X_btc.loc[train_rows].fillna(0).to_numpy()
y_btc_train = y_btc.loc[train_rows].fillna(0).to_numpy()
X_btc_test = X_btc.loc[test_rows].fillna(0).to_numpy()
y_btc_test = y_btc.loc[test_rows].fillna(0).to_numpy()

# --- ETH ---
X_eth_train = X_eth.loc[train_rows].fillna(0).to_numpy()
y_eth_train = y_eth.loc[train_rows].fillna(0).to_numpy()
X_eth_test = X_eth.loc[test_rows].fillna(0).to_numpy()
y_eth_test = y_eth.loc[test_rows].fillna(0).to_numpy()

In [None]:
# Standardise features with statistics learned from the training window only,
# then apply the same transform to the test window.
# Each asset gets its own scaler: calling fit_transform a second time on a
# shared instance overwrites the first asset's statistics, so the BTC scaling
# could not be reused or inspected afterwards.
scaler_btc = StandardScaler()
X_btc_train_scaled = scaler_btc.fit_transform(X_btc_train)
X_btc_test_scaled = scaler_btc.transform(X_btc_test)

scaler_eth = StandardScaler()
X_eth_train_scaled = scaler_eth.fit_transform(X_eth_train)
X_eth_test_scaled = scaler_eth.transform(X_eth_test)

# Backward-compatible alias: `scaler` used to end this cell fitted on ETH.
scaler = scaler_eth

In [None]:
# Baseline: ordinary least squares on the scaled features, one model per asset.
# Separate estimators keep the fitted BTC model available after the ETH fit
# (re-fitting a single shared instance would overwrite it).
lr_btc = LinearRegression()
lr_btc.fit(X_btc_train_scaled, y_btc_train)
y_pred_lr_btc = lr_btc.predict(X_btc_test_scaled)

lr_eth = LinearRegression()
lr_eth.fit(X_eth_train_scaled, y_eth_train)
y_pred_lr_eth = lr_eth.predict(X_eth_test_scaled)

# Backward-compatible alias: `lr` used to end this cell fitted on ETH.
lr = lr_eth


In [None]:
# Score = Pearson correlation between the BTC predictions and the BTC test
# targets (off-diagonal entry of the 2x2 correlation matrix), shown to 2 d.p.
btc_test_corr = np.corrcoef(y_pred_lr_btc, y_btc_test)[0, 1]
print('Test Score LR Baseline: BTC', f'{btc_test_corr:.2f}')