## Dependencies

In [None]:
import gc
import warnings

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)
import tensorflow as tf
import statsmodels.api as sm
from pylab import rcParams
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import time
import datetime

In [None]:
def collect_gc():
    """Run the garbage collector seven times, reporting each pass.

    Every pass prints a status line followed by the value returned by
    ``gc.collect()``.
    """
    for _ in range(7):
        print('collecting garbage...')
        collected = gc.collect()
        print(collected)
        
# Suppress all warnings for the rest of the session (no category filter is given).
warnings.filterwarnings('ignore')

## Loading the Data

In [None]:
# Directory that holds the competition CSV files.
folder = '../input/g-research-crypto-forecasting/'

# Training data; low_memory=False lets pandas infer dtypes from the whole file.
base_train = pd.read_csv(f'{folder}train.csv', low_memory=False)

# Asset metadata indexed by the first CSV column, re-ordered to labels 0..13.
asset_mapping = pd.read_csv(
    f'{folder}asset_details.csv', index_col=0
).reindex(list(range(14)))
asset_mapping

## Preprocessing Data

In [None]:
# Decode the epoch-seconds 'timestamp' column into a datetime 'time' column.
base_train['time'] = pd.to_datetime(base_train.timestamp, unit='s')
base_train.head(50)

# Number of missing values in each column.
base_train.isna().sum()

In [None]:
# One frame per Asset_ID value 0..13, kept in ID order.
assets = [base_train[base_train['Asset_ID'] == asset_id] for asset_id in range(14)]

# Readable names for the same frame objects held in `assets`.
(bin_coin, btc, btc_cash, card, doge, eos_io, eth,
 eth_class, iota, ltc, maker, mon, stela, tron) = assets

In [None]:
# Report every physical device TensorFlow can see.
print("--> Checking for physical Tensorflow devices")
for device in tf.config.list_physical_devices():
    print(f": {device.name}")

# Re-index the Bitcoin frame in place by its 'time' values.
btc.set_index(btc['time'], drop=True, inplace=True)

In [None]:
# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
btc.info()

# Chart only the last 8000 rows of the Bitcoin frame.
btc_plt = btc.iloc[-8000:]

# Building and showing a Plotly figure runs no TensorFlow ops, so it is not
# placed inside a tf.device scope.
fig = go.Figure(
    data=go.Ohlc(
        x=btc_plt.time,
        open=btc_plt.Open,
        high=btc_plt.High,
        low=btc_plt.Low,
        close=btc_plt.Close,
    )
)
fig.show()

In [None]:
# Handling missing values
# Take a fresh Ethereum slice (Asset_ID 6) and index it in place by 'time'.
# This rebinds `eth`; the frame stored earlier in `assets` is a different object.
eth = base_train.loc[base_train['Asset_ID'] == 6]
eth.set_index(eth.time, drop=True, inplace=True)

btc.isnull().sum()

# Forward-fill gaps with the last observed value. ffill() is used instead of
# fillna(method='ffill'), whose `method` argument is deprecated in recent
# pandas releases. The fill stays in place so the `btc` object shared with
# `assets` is the one that gets updated.
btc.ffill(inplace=True)
eth.ffill(inplace=True)

# Remaining missing values per column (leading gaps cannot be forward-filled).
eth.isnull().sum()

## Analysis and Visualization

### Seasonality and Trend

In [None]:
def decompose(price):
    """Plot the additive seasonal decomposition of a price series.

    Args:
        price: Time series passed straight to
            ``sm.tsa.seasonal_decompose``.

    Side effects:
        Sets the global Matplotlib figure size to 15x7 and shows the
        decomposition figure.
    """
    rcParams['figure.figsize'] = 15, 7
    # 'additive' is the documented name of the additive model.
    decomp_series = sm.tsa.seasonal_decompose(price, model='additive')
    decomp_series.plot()
    plt.show()

# Decompose the monthly mean close price of each asset in turn.
for count, asset in enumerate(assets):
    # Work on a copy indexed by time so the series can be resampled.
    asset2 = asset.set_index(asset.time)
    # Asset name wrapped in ANSI escape codes for highlighted output.
    print(f' \033[92m \033[1m{asset_mapping.Asset_Name[count]}\033[1m \033[92m ')
    monthly_close = asset2.Close.resample('M').mean()
    decompose(monthly_close)
    # Blank lines between assets.
    print('\n')
    print('\n')

### Stationarity Test

In [None]:
def stationarity_test(col):
    """Run an Augmented Dickey-Fuller test on a series and print the outcome.

    Missing values are dropped before testing, so the reported number of
    observations refers to the non-missing values only.

    Args:
        col: One-dimensional series of values to test.

    Prints the ADF statistic, p-value, number of lags and observations
    used, a stationarity verdict at the 0.05 level, and the critical
    values returned by ``adfuller``.
    """
    # adfuller() does not accept missing values, and resampled inputs hold
    # NaN for every empty bin, so drop them up front.
    series = pd.Series(col).dropna()

    print('--------------Stationarity Test--------------')
    adf_stat, p_value, used_lags, n_obs, critical_values = adfuller(series)[:5]
    print('ADF Statistic:', adf_stat)
    print('p-value:', p_value)
    print('No. of lags used:', used_lags)
    print('No. of observations used :', n_obs)

    # A p-value below 0.05 is reported as stationary.
    verdict = 'TSD is Stationary' if p_value < 0.05 else 'TSD is not Stationary'
    print(verdict)

    print('Critical Values:')
    for level, threshold in critical_values.items():
        print(f' {level} : {threshold} ')
    print('\n')

def to_datetime(s):
    """Convert epoch-second values to pandas datetimes."""
    return pd.to_datetime(s, unit='s')


# Index every asset frame by its observation time. Apart from frames already
# re-indexed by time, the existing row labels are the ones inherited from
# base_train rather than epoch seconds, so the index is built from the
# 'timestamp' column; converting the labels themselves would yield bogus dates
# and the daily resample below would not bucket by real calendar days.
for a in assets:
    a.index = pd.DatetimeIndex(to_datetime(a['timestamp']), name='time')

# ADF test on the daily mean close price of each asset.
for count, asset in enumerate(assets):
    print(asset_mapping.Asset_Name[count])
    # Days without observations resample to NaN; drop them before testing.
    stationarity_test(asset.Close.resample('D').mean().dropna())

In [None]:
# Run the repeated garbage-collection passes defined in collect_gc().
collect_gc()

### Price Change

In [None]:
def get_cmap(n, name='hsv'):
    """Return the Matplotlib colormap ``name`` resampled to ``n`` entries.

    Args:
        n: Number of entries in the colormap lookup table.
        name: Name of a registered Matplotlib colormap.

    Returns:
        A colormap that can be called with an integer index to get a colour.
    """
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    return plt.get_cmap(name, n)


# One colour slot per asset frame.
cmap = get_cmap(len(assets))

def diff(tsd, periods=1):
    """Return the difference of the natural log of ``tsd``.

    Args:
        tsd: Series of values; the natural log is taken element-wise.
        periods: Number of steps to shift when differencing.

    Returns:
        The log series minus itself shifted by ``periods``; the first
        ``periods`` entries are NaN.
    """
    log_values = np.log(tsd)
    return log_values.diff(periods=periods)

# One stacked panel per asset showing the log change of its close price.
fig, ax = plt.subplots(nrows=len(assets), ncols=1, figsize=(17, 25))
for count, asset in enumerate(assets):
    panel = ax[count]
    log_change = diff(asset.Close)
    # The first difference is undefined, so element 0 is skipped on both axes.
    panel.plot(asset.time[1:], log_change[1:], c=cmap(count))
    panel.set_title(asset_mapping.Asset_Name[count])

plt.suptitle('Price Change \n')
plt.tight_layout()
plt.show()

In [None]:
# ADF test on the daily mean of each asset's log price change.
for count, asset in enumerate(assets):
    asset_name = asset_mapping.Asset_Name[count]
    print(asset_name)
    daily_change = diff(asset.Close).resample('D').mean()
    stationarity_test(daily_change)

### Autocorrelation and Partial Autocorrelation

In [None]:
# Grid with one row per asset: ACF on the left, PACF on the right.
fig, axes = plt.subplots(len(assets), 2, figsize=(15, 35))

for count, asset in enumerate(assets):
    acf_ax, pacf_ax = axes[count]
    asset_name = asset_mapping.Asset_Name[count]
    # Daily mean of the log price change, skipping the undefined first value.
    daily_change = diff(asset.Close)[1:].resample('D').mean()
    fig = plot_acf(daily_change, lags=20, ax=acf_ax)
    fig = plot_pacf(daily_change, lags=20, ax=pacf_ax)
    # Titles are set after plotting so they replace the defaults.
    acf_ax.set_title(f'{asset_name} ACF')
    pacf_ax.set_title(f'{asset_name} PACF')

plt.tight_layout()
plt.show()

### Checking for correlation between assets

#### 2018-2021

In [None]:
# One stacked panel per asset showing its close price over time.
fig, axes = plt.subplots(nrows=len(assets), ncols=1, figsize=(20, 40))

for count, (panel, asset) in enumerate(zip(axes, assets)):
    panel.plot(asset.time, asset.Close, color=cmap(count))
    panel.set_title(asset_mapping.Asset_Name[count])

plt.tight_layout()
plt.show()

#### September 2021

In [None]:
# One-month window: label-based slice of the time-indexed frames.
sep_window = slice('2021-09-01 00:01:00', '2021-09-30 00:01:00')
btc_sep = btc.loc[sep_window]
eth_sep = eth.loc[sep_window]

fig = plt.figure(figsize=(20, 8))

# Left panel: Ethereum close price.
eth_ax = fig.add_subplot(121)
eth_ax.set_title('Ethereum (September)')
eth_ax.set_xlabel('Time')
eth_ax.set_ylabel('Price')
eth_ax.plot(eth_sep.time, eth_sep.Close)

# Right panel: Bitcoin close price, drawn in red.
btc_ax = fig.add_subplot(122)
btc_ax.set_title('Bitcoin (September)')
btc_ax.plot(btc_sep.time, btc_sep.Close, color='r')
btc_ax.set_xlabel('Time')
btc_ax.set_ylabel('Price')

plt.tight_layout()
plt.show()

In [None]:
# Run the repeated garbage-collection passes defined in collect_gc().
collect_gc()

In [None]:
# Log price changes of both assets, aligned on their shared time index.
btc_change = diff(btc.Close)[1:].rename('Bitcoin')
eth_change = diff(eth.Close)[1:].rename('Etheruem')
asset_corr = pd.concat([btc_change, eth_change], axis=1)


def to_timestamp(a):
    """Return epoch seconds, as a list of floats, for the datetimes in ``a``.

    Naive datetimes are treated as UTC. The conversion is vectorised and
    does not depend on the machine's local timezone, unlike
    ``time.mktime``, which interprets its argument as local time.
    """
    whole_seconds = (pd.DatetimeIndex(a) - pd.Timestamp(0)) // pd.Timedelta(seconds=1)
    return whole_seconds.astype('float64').tolist()


asset_corr.index = to_timestamp(asset_corr.index)

# Group rows into consecutive windows of 10000 * 60 seconds and compute the
# Bitcoin/Ethereum correlation inside each window.
window_seconds = 10000 * 60
windowed_corr = asset_corr.groupby(asset_corr.index // window_seconds).corr()
# Select the Bitcoin column, then the Ethereum row of every window's matrix.
corr = windowed_corr.loc[:, "Bitcoin"].loc[:, "Etheruem"]

rcParams['figure.figsize'] = 15, 8
corr.plot()
plt.title('Bitcoin and Ethereum Correlation since 2018')
plt.show()