In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Folder holding the competition CSV files.
data_folder = "../input/g-research-crypto-forecasting/"


def _read_competition_csv(file_name):
    """Read one CSV file located directly inside ``data_folder``."""
    return pd.read_csv(data_folder + file_name)


# Load the four competition tables, in the same order as before.
crypto_df = _read_competition_csv('train.csv')
asset_details_df = _read_competition_csv('asset_details.csv')
supp_train_df = _read_competition_csv('supplemental_train.csv')
expl_test = _read_competition_csv('example_test.csv')

In [None]:
# Replace the spaces and dots inside each asset name with an underscore '_'.
# regex=False makes both replacements explicitly literal: '.' is a regex wildcard,
# and the default value of `regex` is not the same across pandas versions.
asset_details_df["Asset_Name"] = (
    asset_details_df["Asset_Name"]
    .str.replace(' ', '_', regex=False)
    .str.replace('.', '_', regex=False)
)

In [None]:
# Independent snapshot of the asset details, taken while Asset_ID is still a
# regular column (the heatmap cell reads asset_details.Asset_ID).
asset_details = asset_details_df.copy(deep=True)

In [None]:
# Create a dictionary that maps each Asset_ID to that coin's rows of crypto_df,
# indexed by timestamp.
# Each frame is also published as a module-level variable named after the asset
# (presumably why spaces and dots are removed from Asset_Name beforehand).
dataframes = {}
for asset_id, asset_name in zip(asset_details_df.Asset_ID, asset_details_df.Asset_Name):
    asset_frame = crypto_df[crypto_df["Asset_ID"] == asset_id].set_index("timestamp")
    # At notebook top level globals() is the same namespace the original vars() wrote to.
    globals()[asset_name] = asset_frame
    dataframes[asset_id] = asset_frame

In [None]:
# Index the asset details by Asset_ID so that names can be looked up by id.
# Guarded so that re-running this cell is harmless: once Asset_ID has become the
# index it is no longer a column, and an unguarded set_index would raise KeyError.
if "Asset_ID" in asset_details_df.columns:
    asset_details_df = asset_details_df.set_index("Asset_ID")

In [None]:
# Show the Asset_IDs for which a per-coin frame was built.
list(dataframes.keys())

In [None]:
# Some records were never received; this shows up as gaps between consecutive timestamps.
# To repair that, each frame is reindexed onto a regular grid that runs from its first
# to its last timestamp (inclusive) in steps of 60, and every newly created row is
# filled with the values of the row before it.
cleaned_dataframes = {
    asset_id: frame.reindex(
        range(frame.index[0], frame.index[-1] + 60, 60),
        method='pad',
    )
    for asset_id, frame in dataframes.items()
}

In [None]:
#Reduce the datasets to ease the correlation calculation
from datetime import datetime, timezone
import time  # no longer needed by totimestamp; kept in case other cells rely on it


def totimestamp(s):
    """Convert a ``dd/mm/YYYY`` date string to a Unix timestamp in whole seconds.

    The date is interpreted as midnight UTC. The previous implementation used
    ``time.mktime``, which interprets the date in the kernel's local timezone,
    so the result changed with the machine's timezone setting. It also cast to
    ``np.int32``, which cannot represent timestamps after January 2038.

    NOTE(review): this assumes the frames' ``timestamp`` index holds UTC epoch
    seconds -- confirm against the data description.

    Parameters
    ----------
    s : str
        Date formatted as ``%d/%m/%Y`` (day first), e.g. ``'01/05/2021'`` for 1 May 2021.

    Returns
    -------
    int
        Seconds since 1970-01-01 00:00:00 UTC.

    Raises
    ------
    ValueError
        If ``s`` does not match the ``%d/%m/%Y`` format.
    """
    midnight_utc = datetime.strptime(s, "%d/%m/%Y").replace(tzinfo=timezone.utc)
    return int(midnight_utc.timestamp())

# Keep only the rows of every cleaned frame whose timestamp lies between the two
# bounds below (dates are written day/month/year; label slicing includes both ends).
# An alternative that was tried is keeping just the last 100 rows: .iloc[-100:]
window_start = totimestamp('01/01/2021')
window_end = totimestamp('01/05/2021')
reduced_dataframes = {
    asset_id: frame.loc[window_start:window_end]
    for asset_id, frame in cleaned_dataframes.items()
}

In [None]:
# Show the Asset_IDs present after the time-window reduction.
list(reduced_dataframes.keys())

In [None]:
import plotly.graph_objects as go

# Draw one candlestick chart per asset over the reduced time window.
for asset_id, frame in reduced_dataframes.items():
    asset_name = asset_details_df.Asset_Name[asset_id]
    candles = go.Candlestick(
        # The index is sliced with epoch-second bounds in the reduction cell, so it is
        # converted to datetimes here; otherwise the x-axis shows raw integers.
        x=pd.to_datetime(frame.index, unit="s"),
        open=frame['Open'],
        high=frame['High'],
        low=frame['Low'],
        close=frame['Close'],
    )
    fig = go.Figure(data=[candles])
    # The title identifies the chart; it replaces the separate print() calls.
    fig.update_layout(title=f"{asset_name} (Asset_ID {asset_id})")
    fig.show()

In [None]:
# Helper used to turn a price series into log returns.
def log_return(series, periods=1):
    """Return the difference of the natural log of ``series`` over ``periods`` rows.

    Parameters
    ----------
    series : pandas.Series
        Values to take the logarithm of.
    periods : int, default 1
        Number of rows between the two values being differenced.

    Returns
    -------
    pandas.Series
        ``log(series)`` minus ``log(series)`` shifted by ``periods`` rows; the
        first ``periods`` entries are NaN because they have nothing to subtract.
    """
    log_values = np.log(series)
    return log_values.diff(periods=periods)

In [None]:
# Build a table with one column of close-price log returns per asset, aligned on timestamp.
#
# Changes from the earlier join-in-a-loop version:
# * Missing closes are forward-filled instead of replaced by 0. A price of 0 becomes
#   -inf under the logarithm and then +/-inf or NaN in the returns.
# * Every column is named after its asset. Repeated join(..., rsuffix=...) left the
#   first column called plain "Close" and the others "Close<Asset_Name>".
# * All columns are assembled with a single concat rather than one join per asset.
log_return_columns = {}
for asset_id, frame in reduced_dataframes.items():
    asset_name = asset_details_df.loc[asset_id, "Asset_Name"]
    close_prices = frame.Close.ffill()
    # The first row of a one-period difference is always NaN, so it is dropped.
    log_return_columns[asset_name] = log_return(close_prices).iloc[1:]

if log_return_columns:
    # Outer alignment on the timestamp index, sorted as the outer joins used to do.
    corr_cryptos = pd.concat(log_return_columns, axis=1).sort_index()
else:
    # pd.concat refuses an empty input; keep the original empty-frame result.
    corr_cryptos = pd.DataFrame([])

In [None]:
# Display the per-asset log-return table assembled in the previous cell.
corr_cryptos

In [None]:
import matplotlib.pyplot as plt

# Heatmap of the pairwise correlations between the assets' log returns.
corr_matrix = corr_cryptos.corr()

# Row/column k of the matrix belongs to the k-th asset in asset_details row order:
# the per-asset frames, and therefore the columns of corr_cryptos, were created by
# iterating over the asset details in that order. Ticks must therefore sit at
# positions 0..n-1. Using the Asset_ID values as positions labels the cells
# correctly only when every row's Asset_ID happens to equal its row position.
tick_labels = asset_details.Asset_Name.values
tick_positions = np.arange(len(tick_labels))

plt.imshow(corr_matrix);
plt.yticks(tick_positions, tick_labels);
plt.xticks(tick_positions, tick_labels, rotation='vertical');
plt.colorbar();
plt.title("Correlation of log returns");