In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import time

In [None]:
# Directory holding the competition CSV files. File names are appended to this
# string directly, so the trailing slash is required.
data_folder = "../input/g-research-crypto-forecasting/"

In [None]:
# Load the training data and preview its first 10 rows.
crypto_df = pd.read_csv(data_folder + 'train.csv')
crypto_df.head(10)

In [None]:
# Summary statistics of the raw (uncleaned) training data.
crypto_df.describe()

In [None]:
# Column dtypes plus non-null counts (counts are forced on explicitly).
crypto_df.info(show_counts=True)

In [None]:
# Number of missing values in each column before cleaning.
crypto_df.isna().sum()

In [None]:
# Clean the raw frame in one pass:
#   1. treat +/-inf as missing,
#   2. drop every row that has a missing value in any column,
#   3. drop exact duplicate rows.
# A method chain that rebinds `crypto_df` is used instead of three
# `inplace=True` calls: the result is the same, but the steps read top-down
# and no half-cleaned intermediate state is left behind if a step fails.
crypto_df = (
    crypto_df
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .drop_duplicates()
)

In [None]:
# Re-inspect the frame after cleaning.
# `DataFrame.info()` prints its report itself and returns None, so it must not
# be wrapped in `display()` (that would render a stray "None" under the report).
crypto_df.info(show_counts=True)
display(crypto_df.isna().sum())
crypto_df.describe()

In [None]:
# Show any rows whose VWAP is missing. Because this runs after the dropna()
# cleaning step above, an empty result is the expected outcome.
display(crypto_df[crypto_df['VWAP'].isna()])

* **timestamp:** All timestamps are Unix timestamps in seconds (the number of seconds elapsed since 1970-01-01 00:00:00.000 UTC). Timestamps in this dataset are multiples of 60, indicating minute-by-minute data.
* **Asset_ID:** The asset ID corresponding to one of the cryptocurrencies (e.g. Asset_ID = 1 for Bitcoin). The mapping from Asset_ID to crypto asset is contained in asset_details.csv.
* **Count:** Total number of trades in the time interval (last minute).
* **Open:** Opening price of the time interval (in USD).
* **High:** Highest price reached during time interval (in USD).
* **Low:** Lowest price reached during time interval (in USD).
* **Close:** Closing price of the time interval (in USD).
* **Volume:** Quantity of asset bought or sold, displayed in base currency USD.
* **VWAP:** The average price of the asset over the time interval, weighted by volume. VWAP is an aggregated form of trade data.
* **Target:** Residual log-returns for the asset over a 15-minute horizon.

In [None]:
# Load the asset lookup table (used below for its Asset_ID and Asset_Name
# columns) and show it in full.
asset_details_df = pd.read_csv(data_folder + 'asset_details.csv')
display(asset_details_df)

In [None]:
# Candlestick chart of the last 200 rows of every asset.
# Assets and their names come from asset_details_df (ordered by Asset_ID)
# instead of a hard-coded range(0, 14), so the loop neither misses assets nor
# raises IndexError when an ID has no entry in the lookup table.
for asset_id, asset_name in (
    asset_details_df
    .sort_values('Asset_ID')[['Asset_ID', 'Asset_Name']]
    .itertuples(index=False, name=None)
):
    asset_bars = crypto_df[crypto_df["Asset_ID"] == asset_id].set_index("timestamp")
    if asset_bars.empty:
        # Nothing to draw for an asset that has no rows left after cleaning.
        continue
    recent_bars = asset_bars.iloc[-200:]  # most recent rows only
    fig = go.Figure(data=[go.Candlestick(
        # Convert Unix seconds to datetimes so the x axis is readable.
        x=pd.to_datetime(recent_bars.index, unit='s'),
        open=recent_bars['Open'],
        high=recent_bars['High'],
        low=recent_bars['Low'],
        close=recent_bars['Close'],
    )])
    fig.update_layout(title=asset_name)
    fig.show()

In [None]:
# First 10 rows for Asset_ID 0, shown before the date_time column is added.
crypto_df[crypto_df["Asset_ID"]==0].head(10)

In [None]:
# Add a human-readable datetime column derived from the Unix `timestamp`.
# pd.to_datetime(..., unit='s') is vectorized and always yields UTC wall-clock
# values, whereas mapping datetime.fromtimestamp over the column runs one
# Python call per row and converts to whatever timezone the machine is set to.
crypto_df['date_time'] = pd.to_datetime(crypto_df['timestamp'], unit='s')

In [None]:
# Same preview for Asset_ID 0, now including the new date_time column.
crypto_df[crypto_df["Asset_ID"]==0].head(10)

In [None]:
# For every asset, plot VWAP and Target against date_time (one figure each).
# Asset IDs are read from asset_details_df rather than a hard-coded range, and
# every figure gets a title and axis labels so it can be read on its own
# (previously the Target figure had no title at all).
for i in sorted(asset_details_df['Asset_ID']):
    asset_name = asset_details_df.loc[asset_details_df['Asset_ID'] == i, 'Asset_Name'].iloc[0]
    asset_rows = crypto_df[crypto_df["Asset_ID"] == i]
    for column in ('VWAP', 'Target'):
        fig, ax = plt.subplots(figsize=(12, 8))
        ax.plot(asset_rows['date_time'], asset_rows[column])
        ax.set_title(f"{asset_name} - {column}")
        ax.set_xlabel('date_time')
        ax.set_ylabel(column)
        plt.show()
        # Release the figure explicitly; two figures per asset add up quickly.
        plt.close(fig)

In [None]:
# auxiliary function, from a "dd/mm/YYYY" date string to a Unix timestamp
def totimestamp(s):
    """Convert a ``dd/mm/YYYY`` date string to Unix seconds at 00:00:00 UTC.

    The date is interpreted as UTC so the result does not depend on the
    timezone of the machine running the notebook (``time.mktime`` would use
    local time). The value is returned as ``np.int64`` so that dates past
    January 2038 do not overflow, as they would with ``np.int32``.

    Parameters
    ----------
    s : str
        Date formatted as ``%d/%m/%Y``, e.g. ``'01/05/2021'`` for 1 May 2021.

    Returns
    -------
    numpy.int64
        Seconds elapsed since 1970-01-01 00:00:00 UTC.

    Raises
    ------
    ValueError
        If ``s`` does not match the ``%d/%m/%Y`` format.
    """
    elapsed = datetime.strptime(s, "%d/%m/%Y") - datetime(1970, 1, 1)
    return np.int64(elapsed.total_seconds())


# log returns: difference of the natural logarithm across `periods` rows
def log_return(series, periods=1):
    """Return ``log(series)`` differenced over ``periods`` steps.

    The logarithm is taken element-wise and each entry is then reduced by the
    entry ``periods`` positions before it, so the leading ``periods`` entries
    of the result are NaN.
    """
    log_values = np.log(series)
    return log_values.diff(periods)


# Build a dataframe with one column of 60-second log returns per asset for the
# window 01/01/2021 - 01/05/2021 (dd/mm/YYYY), then plot the correlation
# matrix of those columns.
window_start = totimestamp('01/01/2021')
window_end = totimestamp('01/05/2021')

all_assets_2021 = pd.DataFrame([])
# Asset names in the order their columns are added, i.e. the row/column order
# of the correlation matrix. Used for the tick labels below.
plotted_asset_names = []
for asset_id, asset_name in zip(asset_details_df.Asset_ID, asset_details_df.Asset_Name):
    asset = crypto_df[crypto_df["Asset_ID"] == asset_id].set_index("timestamp")
    asset = asset.loc[window_start:window_end]
    if asset.empty:
        # No rows in the window: asset.index[0] below would raise IndexError.
        continue
    # Put the asset on a regular 60-second grid, carrying the last known row
    # forward across any missing minutes.
    asset = asset.reindex(range(asset.index[0], asset.index[-1] + 60, 60), method='pad')
    # fillna(0) is deliberately not applied to Close: log(0) is -inf, which
    # would turn a gap into infinite returns and poison the correlations.
    lret = log_return(asset.Close)[1:]
    # rsuffix keeps the column names unique ("Close", "Close<asset name>", ...).
    all_assets_2021 = all_assets_2021.join(lret, rsuffix=asset_name, how="outer")
    plotted_asset_names.append(asset_name)

fig, ax = plt.subplots(figsize=(12, 8))
ax.set_title("Correlation of 60-second log returns, 01/01/2021 - 01/05/2021")
heatmap = ax.imshow(all_assets_2021.corr())
# Matrix row/column k belongs to the k-th asset added above, so ticks are
# placed at 0..n-1 in that order. Using Asset_ID values as positions would
# only label correctly if asset_details_df were sorted by a 0..n-1 Asset_ID.
tick_positions = list(range(len(plotted_asset_names)))
ax.set_yticks(tick_positions)
ax.set_yticklabels(plotted_asset_names)
ax.set_xticks(tick_positions)
ax.set_xticklabels(plotted_asset_names, rotation='vertical')
fig.colorbar(heatmap, ax=ax)
plt.show()
