In [None]:
# packages

# standard
import numpy as np
import pandas as pd
import time

# plots
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

In [None]:
# List the contents of the input data directory in long format (sizes, timestamps).
# NOTE: the leading `!` is IPython shell-escape syntax, so this line only runs
# inside a notebook / IPython kernel and relies on an `ls` command being available.
!ls -l '../input/g-research-crypto-forecasting/'

In [None]:
# read the asset details CSV into a data frame and display it as the cell output
df_assets = pd.read_csv(
    '../input/g-research-crypto-forecasting/asset_details.csv'
)
df_assets

In [None]:
# bar chart: one bar per asset name, bar height = the asset's weight
fig, ax = plt.subplots()
ax.bar(x=df_assets['Asset_Name'], height=df_assets['Weight'])
ax.set_title('Asset Weights')
plt.xticks(rotation=90)  # draw the asset-name tick labels vertically
ax.grid()
plt.show()

In [None]:
# load training data (takes some time...) and report how long the read took
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() is wall-clock time and can jump if the system clock
# is adjusted while the file is being read.
t1 = time.perf_counter()
df_train = pd.read_csv('../input/g-research-crypto-forecasting/train.csv')
t2 = time.perf_counter()
print('Elapsed time [s]: ', np.round(t2 - t1, 2))

In [None]:
# show the first five rows of the training frame
df_train.head(n=5)

In [None]:
# structural summary of the training frame: every column listed (verbose)
# together with its non-null count and dtype
df_train.info(show_counts=True, verbose=True)

In [None]:
# distribution of the Target column over the whole training frame (100-bin histogram)
ax = df_train['Target'].plot.hist(bins=100)
ax.set_title('Target (histogram)')
ax.grid()
plt.show()

In [None]:
# horizontal box plot of the Target column over the whole training frame
ax = df_train['Target'].plot.box(vert=False)
ax.set_title('Target (boxplot)')
ax.grid()
plt.show()

In [None]:
# summary statistics of Target, reporting the 1/10/25/50/75/90/99 % percentiles
target_percentiles = [0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]
df_train['Target'].describe(percentiles=target_percentiles)

# Plot each asset

### Volume Weighted Average Price (VWAP)

In [None]:
# One scatter plot of VWAP against timestamp per asset.
# The asset IDs and names come from df_assets rather than a hard-coded
# range(0, 14): the loop then follows the asset table if it changes, and the
# title lookup can never hit an ID that is missing from that table.
asset_names = dict(zip(df_assets['Asset_ID'], df_assets['Asset_Name']))  # built once, outside the loop
for my_id in sorted(asset_names):
    # rows of the training frame belonging to this asset only
    df_select = df_train[df_train['Asset_ID'] == my_id]
    fig, ax = plt.subplots(figsize=(14, 4))
    ax.scatter(df_select['timestamp'], df_select['VWAP'], s=1)
    ax.set_title(f'Asset ID: {my_id} - {asset_names[my_id]}')
    ax.set_xlabel('timestamp')
    ax.set_ylabel('VWAP')
    ax.grid()
    plt.show()
    plt.close(fig)  # release the figure so open figures do not accumulate across iterations

### Target

In [None]:
# One scatter plot of Target against timestamp per asset.
# The asset IDs and names come from df_assets rather than a hard-coded
# range(0, 14): the loop then follows the asset table if it changes, and the
# title lookup can never hit an ID that is missing from that table.
asset_names = dict(zip(df_assets['Asset_ID'], df_assets['Asset_Name']))  # built once, outside the loop
for my_id in sorted(asset_names):
    # rows of the training frame belonging to this asset only
    df_select = df_train[df_train['Asset_ID'] == my_id]
    fig, ax = plt.subplots(figsize=(14, 4))
    ax.scatter(df_select['timestamp'], df_select['Target'], s=1)
    ax.set_title(f'Asset ID: {my_id} - {asset_names[my_id]}')
    ax.set_xlabel('timestamp')
    ax.set_ylabel('Target')
    ax.grid()
    plt.show()
    plt.close(fig)  # release the figure so open figures do not accumulate across iterations

# Target distribution by asset

In [None]:
# violin plot of Target grouped by Asset_ID, full value range
fig, ax = plt.subplots(figsize=(14, 6))
sns.violinplot(x='Asset_ID', y='Target', data=df_train, ax=ax)
plt.show()

#### Zoom in:

In [None]:
# same violin plot of Target by Asset_ID, with the y-axis restricted to
# [-0.02, 0.02] to zoom in on the region around zero
fig, ax = plt.subplots(figsize=(14, 6))
sns.violinplot(x='Asset_ID', y='Target', data=df_train, ax=ax)
ax.set_ylim(-0.02, 0.02)
plt.show()