# <span style='color:#A80808'>Goal</span>
Forecast short-term returns in 14 popular cryptocurrencies.

# <span style='color:#A80808'>Metric</span>
Submissions are evaluated on a weighted version of the Pearson correlation coefficient.

![](https://point-banque.fr/wp-content/uploads/2021/09/crypto-802x485.jpg)

# <span style='color:#A80808'>Data</span>

* timestamp - A timestamp for the minute covered by the row.
* Asset_ID - An ID code for the cryptoasset.
* Count - The number of trades that took place this minute.
* Open - The USD price at the beginning of the minute.
* High - The highest USD price during the minute.
* Low - The lowest USD price during the minute.
* Close - The USD price at the end of the minute.
* Volume - The number of cryptoasset units traded during the minute.
* VWAP - The volume weighted average price for the minute.
* Target - 15-minute residualized returns.

In [None]:
# Numerical / tabular stack.
import numpy as np
import pandas as pd
# Plotting libraries.
import matplotlib.pyplot as plt
plt.rcParams['axes.facecolor'] = 'gray'  # gray becomes the default axes background
import plotly.express as px
import seaborn as sns

import warnings
# NOTE(review): this silences every warning category, deprecations included —
# consider narrowing the filter to the specific warnings that are noisy here.
warnings.simplefilter('ignore')

# <span style='color:#A80808'>Load Data</span>

In [None]:
# Load the competition's training table into a DataFrame.
train = pd.read_csv(
    filepath_or_buffer='../input/g-research-crypto-forecasting/train.csv',
)

In [None]:
# Swap the epoch-seconds `timestamp` column for a datetime `time` column,
# then preview the first rows.
train['time'] = pd.to_datetime(train['timestamp'], unit='s')
train = train.drop(columns=['timestamp'])
train.head()

# <span style='color:#A80808'>Assets</span>

In [None]:
# Distinct asset IDs present in the training data, in order of first appearance.
train['Asset_ID'].unique()

In [None]:
# Per-asset metadata table; this notebook reads its Asset_ID, Asset_Name and
# Weight columns. The bare name on the last line displays the table.
asset_details = pd.read_csv(
    filepath_or_buffer='../input/g-research-crypto-forecasting/asset_details.csv',
)
asset_details

In [None]:
# Bar chart with one bar per asset showing its Weight value.
plt.figure(figsize=(10, 7))
plt.bar(
    x=asset_details['Asset_Name'],
    height=asset_details['Weight'],
    color='y',
)
# Asset names are long, so the tick labels are rotated to vertical.
plt.xticks(rotation=90, fontsize=16)
plt.ylabel('Weight', fontsize=16)
plt.title('Asset weights', fontsize=16)
plt.show()

# <span style='color:#A80808'>Target: 15-minute residualized returns</span>

In [None]:
# Histogram of Target over all rows, drawn with a logarithmic count axis.
plt.figure(figsize=(10, 5))
train['Target'].hist(bins=100, color='yellow')
plt.yscale('log')
plt.xlabel('Return', fontsize=16)
plt.ylabel('Log count', fontsize=16)
plt.show()

# <span style='color:#A80808'>Morning vs. Afternoon (UTC, split at 12:00)</span>

In [None]:
# Compare the Target distribution of rows stamped before 12:00 ("Morning")
# with rows stamped at or after 12:00 ("Afternoon").
#
# The hour of day is extracted once and the masks are applied to the Target
# column only, rather than re-deriving the hour and copying the whole frame
# for each of the four histograms.
hour = train['time'].dt.hour
layers = [
    (train['Target'][hour < 12], 'orange', 'Morning'),
    (train['Target'][hour >= 12], 'yellow', 'Afternoon'),
]

plt.figure(figsize=(15, 5))

# The two histograms overlap, so each panel stacks them in the opposite
# order: the left panel draws Afternoon last (on top), the right panel draws
# Morning last (on top).
for panel, ordered in enumerate((layers, layers[::-1]), start=1):
    plt.subplot(1, 2, panel)
    for values, color, label in ordered:
        values.hist(bins=100, color=color, label=label)
    plt.xlabel('Return', fontsize=16)
    plt.ylabel('Log count', fontsize=16)
    plt.yscale('log')
    plt.legend()

plt.show()

In [None]:
# Draw one Target histogram per asset, titled with the asset's name.

# Asset_ID -> Asset_Name lookup, built once instead of filtering
# asset_details on every iteration (the first row wins if an ID repeats).
asset_names = (
    asset_details.drop_duplicates('Asset_ID')
    .set_index('Asset_ID')['Asset_Name']
    .to_dict()
)

# groupby splits the frame in a single pass instead of one full boolean scan
# per asset; sort=False yields the groups in order of first appearance, which
# is the order train.Asset_ID.unique() produces.
# Asset_ID and df stay notebook-level names, since a later cell may read them.
for Asset_ID, df in train.groupby('Asset_ID', sort=False):
    plt.figure(figsize=(10, 5))
    df.Target.hist(bins=100, color='yellow')
    # Fall back to a title built from the ID when asset_details has no row
    # for this asset, rather than failing with an IndexError.
    title = asset_names.get(Asset_ID, f'Asset {Asset_ID}')
    plt.title(str(title), fontsize=16)
    plt.xlabel('Return', fontsize=16)
    plt.ylabel('Log count', fontsize=16)
    plt.yscale('log')
    plt.tight_layout()

    plt.show()

    # Blank lines to separate consecutive figures in the cell output.
    print('\n\n')
    