# Crypto_DataComplement_Plot
- Plot the Target and Close for each cryptocurrency.
- Complement (gap-fill) each cryptocurrency's data so that all assets cover the same time period.

In [None]:
import os
import random
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
import gresearch_crypto
from tqdm.notebook import trange, tqdm
import matplotlib.pyplot as plt
import gresearch_crypto

In [None]:
# Kaggle input locations.
TRAIN_CSV = '/kaggle/input/g-research-crypto-forecasting/train.csv'
ASSET_DETAILS_CSV = '/kaggle/input/g-research-crypto-forecasting/asset_details.csv'

# Run-wide settings: the seed handed to fix_seeds() and the switch that
# controls whether the date-cutoff filter is applied after loading.
SEED = 2021
REMOVE_LB_TEST_OVERLAPPING_DATA = True


def fix_seeds(seed):
    """Seed the random number generators used by this notebook.

    Seeds NumPy's global generator and Python's ``random`` module with
    ``seed``, then stores ``str(seed)`` in the ``PYTHONHASHSEED``
    environment variable.

    NOTE: per the Python documentation ``PYTHONHASHSEED`` is read when an
    interpreter starts, so the environment variable influences processes
    launched afterwards rather than the hashing of the running process.
    """
    for seed_fn in (np.random.seed, random.seed):
        seed_fn(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


fix_seeds(SEED)

## Load data

In [None]:
# Read the training rows and the asset metadata; the metadata is ordered by
# Asset_ID so later per-asset loops run in ID order.
train_df = pd.read_csv(TRAIN_CSV)
asset_details_df = pd.read_csv(ASSET_DETAILS_CSV)
asset_details_df = asset_details_df.sort_values('Asset_ID')

# Optionally keep only rows strictly before 2021-06-13 00:00:00.
# NOTE(review): going by the flag name, later rows presumably overlap the
# leaderboard test period -- confirm against the competition description.
if REMOVE_LB_TEST_OVERLAPPING_DATA:
    # 'timestamp' is interpreted as seconds since the Unix epoch.
    train_df['datetime'] = pd.to_datetime(train_df['timestamp'], unit='s')
    is_before_cutoff = train_df['datetime'] < '2021-06-13 00:00:00'
    train_df = train_df[is_before_cutoff]

# Preview the first three rows of each frame.
for preview_df in (train_df, asset_details_df):
    display(preview_df.head(3))

## Data complement

In [None]:
def data_complement(train_df, asset_details_df):
    """Put every asset on one shared 60-second timestamp grid and fill gaps.

    For each ``Asset_ID`` listed in ``asset_details_df`` the matching rows of
    ``train_df`` are indexed by ``timestamp`` and reindexed onto a grid that
    runs from the smallest to the largest timestamp of the whole
    ``train_df`` (inclusive) in steps of 60. A progress line with the row
    count before and after is printed per asset.

    Gap handling, in order:

    * Grid points missing for the asset are created by the reindex with
      ``method='pad'``, i.e. copied from the asset's most recent earlier
      row (all columns, including ``Target``). Only grid points before the
      asset's first row, and NaNs already present in the data, are left for
      the steps below.
    * ``datetime`` is recomputed from the index (seconds since the epoch)
      and ``Asset_ID`` is set to the current asset.
    * In the price/volume columns, +/-inf is turned into NaN, NaNs are
      interpolated against the index values, and anything still missing is
      replaced by that column's mean for the asset.
    * Missing ``Target`` values become ``0.0``.

    Args:
        train_df: Frame with at least ``timestamp``, ``Asset_ID``,
            ``Target`` and the columns in ``fill_cols``. Timestamps of one
            asset must be unique and increasing for the padded reindex.
        asset_details_df: Frame with ``Asset_ID`` and ``Asset_Name``.

    Returns:
        The per-asset frames concatenated row-wise, indexed by timestamp.
    """
    # The grid and the column list depend only on the inputs, not on the
    # asset, so build them once instead of rescanning the whole timestamp
    # column (min and max) on every iteration.
    full_index = range(train_df.timestamp.min(), train_df.timestamp.max() + 60, 60)
    fill_cols = ['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP']

    asset_df_list = []
    for asset_id in tqdm(asset_details_df.Asset_ID):

        asset_name = asset_details_df.loc[asset_details_df.Asset_ID == asset_id, 'Asset_Name'].values[0]
        print(f"{asset_id: >2} {asset_name: <20}", end=' ')
        asset_df = train_df.loc[train_df.Asset_ID == asset_id].set_index('timestamp')
        print(f"Length : {len(asset_df):,}", end=' -> ')

        # Align to the shared grid; rows that did not exist are forward-filled.
        asset_df = asset_df.reindex(full_index, method='pad')
        asset_df['datetime'] = pd.to_datetime(asset_df.index, unit='s')

        # Fill what the padding could not (leading rows, original NaN/inf).
        asset_df['Asset_ID'] = asset_id
        asset_df[fill_cols] = asset_df[fill_cols].replace([np.inf, -np.inf], np.nan)  # Replace Infinite
        asset_df[fill_cols] = asset_df[fill_cols].interpolate('index')
        asset_df['Target'] = asset_df['Target'].fillna(value=0.0)
        asset_df[fill_cols] = asset_df[fill_cols].fillna(asset_df[fill_cols].mean())

        # No dropna on 'Target' here: every missing Target was just set to
        # 0.0, so such a call could never remove a row.

        print(f"{len(asset_df):,}")
        asset_df_list.append(asset_df)

    return pd.concat(asset_df_list, axis=0)

In [None]:
# Build the gap-filled frame in which every asset spans the same timestamps.
train_complemented_df = data_complement(train_df, asset_details_df)

## Data view

In [None]:
def plt_assets(df, asset_details_df):
    """Draw one panel per asset with Close and Target on twin y-axes.

    Panels are laid out in two columns on a single 18x25 inch figure, in the
    order of ``asset_details_df.Asset_ID``. In each panel ``Close`` is drawn
    in red against the left axis and ``Target`` in semi-transparent blue
    against a twin right axis; the panel's y-label is the asset name. The
    x-values are whatever index ``df`` carries. The figure is shown with
    ``plt.show()`` and nothing is returned.

    Args:
        df: Frame with ``Asset_ID``, ``Close`` and ``Target`` columns.
        asset_details_df: Frame with ``Asset_ID`` and ``Asset_Name``.
    """
    asset_ids = list(asset_details_df.Asset_ID)

    # Keep the 7x2 layout, but add rows when there are more than 14 assets
    # so add_subplot never gets a position outside the grid.
    n_cols = 2
    n_rows = max(7, (len(asset_ids) + n_cols - 1) // n_cols)

    # Create the figure first and adjust *it*. Calling plt.subplots_adjust
    # before the figure exists acts on an implicit, empty figure instead, so
    # the spacing never reaches the panels and a blank figure is shown.
    # wspace/hspace are fractions of the average axes size; values such as
    # 10.0/2.0 would shrink the panels to slivers once actually applied, so
    # moderate values are used here.
    fig = plt.figure(figsize=(18, 25))
    fig.subplots_adjust(wspace=0.3, hspace=0.5)

    for i, asset_id in enumerate(asset_ids):
        asset_name = asset_details_df.loc[asset_details_df.Asset_ID == asset_id, 'Asset_Name'].values[0]
        asset_rows = df.Asset_ID == asset_id

        # Left axis: Close.
        ax = fig.add_subplot(n_rows, n_cols, i + 1)
        ax.set_xlabel('DateTime')
        ax.set_ylabel(asset_name)
        ax.plot(df.loc[asset_rows, 'Close'], color='red', label='Close')
        ax.legend(loc='upper left')

        # Right axis: Target, sharing the x-axis with Close.
        ax2 = ax.twinx()
        ax2.grid(True)
        ax2.plot(df.loc[asset_rows, 'Target'], color='blue', alpha=0.5, label='Target')
        ax2.legend(loc='upper right')

    plt.show()

In [None]:
# Plot Close and Target for every asset from the complemented data.
plt_assets(train_complemented_df, asset_details_df)

In [None]:
# Show the first three rows of the complemented frame.
display(train_complemented_df.head(3))