In [7]:
import os
import pathlib

import pandas as pd
import numpy as np

In [4]:
# Root folder for all data files, relative to the notebook's working directory.
data_dir = pathlib.Path("..") / "data"

In [9]:
# Column dtypes are pinned at read time: narrow integers for the ID and
# trade-count columns, 64-bit floats for every price/volume/target column.
float_cols = ("Open", "High", "Low", "Close", "Volume", "VWAP", "Target")
train_dtypes = {"Asset_ID": np.int8, "Count": np.int32}
train_dtypes.update(dict.fromkeys(float_cols, np.float64))

train = pd.read_csv(data_dir / "raw/train.csv", dtype=train_dtypes)
asset_details = pd.read_csv(data_dir / "raw/asset_details.csv")

# Quick look at both tables: first rows of the training data, then the full
# asset lookup table.
display(train.head(), asset_details)

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target
0,1514764860,2,40,2376.58,2399.5,2357.14,2374.59,19.233005,2373.116392,-0.004218
1,1514764860,0,5,8.53,8.53,8.53,8.53,78.38,8.53,-0.014399
2,1514764860,1,229,13835.194,14013.8,13666.11,13850.176,31.550062,13827.062093,-0.014643
3,1514764860,5,32,7.6596,7.6596,7.6567,7.6576,6626.71337,7.657713,-0.013922
4,1514764860,7,5,25.92,25.92,25.874,25.877,121.08731,25.891363,-0.008264


Unnamed: 0,Asset_ID,Weight,Asset_Name
0,2,2.397895,Bitcoin Cash
1,0,4.304065,Binance Coin
2,1,6.779922,Bitcoin
3,5,1.386294,EOS.IO
4,7,2.079442,Ethereum Classic
5,6,5.894403,Ethereum
6,9,2.397895,Litecoin
7,11,1.609438,Monero
8,13,1.791759,TRON
9,12,2.079442,Stellar


In [23]:
# Lookup table Asset_ID -> Asset_Name.
# Built column-wise with zip instead of DataFrame.iterrows(): iterrows creates
# a Series for every row and does not preserve dtypes across the row, which is
# unnecessary overhead for a plain two-column mapping.
# If an Asset_ID appeared more than once, the last occurrence would win.
assetId_assetName = dict(zip(asset_details["Asset_ID"], asset_details["Asset_Name"]))

In [25]:
# Add a human-readable asset-name column by looking every Asset_ID up in the
# assetId_assetName dict; an ID that is missing from the dict maps to NaN.
# The column is written into `train` in place, but re-running this cell is
# harmless because it simply recomputes the same column.
train["Asset_Name"] = train["Asset_ID"].map(assetId_assetName)

In [26]:
# Wide view of the training data: one row per timestamp and one column per
# (field, asset name) pair. Cells with no observation for that asset at that
# timestamp are NaN.
# aggfunc is spelled out for the reader; "mean" is pivot_table's default, so
# duplicate (timestamp, asset) rows -- if any exist -- are averaged.
value_fields = ["Count", "Open", "High", "Low", "Close", "Volume", "VWAP"]
out = pd.pivot_table(
    train,
    values=value_fields,
    index="timestamp",
    columns="Asset_Name",
    aggfunc="mean",
)

In [27]:
# Preview the first five rows of the pivoted table.
out.head(n=5)

Unnamed: 0_level_0,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Asset_Name,Binance Coin,Bitcoin,Bitcoin Cash,Cardano,Dogecoin,EOS.IO,Ethereum,Ethereum Classic,IOTA,Litecoin,...,Dogecoin,EOS.IO,Ethereum,Ethereum Classic,IOTA,Litecoin,Maker,Monero,Stellar,TRON
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1514764860,8.53,13850.176,2374.59,,,7.6576,738.5075,25.877,,225.206667,...,,6626.71337,335.987856,121.08731,,411.896642,,6.63571,,
1514764920,8.5145,13828.102,2372.286667,,,7.6567,738.26,25.897,,224.856667,...,,3277.475494,232.793141,1.468019,,3640.502706,,0.34942,,
1514764980,8.4848,13801.314,2372.063333,,,7.6512,737.5025,26.4695,,226.0,...,,5623.557585,174.138031,76.163922,,328.350286,,1.189553,,
1514765040,8.5009,13768.04,2370.566667,,,7.6358,737.1025,26.4495,,224.275,...,,1696.632459,165.383926,408.774848,,507.405579,,1.512079,,
1514765100,8.456,13724.914,2370.173333,,,7.61,735.705,26.437,,223.82,...,,2268.362218,193.078039,529.3376,,1035.67183,,11.997972,,


In [36]:
# Flatten each two-part column label (field, asset name) into a single
# "<asset name>_<field>" string. This expects `out` to still carry the
# two-level columns produced by the pivot.
new_col_names = [
    f"{asset}_{field}"
    for field, asset in out.columns
]

In [38]:
# Swap the two-level column labels for the flattened names (same order,
# same length as the existing columns).
out.columns = pd.Index(new_col_names)

In [None]:
# Order the columns alphabetically so all fields of one asset sit together.
out = out.reindex(columns=sorted(out.columns))

In [41]:
# Preview the first five rows after flattening and sorting the columns.
out.head(n=5)

Unnamed: 0_level_0,Binance Coin_Close,Binance Coin_Count,Binance Coin_High,Binance Coin_Low,Binance Coin_Open,Binance Coin_VWAP,Binance Coin_Volume,Bitcoin Cash_Close,Bitcoin Cash_Count,Bitcoin Cash_High,...,Stellar_Open,Stellar_VWAP,Stellar_Volume,TRON_Close,TRON_Count,TRON_High,TRON_Low,TRON_Open,TRON_VWAP,TRON_Volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1514764860,8.53,5.0,8.53,8.53,8.53,8.53,78.38,2374.59,40.0,2399.5,...,,,,,,,,,,
1514764920,8.5145,7.0,8.53,8.5145,8.53,8.520215,71.39,2372.286667,53.0,2400.9,...,,,,,,,,,,
1514764980,8.4848,45.0,8.5299,8.4848,8.5065,8.501394,1546.82,2372.063333,61.0,2401.9,...,,,,,,,,,,
1514765040,8.5009,14.0,8.5066,8.4744,8.5009,8.47981,125.8,2370.566667,95.0,2406.4,...,,,,,,,,,,
1514765100,8.456,5.0,8.5007,8.456,8.5007,8.458435,125.01,2370.173333,33.0,2404.6,...,,,,,,,,,,


In [None]:
# Summarise column dtypes and the memory footprint of `train`.
# By default pandas does not deep-inspect object columns, so the reported
# figure is a lower bound (shown with a trailing "+"); pass
# memory_usage="deep" for an exact number at extra cost.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24236806 entries, 0 to 24236805
Data columns (total 11 columns):
 #   Column      Dtype  
---  ------      -----  
 0   timestamp   int64  
 1   Asset_ID    int8   
 2   Count       int32  
 3   Open        float64
 4   High        float64
 5   Low         float64
 6   Close       float64
 7   Volume      float64
 8   VWAP        float64
 9   Target      float64
 10  Asset_Name  object 
dtypes: float64(7), int32(1), int64(1), int8(1), object(1)
memory usage: 1.7+ GB


In [8]:
# Walk the assets in ascending Asset_ID order and build one cleaned frame each.
# itertuples() is used instead of iterrows(): only two scalar fields are read
# per asset, and named tuples avoid building a Series for every row.
for asset in asset_details.sort_values(by="Asset_ID").itertuples(index=False):
    # Per-asset slice of `train`, indexed by timestamp, keeping only rows that
    # have a Target. Each step returns a new frame, so nothing derived from
    # `train` is mutated in place (previously: dropna(..., inplace=True)).
    data = (
        train.loc[train["Asset_ID"] == asset.Asset_ID]
        .set_index("timestamp")
        .dropna(subset=["Target"])
    )

    # TODO: disabled -- `split_data` raised NameError when this cell was last
    # run with the call enabled; define or import it before re-enabling.
    # Intended next step: reindex onto a regular grid with step 60 so missing
    # timestamps are filled from the previous row (method="pad").
    # print(f"\n=== Asset Name is {asset.Asset_Name} ===")
    # data = data.reindex(range(data.index[0], data.index[-1] + 60, 60), method="pad")
    # split_data(data, asset_name=asset.Asset_Name)


=== Asset Name is Binance Coin ===


NameError: name 'split_data' is not defined