In [5]:
from numerapi import NumerAPI

# Numerai API client; it is constructed without any credentials here.
napi = NumerAPI()

# (remote dataset name, local destination) pairs for the crypto v1.0 files
# used by the rest of the notebook.
dataset_targets = (
    ("crypto/v1.0/live_universe.parquet", "data/live_universe.parquet"),
    ("crypto/v1.0/train_targets.parquet", "data/train_targets.parquet"),
)
downloaded = [napi.download_dataset(remote, local) for remote, local in dataset_targets]

# Keep the last call's return value as the cell's value so it is still displayed.
downloaded[-1]

2024-06-22 15:51:16,216 INFO numerapi.utils: target file already exists
2024-06-22 15:51:16,216 INFO numerapi.utils: download complete
2024-06-22 15:51:17,141 INFO numerapi.utils: target file already exists
2024-06-22 15:51:17,141 INFO numerapi.utils: download complete


'data/train_targets.parquet'

In [6]:
from pathlib import Path

import lightgbm as lgb
import pandas as pd

In [7]:
# Load the training targets and the live universe from the local parquet files.
train, live = (
    pd.read_parquet(parquet_path)
    for parquet_path in ("data/train_targets.parquet", "data/live_universe.parquet")
)

In [8]:
# Distinct symbols in the training data and in the live universe, shown side by side.
tuple(frame["symbol"].unique() for frame in (train, live))

(array(['0xBTC', 'ABBC', 'ABT', ..., 'BITCOIN', 'SHDW', 'SNEK'],
       dtype=object),
 array(['BTC', 'ETH', 'BNB', 'SOL', 'XRP', 'DOGE', 'TON', 'ADA', 'AVAX',
        'SHIB', 'TRX', 'LINK', 'DOT', 'BCH', 'NEAR', 'UNI', 'MATIC', 'LTC',
        'LEO', 'PEPE', 'ICP', 'KAS', 'ETC', 'APT', 'XMR', 'RNDR', 'HBAR',
        'XLM', 'ATOM', 'ARB', 'OKB', 'FIL', 'CRO', 'STX', 'IMX', 'MKR',
        'SUI', 'VET', 'GRT', 'OP', 'TAO', 'INJ', 'AR', 'FLOKI', 'FTM',
        'BGB', 'THETA', 'FLZ', 'JASMY', 'RUNE', 'BONK', 'FET', 'CORE',
        'AAVE', 'ALGO', 'CHEEL', 'FLR', 'FLOW', 'KCS', 'PENDLE', 'QNT',
        'GALA', 'AXS', 'ORDI', 'BSV', 'BEAM', 'EOS', 'ENS', 'DYDX', 'BTT',
        'GNO', 'NEO', 'EGLD', 'XTZ', 'RON', 'SAND', 'WLD', 'GT', 'AKT',
        'NEXO', 'CHZ', 'FTN', 'SNX', 'DEXE', 'SAFE', 'MINA', 'XEC', 'ROSE',
        'CFX', 'CAKE', 'LPT', 'KLAY', 'OM', 'XAUt', 'IOTA', 'APE', 'HNT',
        'AIOZ', '1INCH', 'LUNC', 'KAVA', 'CKB', 'XDC', 'FTT', 'TFUEL',
        'NFT', 'BTG', 'BLUR', 'TWT',

In [9]:
# Every symbol that appears in either the training targets or the live universe.
all_symbols = set(train["symbol"].unique()) | set(live["symbol"].unique())

# Set iteration order depends on string hashing, which is randomised per Python
# process, so writing the set directly produces a differently ordered file on
# every kernel restart. Sort first so the CSV is reproducible and diff-friendly.
# key=str keeps the sort from raising if a non-string value (e.g. a missing
# symbol) is present; for plain strings it is the ordinary lexicographic order.
pd.Series(sorted(all_symbols, key=str)).to_csv("data/all_symbols.csv", index=False)

In [10]:
# Number of most recent target observations averaged for each symbol.
ROLLING_WINDOW = 10


def _latest_rolling_mean(target: pd.Series) -> float:
    """Return the trailing mean of a single symbol's date-ordered targets.

    Missing targets are forward-filled from the previous observation, then the
    mean over the last ``ROLLING_WINDOW`` rows is taken (fewer rows are allowed
    via ``min_periods=1``) and only the most recent value is kept.
    """
    # .ffill() replaces fillna(method="ffill"), which is deprecated in recent pandas.
    return target.ffill().rolling(ROLLING_WINDOW, min_periods=1).mean().iloc[-1]


# Baseline signal: each symbol's latest trailing-mean target, converted to a
# percentile rank across symbols. The result is a one-column frame ("signal")
# indexed by symbol.
predictions = (
    # Sort once up front; groupby keeps the row order inside each group.
    # Assumes the "date" column sorts chronologically (TODO confirm its dtype).
    train.sort_values("date")
    # Selecting "target" before apply avoids the deprecated behaviour of
    # DataFrameGroupBy.apply operating on the grouping column.
    .groupby("symbol")["target"]
    .apply(_latest_rolling_mean)
    .rank(pct=True)
    .to_frame("signal")
)

In [11]:
# Attach a signal to every live symbol. The left join keeps all live rows, and
# dropna() then removes any row with a missing value, which includes live
# symbols that have no signal in `predictions`.
submission = live.merge(predictions, on="symbol", how="left").dropna()

# to_csv does not create missing directories and fails with OSError when the
# target folder is absent (e.g. on a fresh checkout), so create it first.
submission_path = Path("predictions/predictions.csv")
submission_path.parent.mkdir(parents=True, exist_ok=True)
submission.to_csv(submission_path, index=False)