In [6]:
import pandas as pd
import polars as pl
import numpy as np
from riskmatrix.alpha.alpha101 import compute_alpha101

In [7]:
# Symbol universe used to filter the raw data (100 USDT-quoted tickers).
# Order is preserved exactly as originally listed.
FILTER_SYMBOLS = [
    'BTCUSDT', 'ETHUSDT', 'SOLUSDT', 'XRPUSDT', 'DOGEUSDT',
    'BNBUSDT', 'ADAUSDT', '1000SHIBUSDT', 'MATICUSDT', 'AVAXUSDT',
    'ETCUSDT', 'LTCUSDT', 'LINKUSDT', 'DOTUSDT', 'FTMUSDT',
    '1000PEPEUSDT', 'GMTUSDT', 'BCHUSDT', 'FILUSDT', 'SANDUSDT',
    'NEARUSDT', 'EOSUSDT', 'AXSUSDT', 'GALAUSDT', 'APEUSDT',
    'ATOMUSDT', 'MANAUSDT', 'APTUSDT', 'OPUSDT', 'TRXUSDT',
    'DYDXUSDT', 'ORDIUSDT', 'CHZUSDT', 'TRBUSDT', 'WIFUSDT',
    'CRVUSDT', 'ARBUSDT', 'UNIUSDT', 'SUIUSDT', 'WLDUSDT',
    'AAVEUSDT', 'WAVESUSDT', 'PEOPLEUSDT', 'SUSHIUSDT', 'MASKUSDT',
    'RUNEUSDT', 'XLMUSDT', 'THETAUSDT', 'INJUSDT', 'LRCUSDT',
    'CFXUSDT', 'VETUSDT', 'ALGOUSDT', 'ALICEUSDT', 'ZILUSDT',
    'SXPUSDT', 'XTZUSDT', 'GRTUSDT', 'LINAUSDT', 'MKRUSDT',
    'TIAUSDT', 'STORJUSDT', 'UNFIUSDT', '1000BONKUSDT', 'NOTUSDT',
    'ENJUSDT', 'NEOUSDT', 'EGLDUSDT', 'FETUSDT', '1INCHUSDT',
    '1000LUNCUSDT', 'STXUSDT', 'COMPUSDT', 'YFIUSDT', 'ONEUSDT',
    'KNCUSDT', 'LDOUSDT', '1000SATSUSDT', 'SNXUSDT', 'ZECUSDT',
    'ENSUSDT', 'OMGUSDT', 'BOMEUSDT', 'SEIUSDT', 'MTLUSDT',
    'BLZUSDT', 'REEFUSDT', 'CHRUSDT', 'BAKEUSDT', 'KAVAUSDT',
    '1000FLOKIUSDT', 'ENAUSDT', 'RSRUSDT', 'XMRUSDT', 'RNDRUSDT',
    'RLCUSDT', 'OGNUSDT', 'JASMYUSDT', 'BELUSDT', 'OCEANUSDT',
]

In [8]:
def compute_past_close_return(df: pl.DataFrame, N: int) -> pl.DataFrame:
    """Add trailing percentage close-to-close returns for horizons 1..N.

    For every horizon ``k`` in ``1..N`` a column ``past_close_return_k`` is
    added, holding ``(close / close.shift(k) - 1) * 100`` evaluated within
    each ``symbol`` group. A ``returns`` column with the 1-row return is
    always added, even when ``N <= 0`` (previously that case failed because
    ``returns`` was aliased from ``past_close_return_1``, which did not exist).

    Args:
        df: Frame containing at least ``symbol`` and ``close`` columns. The
            shift is positional, so rows are assumed to already be in
            chronological order within each symbol -- TODO confirm at callers.
        N: Largest horizon, in rows. Values ``<= 0`` add no
            ``past_close_return_*`` columns.

    Returns:
        A new frame with the extra columns appended.
    """

    def pct_change(lag: int) -> pl.Expr:
        # Percentage change of `close` versus `lag` rows earlier, per symbol.
        return ((pl.col("close") / pl.col("close").shift(lag) - 1) * 100).over("symbol")

    # Build every expression up front and apply them in one with_columns call
    # instead of N sequential passes over the frame.
    new_columns = [
        pct_change(lag).alias(f'past_close_return_{lag}') for lag in range(1, N + 1)
    ]
    new_columns.append(pct_change(1).alias('returns'))
    return df.with_columns(new_columns)

In [9]:
def read_binance_data(filename, filter_symbols):
    """Load a parquet file with polars and prepare it for factor computation.

    Steps (results are identical to applying them in any row-wise order):
      * keep only rows whose ``symbol`` is listed in ``filter_symbols``;
      * add ``vwap`` as ``quote_volume / volume``;
      * convert ``open_time`` and ``close_time`` from epoch milliseconds to
        ``Datetime('ms')``;
      * sort by ``symbol`` then ``open_time``.

    Args:
        filename: Path of the parquet file, passed to ``pl.read_parquet``.
        filter_symbols: Collection of symbols to keep.

    Returns:
        The filtered, augmented and sorted polars DataFrame.
    """
    # NOTE(review): a removed pandas snippet suggested the symbol list was
    # derived as the top 100 symbols by summed quote_volume -- confirm.
    in_universe = pl.col("symbol").is_in(filter_symbols)

    vwap = (pl.col('quote_volume') / pl.col('volume')).alias("vwap")
    timestamps = [
        pl.from_epoch(pl.col(name), time_unit="ms").cast(pl.Datetime('ms')).alias(name)
        for name in ("open_time", "close_time")
    ]

    return (
        pl.read_parquet(filename)
        .filter(in_universe)
        .with_columns([vwap, *timestamps])
        .sort(by=['symbol', 'open_time'])
    )


In [10]:
# Build the working frame as one pipeline: load the filtered symbol universe,
# attach trailing close returns for horizons 1..10, then run compute_alpha101
# on it (later cells read columns such as `alpha030` from the result).
df = (
    read_binance_data('../scratch/dp/data/all_data_1d.parquet', FILTER_SYMBOLS)
    .pipe(compute_past_close_return, 10)
    .pipe(
        compute_alpha101,
        open='open',
        high='high',
        low='low',
        close='close',
        volume='volume',
        returns='returns',
        vwap='vwap',
    )
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inner[self.returns < 0] = stddev(self.returns, 20)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill',

In [None]:
# Evaluate one alpha: for each timestamp, hold the symbols whose alpha value is
# above that timestamp's cross-sectional quantile and record the equal-weighted
# mean of their next-row returns.
alphaname = 'alpha030'

# Wide (open_time x symbol) tables for the returns and the chosen alpha.
returns = df[["open_time", "symbol", "returns"]].pivot(index="open_time", columns="symbol", values="returns").sort("open_time")
alpha = df[["open_time", "symbol", alphaname]].pivot(index="open_time", columns="symbol", values=alphaname).sort("open_time")

# Both tables must share the same time axis before we drop it.
assert (returns["open_time"] == alpha["open_time"]).all()
columns = returns.columns[1:]  # every column except the leading open_time index
returns = returns[columns].to_numpy()
alpha = alpha[columns].to_numpy()  # same column order as `returns`

# Lag the signal by one row: alpha observed at row t is paired with the return
# realised at row t + 1, so the signal never sees the return it is scored on.
alpha = alpha[:-1]
returns = returns[1:]

long_quantile = 0.5
short_quantile = 0.5

# Per-row thresholds, shape (2, n_rows): quantiles[0] is the short threshold
# (not used yet), quantiles[1] the long threshold. Rows where every alpha is
# NaN yield NaN thresholds (numpy emits a RuntimeWarning) and hold nothing.
quantiles = np.nanquantile(alpha, [short_quantile, long_quantile], axis=1)
long_threshold = quantiles[1][:, None]

# Compare against the per-row threshold value, not the quantile level itself.
# NaN alphas compare False and are therefore never held.
holdings = alpha > long_threshold

# Equal-weighted mean return of the held symbols. Missing returns count as 0,
# and rows with no holdings (0 / 0) are reported as 0.
held_count = holdings.sum(axis=1)
held_return_sum = np.nan_to_num(returns * holdings, nan=0.0).sum(axis=1)
with np.errstate(divide="ignore", invalid="ignore"):
    long = np.nan_to_num(held_return_sum / held_count, nan=0.0)

array([[        nan,         nan,         nan, ...,         nan,
                nan,         nan],
       [        nan,         nan,         nan, ..., -1.67728965,
         1.67874183, -8.33439046],
       [        nan,         nan,         nan, ..., 11.58768367,
         3.45846368, -5.8953778 ],
       ...,
       [-4.87369985, -2.58558428, -2.37080739, ..., -0.36001694,
         2.85326087,  0.21613833],
       [-1.10382172, -2.85438909, -2.85362578, ..., -1.424017  ,
        -2.61558785, -1.9410496 ],
       [-5.28061493, -7.05182019, -4.23914296, ..., -1.3367831 ,
         0.86814976, -3.95894428]])

In [42]:
# Scratch check: shape of `alpha` with its last row dropped.
alpha[:-1].shape

(1403, 100)