In [None]:
%load_ext autoreload
%autoreload 2

from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import sys
from tqdm import tqdm

import matplotlib.pyplot as plt

import ccxt

In [None]:
# ccxt client for Hyperliquid, created without credentials. The fetch helpers
# defined later in the notebook reference this object as a global.
hyperliquid = ccxt.hyperliquid()
# Market metadata; iterated below as `symbol -> details` pairs.
markets = hyperliquid.load_markets()

In [None]:
# --- Universe selection ------------------------------------------------------
# A market is kept only if its metadata matches every key/value pair below.
filters = {
    'swap': True,
    'active': True,
}

volume_limit = 100

perp_markets = []
usd_volume_1d = {}
open_interest = {}
mid_px = {}
max_leverage = {}

# (exchange `info` field, dict that collects that field per base ticker)
info_targets = (
    ('dayNtlVlm', usd_volume_1d),
    ('openInterest', open_interest),
    ('midPx', mid_px),
    ('maxLeverage', max_leverage),
)

for symbol, details in markets.items():
    if not all(details.get(key) == wanted for key, wanted in filters.items()):
        continue
    perp_markets.append({symbol: details})
    base = symbol.replace('/USDC:USDC', '')  # e.g. 'BTC/USDC:USDC' -> 'BTC'
    for info_key, target in info_targets:
        target[base] = details['info'][info_key]

# One row per base ticker, all fields cast to float, most-traded first.
# Volume and open-interest notional are then scaled to millions.
liquidity_df = (
    pd.DataFrame({
        'usd_volume_1d': usd_volume_1d,
        'open_interest': open_interest,
        'mid_px': mid_px,
        'max_leverage': max_leverage,
    })
    .astype(float)
    .sort_values(by='usd_volume_1d', ascending=False)
    .assign(
        usd_volume_1d=lambda frame: frame['usd_volume_1d'] / 1_000_000,
        open_interest_usd=lambda frame: frame['open_interest'] * frame['mid_px'] / 1_000_000,
    )
)

# Liquidity thresholds, in the same (millions) units as the scaled columns.
volume_thresh = 5
open_int_thresh = 10

liquid_index = liquidity_df.query(
    f'open_interest_usd > {open_int_thresh} and usd_volume_1d > {volume_thresh} and max_leverage >= 5'
).index
# BERA is the asset being tracked, so it is excluded from the candidate list.
symbols = [name for name in liquid_index.tolist() if name != 'BERA']
liquidity_df.loc[symbols, ['usd_volume_1d', 'open_interest_usd', 'max_leverage']].T.round(1)

In [None]:
def historical_funding_rate(ticker, start=None, end=None):
    """Download the funding-rate history of a Hyperliquid perpetual, page by page.

    Parameters
    ----------
    ticker : str
        Base asset name; the market queried is ``f"{ticker}/USDC:USDC"``.
    start, end : int
        Window bounds in epoch milliseconds. Both are required; the ``None``
        defaults are kept only so the signature stays unchanged.

    Returns
    -------
    dict
        ``{timestamp_ms: funding_rate}`` for every record fetched whose
        timestamp is ``<= end``.

    Raises
    ------
    ValueError
        If ``start`` or ``end`` is ``None`` (previously an opaque ``TypeError``
        from comparing ``None < None``).
    """
    if start is None or end is None:
        raise ValueError("start and end must both be given as epoch milliseconds")

    symbol = f"{ticker}/USDC:USDC"
    all_funding_rates = []
    cursor = start
    while cursor < end:
        page = hyperliquid.fetchFundingRateHistory(symbol=symbol, since=cursor, limit=500)
        if not page:
            break
        all_funding_rates.extend(page)
        # Resume just after the last record received.
        next_cursor = page[-1]['timestamp'] + 1
        if next_cursor <= cursor:
            # The exchange did not move past `since`; stop instead of looping forever.
            break
        cursor = next_cursor

    # The final page may run past `end`; keep only records inside the window.
    return {
        entry['timestamp']: entry['fundingRate']
        for entry in all_funding_rates
        if entry['timestamp'] <= end
    }


def historical_close_volume(ticker, start=None, end=None):
    """Download hourly close and volume for a Hyperliquid perpetual, page by page.

    Parameters
    ----------
    ticker : str
        Base asset name; the market queried is ``f"{ticker}/USDC:USDC"``.
    start, end : int
        Window bounds in epoch milliseconds. Both are required; the ``None``
        defaults are kept only so the signature stays unchanged.

    Returns
    -------
    dict
        ``{timestamp_ms: {'close': ..., 'volume': ...}}`` for every 1h candle
        fetched whose timestamp is ``<= end``. Close and volume are taken from
        positions 4 and 5 of each candle row.

    Raises
    ------
    ValueError
        If ``start`` or ``end`` is ``None`` (previously an opaque ``TypeError``
        from comparing ``None < None``).
    """
    if start is None or end is None:
        raise ValueError("start and end must both be given as epoch milliseconds")

    symbol = f"{ticker}/USDC:USDC"
    all_data = []
    cursor = start
    while cursor < end:
        candles = hyperliquid.fetch_ohlcv(symbol=symbol, timeframe='1h', since=cursor, limit=500)
        if not candles:
            break
        all_data.extend(candles)
        # Resume just after the last candle received.
        next_cursor = candles[-1][0] + 1
        if next_cursor <= cursor:
            # The exchange did not move past `since`; stop instead of looping forever.
            break
        cursor = next_cursor

    # The final page may run past `end`; keep only candles inside the window.
    return {
        candle[0]: {'close': candle[4], 'volume': candle[5]}
        for candle in all_data
        if candle[0] <= end
    }


def get_historical_data(ticker, start, end):
    """Return an hourly, UTC-indexed frame of funding, close and volume for `ticker`.

    `start` and `end` are passed straight through to `historical_funding_rate`
    and `historical_close_volume`. Their millisecond-timestamp keys are merged
    into one index, converted to timezone-aware datetimes, and bucketed by
    hour, keeping the last non-null value of each column within each hour.
    """
    funding_by_ts = historical_funding_rate(ticker, start, end)
    bars_by_ts = historical_close_volume(ticker, start, end)

    columns = {'funding': funding_by_ts}
    for field in ('close', 'volume'):
        columns[field] = {ts: bar[field] for ts, bar in bars_by_ts.items()}

    frame = pd.DataFrame(columns).sort_index()
    frame.index = pd.to_datetime(frame.index, unit='ms', utc=True)
    return frame.resample('h').last()

In [None]:
# Fetch window: the trailing 90 days up to now, expressed in epoch milliseconds
# (the values are forwarded to the `since=` argument of the ccxt fetch calls).
start = int((datetime.now() - timedelta(days=90)).timestamp() * 1000)
end = int(datetime.now().timestamp() * 1000)

# BERA is the asset the rest of the notebook tries to track with a basket.
bera_df = get_historical_data('BERA', start, end)
bera_df.head()

In [None]:
# ticker -> hourly funding/close/volume frame for each candidate asset
data = {}

# Restart the window at BERA's first available hour so every asset is fetched
# over the same span as BERA. `end` is reused from the previous cell.
start = int(bera_df.index[0].timestamp() * 1000)
for symbol in tqdm(symbols):
    data[symbol] = get_historical_data(symbol, start, end)

In [None]:
# One wide frame per field: rows are hourly timestamps, columns are tickers.
funding_df = pd.DataFrame({k: df['funding'] for k, df in data.items()})
price_df = pd.DataFrame({k: df['close'] for k, df in data.items()})
volume_df = pd.DataFrame({k: df['volume'] for k, df in data.items()})
# Hourly traded notional = close x volume
# (assumes volume is quoted in base-asset units — TODO confirm).
notional_df = price_df * volume_df

# A cell renders only its last expression, so the first two previews need an
# explicit display() call (`display` is provided by IPython in notebooks).
display(funding_df.head())
display(price_df.head())
notional_df.head()

In [None]:
from matplotlib.ticker import FuncFormatter


def _as_percent(value, _position):
    """Tick formatter: render an axis value as a whole-number percentage."""
    return f'{value:.0f}%'


# Mean hourly funding scaled to an annual percentage (24 hours x 365 days x 100).
annualized_funding_pct = funding_df.mean() * 24 * 365 * 100
ax = annualized_funding_pct.sort_values(ascending=False).plot.bar(
    figsize=(12, 3),
    title='Annualized Funding Rate',
)
ax.yaxis.set_major_formatter(FuncFormatter(_as_percent))

In [None]:
# From here on `start` is a timestamp string used for label-based slicing.
start = '2025-02-15 13:00:00+00:00'

# Hourly simple returns from `start` onward, restricted to the timestamps that
# BERA and the asset panel have in common.
bera_returns, asset_returns = (
    bera_df['close'].loc[start:].pct_change().dropna()
    .align(price_df.loc[start:].pct_change().dropna(), join='inner', axis=0)
)

mean_returns = asset_returns.mean()
cov_matrix = asset_returns.cov()

# Correlation of every asset's returns with BERA's returns.
correlations = asset_returns.corrwith(bera_returns)

(
    correlations
    .sort_values(ascending=False)
    .to_frame('corr since 2/15/25')
    .T
    .style.format("{:.1%}")
)


In [None]:
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression

# Fit the leading principal components of the asset return panel.
n_components = 5
pca = PCA(n_components=n_components)
pca_scores = pca.fit_transform(asset_returns)

# Regress BERA's returns on each component separately; the R^2 (in percent)
# is how much of BERA's return variation that single component accounts for.
explained_variance = []
for i in range(n_components):
    pc = pca_scores[:, i].reshape(-1, 1)
    model = LinearRegression().fit(pc, bera_returns)
    r2 = model.score(pc, bera_returns)
    explained_variance.append(r2 * 100)  # percent

for i, r2 in enumerate(explained_variance, start=1):
    print(f"PC{i} explains {r2:.2f}% of BERA's return variation.")

# Refit with a single component to get the "market" factor and its loadings.
# (The earlier `pca.transform(...)[:, 0]` projection was removed: its result
# was overwritten here before ever being read.)
pca = PCA(n_components=1)
pc1 = pca.fit_transform(asset_returns).flatten()

correlation = np.corrcoef(pc1, bera_returns.values)[0, 1]
print(f"Correlation between BERA returns and market component (PC1): {correlation:.3f}")

pc1_weights = pd.Series(
    pca.components_[0],
    index=asset_returns.columns,
    name='PC1 loading'
)
# Loadings rescaled so their absolute values sum to 1.
pca_market_portfolio = pc1_weights / pc1_weights.abs().sum()


In [None]:
# Average hourly notional per asset x 24, divided by one million, i.e. an
# approximate average daily traded notional in millions
# (quote-currency units assumed — TODO confirm).
notional_df.mean() * 24 / 1_000_000

In [None]:
import cvxpy as cp

# Annualised mean hourly funding per asset, negated: minimising
# `funding_vector @ w` therefore tilts weights toward higher-funding assets.
funding_vector = -(funding_df[asset_returns.columns].mean() * 24 * 365).values

# Denoise the return panel by keeping only its first principal component.
pca = PCA(n_components=1)  # you can tune this
pca_factors = pca.fit_transform(asset_returns)
cleaned_returns = pca.inverse_transform(pca_factors)
cleaned_returns_df = pd.DataFrame(cleaned_returns, index=asset_returns.index, columns=asset_returns.columns)

# Align on timestamps under NEW names. Rebinding `asset_returns` to the
# reconstructed panel (as this cell used to do) made the cell non-idempotent
# and silently replaced the real returns used by every later cell.
bera_aligned, cleaned_aligned = bera_returns.align(cleaned_returns_df, join='inner', axis=0)

# Convert to numpy
R = cleaned_aligned.values
r_bera = bera_aligned.values
n_assets = R.shape[1]
w = cp.Variable(n_assets)

r_portfolio = R @ w

tracking_error = cp.sum_squares(r_portfolio - r_bera)
funding_penalty = funding_vector @ w  # linear penalty
lambda_funding = 0.0001 # you can tune this

objective = cp.Minimize(tracking_error + lambda_funding * funding_penalty)

# Long-only weights that sum to 100%.
constraints = [cp.sum(w) == 1, w >= 0]

problem = cp.Problem(objective, constraints)
problem.solve()
if w.value is None:
    # Without this check a failed solve would turn into an all-NaN weight vector.
    raise RuntimeError(f"Tracking optimisation produced no weights (status: {problem.status})")

optimal_weights = pd.Series(w.value, index=asset_returns.columns)
filtered_weights = optimal_weights[optimal_weights > 1e-3]  # filter threshold = 0.001

pd.DataFrame({
    'weights': filtered_weights,
    'funding rate (ann.)': -pd.Series(funding_vector, index=asset_returns.columns),
}).dropna().sort_values(by='weights', ascending=False).round(3).style.format("{:.1%}")

In [None]:
# Basket return series: weighted sum of the constituent returns.
tracked_returns = asset_returns.dot(optimal_weights)

# 24 hourly observations x 7 days per correlation window.
window = 24*7
rolling_corr = tracked_returns.rolling(window).corr(bera_returns).dropna()
rolling_corr.plot(figsize=(12, 3), title='7-Day Rolling Correlation with BERA')

In [None]:
# Basket return series: weighted sum of the constituent returns.
tracked_returns = asset_returns.dot(optimal_weights)

# Full-sample correlation between the basket and BERA.
correlation = tracked_returns.corr(bera_returns)
print(f"Optimized correlation: {correlation:.5f}")

# Compounded cumulative return of each series on one chart.
for label, returns in (("BERA", bera_returns), ("Optimized Basket", tracked_returns)):
    ((1 + returns).cumprod() - 1).plot(label=label, linewidth=2)
plt.title("Tracking Optimization vs. BERA")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Annualised mean hourly funding per asset, negated: minimising
# `funding_vector @ w` therefore tilts weights toward higher-funding assets.
funding_vector = -(funding_df[asset_returns.columns].mean() * 24 * 365).values

# Rebuild the raw returns from prices on the timestamps/columns of
# `asset_returns`, so this cell does not depend on what `asset_returns`
# currently holds. Earlier optimisation cells may have rebound it to a
# one-component PCA reconstruction, in which case "returns minus
# reconstruction" would be identically zero.
raw_returns = price_df[asset_returns.columns].pct_change().reindex(asset_returns.index)

pca = PCA(n_components=1)  # you can tune this
pca_factors = pca.fit_transform(raw_returns)

# Reconstruct the market-driven part, then keep what is left over.
cleaned_returns = pca.inverse_transform(pca_factors)
cleaned_returns_df = pd.DataFrame(cleaned_returns, index=raw_returns.index, columns=raw_returns.columns)
residual_returns = raw_returns - cleaned_returns_df

# Align indices (under a new name, so the global `bera_returns` is untouched).
bera_aligned, residual_returns = bera_returns.align(residual_returns, join='inner', axis=0)
R = residual_returns.values
r_bera = bera_aligned.values

n_assets = R.shape[1]
w = cp.Variable(n_assets)

r_portfolio = R @ w

tracking_error = cp.sum_squares(r_portfolio - r_bera)
funding_penalty = funding_vector @ w  # linear penalty
lambda_funding = 0.0001 # you can tune this

objective = cp.Minimize(tracking_error + lambda_funding * funding_penalty)

# Long-only weights that sum to 100%.
constraints = [cp.sum(w) == 1, w >= 0]

problem = cp.Problem(objective, constraints)
problem.solve()
if w.value is None:
    # Without this check a failed solve would turn into an all-NaN weight vector.
    raise RuntimeError(f"Residual optimisation produced no weights (status: {problem.status})")

optimal_weights = pd.Series(w.value, index=asset_returns.columns)
filtered_weights = optimal_weights[optimal_weights > 1e-3]  # filter threshold = 0.001

pd.DataFrame({
    'weights': filtered_weights,
    'funding rate (ann.)': -pd.Series(funding_vector, index=asset_returns.columns),
}).dropna().sort_values(by='weights', ascending=False).round(3).style.format("{:.1%}")

In [None]:
# account for volume

# Approximate daily traded notional per asset: mean hourly notional x 24.
daily_volume = notional_df.mean() * 24
basket_size = 5_000_000
max_pct_daily_volume = 0.05

# Compute max allowed notional per asset
max_allocation_usd = daily_volume * max_pct_daily_volume

# Convert to max weights relative to total basket size
max_weights = max_allocation_usd / basket_size

# Align to the return panel's columns
max_weights = max_weights[asset_returns.columns].fillna(1.0)  # fallback to 100% if volume missing

# The weights must sum to 1, so caps that add up to less than 1 can never be met.
if max_weights.sum() < 1:
    raise ValueError(
        f"Volume caps only allow {max_weights.sum():.1%} of the basket to be allocated; "
        "lower basket_size or raise max_pct_daily_volume"
    )

# Annualised mean hourly funding per asset, negated: minimising
# `funding_vector @ w` therefore tilts weights toward higher-funding assets.
funding_vector = -(funding_df[asset_returns.columns].mean() * 24 * 365).values

# Denoise the return panel by keeping only its first principal component.
pca = PCA(n_components=1)  # you can tune this
pca_factors = pca.fit_transform(asset_returns)
cleaned_returns = pca.inverse_transform(pca_factors)
cleaned_returns_df = pd.DataFrame(cleaned_returns, index=asset_returns.index, columns=asset_returns.columns)

# Align under new names so the globals `asset_returns` / `bera_returns` are
# not rebound and the cell can be re-run without changing its own inputs.
bera_aligned, cleaned_aligned = bera_returns.align(cleaned_returns_df, join='inner', axis=0)

# Convert to numpy
R = cleaned_aligned.values
r_bera = bera_aligned.values
n_assets = R.shape[1]
w = cp.Variable(n_assets)

r_portfolio = R @ w

tracking_error = cp.sum_squares(r_portfolio - r_bera)
funding_penalty = funding_vector @ w  # linear penalty
lambda_funding = 0.0001 # you can tune this

objective = cp.Minimize(tracking_error + lambda_funding * funding_penalty)

constraints = [
    cp.sum(w) == 1,
    w >= 0,
    w <= max_weights.values  # enforce volume constraint
]

problem = cp.Problem(objective, constraints)
problem.solve()
if w.value is None:
    # Without this check a failed solve would turn into an all-NaN weight vector.
    raise RuntimeError(f"Volume-constrained optimisation produced no weights (status: {problem.status})")

optimal_weights = pd.Series(w.value, index=asset_returns.columns)
filtered_weights = optimal_weights[optimal_weights > 1e-3]  # filter threshold = 0.001

pd.DataFrame({
    'weights': filtered_weights,
    'funding rate (ann.)': -pd.Series(funding_vector, index=asset_returns.columns),
    'pct_daily_volume ($5m Basket)': (optimal_weights * basket_size) / daily_volume,
}).dropna().sort_values(by='weights', ascending=False).round(3).style.format("{:.1%}")


In [None]:
# Basket return series: weighted sum of the constituent returns.
tracked_returns = asset_returns.dot(optimal_weights)

# 24 hourly observations x 7 days per correlation window.
window = 24*7
rolling_corr = tracked_returns.rolling(window).corr(bera_returns).dropna()
rolling_corr.plot(figsize=(12, 3), title='7-Day Rolling Correlation with BERA')

In [None]:
# Scratch check: a UTC date string converted to epoch milliseconds
# (whole seconds x 1000).
int(pd.to_datetime('2025-03-03 12:00:00+00:00').timestamp())*1000

In [None]:
from basket import HedgeBasket

# Basket formation times: six consecutive Mondays at 12:00 UTC.
basket_dates = [
    '2025-03-03 12:00:00+00:00',
    '2025-03-10 12:00:00+00:00',
    '2025-03-17 12:00:00+00:00',
    '2025-03-24 12:00:00+00:00',
    '2025-03-31 12:00:00+00:00',
    '2025-04-07 12:00:00+00:00',
]

# Settings shared by every basket; only the formation timestamp varies.
# NOTE(review): lambda_funding is 0.1 here but 0.0001 in the inline
# optimisation cells — confirm HedgeBasket scales its penalty differently.
basket_settings = dict(
    ticker='BERA',
    basket_size=5_000_000,
    max_pct_daily_volume=0.05,
    lambda_funding=0.1,
)

# date string -> HedgeBasket built at that time (epoch milliseconds)
baskets = {
    dt: HedgeBasket(timestamp=int(pd.to_datetime(dt).timestamp())*1000, **basket_settings)
    for dt in basket_dates
}

In [None]:
# formation date string -> the 'weights' column of that basket's optimized_weights
weights = {}
for dt, basket in tqdm(baskets.items()):
    weights[dt] = basket.optimized_weights['weights']
    # dt[:10] is the YYYY-MM-DD part of the date string.
    print(f"The correlation between the basket and BERA for {dt[:10]} is {basket.correlation:.2f}.")
    #print(weights[dt])

In [None]:
# Rows: assets appearing in any basket; columns: basket formation dates.
weights_df = pd.DataFrame(weights)

# Display copy: assets missing from a basket get 0, and the columns are
# relabelled with plain dates.
formation_dates = pd.to_datetime(weights_df.columns).date
weights_clean = weights_df.fillna(0).set_axis(formation_dates, axis=1)

first_date = weights_clean.columns[0]
weights_clean.sort_values(by=first_date, ascending=False).style.format("{:.1%}")

In [None]:
# Per-basket correlation as reported by each HedgeBasket.
in_sample_corr = pd.Series({dt: basket.correlation for dt, basket in baskets.items()})

# Weight-averaged annualised funding rate of each basket.
avg_funding = pd.Series({
    dt: basket.optimized_weights['funding rate (ann.)'].dot(basket.optimized_weights['weights'])
    for dt, basket in baskets.items()
})

pd.DataFrame({
    'In Sample Correlation to BERA': in_sample_corr,
    'Avg Annualized Funding Rate': avg_funding,
}).style.format("{:.1%}")

In [None]:
import time

# Every asset that appears in at least one basket.
basket_assets = weights_df.index.to_list()

basket_asset_data = {}
data_start = int(pd.to_datetime(basket_dates[0]).timestamp())*1000
# time.time() is seconds since the Unix epoch regardless of the local timezone.
# The previous `datetime.utcnow().timestamp()` built a *naive* UTC datetime,
# which `.timestamp()` reads as local time, shifting `data_end` by the
# machine's UTC offset.
data_end = int(time.time()) * 1000
for symbol in tqdm(basket_assets):
    basket_asset_data[symbol] = get_historical_data(symbol, data_start, data_end)
    time.sleep(2)  # pause between assets (presumably to respect API rate limits)

# Wide frames: rows are hourly timestamps, columns are basket assets.
basket_asset_funding = pd.DataFrame({symbol: data['funding'] for symbol, data in basket_asset_data.items()})
basket_asset_close = pd.DataFrame({symbol: data['close'] for symbol, data in basket_asset_data.items()})
basket_asset_returns = basket_asset_close.pct_change()
basket_asset_volume = pd.DataFrame({symbol: data['volume'] for symbol, data in basket_asset_data.items()})
basket_asset_volume_usd = basket_asset_volume * basket_asset_close

# NOTE: despite its name this is the full funding/close/volume frame for BERA,
# not a return series.
benchmark_returns = get_historical_data('BERA', data_start, data_end)

In [None]:
# Export step: copies the hourly funding table to the system clipboard
# (needs a clipboard backend; not reproducible on a headless kernel).
basket_asset_funding.to_clipboard()

In [None]:
# Per-basket statistics, keyed by formation date string.
correlation = {}
basket_returns = {}
volatilty = {}
benchmark_volatility = {}

for dt, basket in baskets.items():
    chosen = basket.optimized_weights
    # Basket return = sum over constituents of (asset return x weight).
    weighted_returns = basket.asset_returns[chosen.index] * chosen['weights']
    basket_returns[dt] = weighted_returns.sum(1)
    correlation[dt] = basket_returns[dt].corr(basket.benchmark_returns)
    # Hourly standard deviation annualised with sqrt(hours per year).
    volatilty[dt] = basket_returns[dt].std() * np.sqrt(24 * 365)
    benchmark_volatility[dt] = basket.benchmark_returns.std() * np.sqrt(24 * 365)

avg_funding = pd.Series({
    dt: basket.optimized_weights['funding rate (ann.)'].dot(basket.optimized_weights['weights'])
    for dt, basket in baskets.items()
})

stats_table = pd.DataFrame({
    'In Sample Correlation to BERA': pd.Series(correlation),
    'Avg Annualized Funding Rate': avg_funding,
    'Basket Volatility': pd.Series(volatilty),
    'Benchmark Volatility': pd.Series(benchmark_volatility),
})
# Copied to the clipboard rather than displayed.
stats_table.to_clipboard()


In [None]:
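# NOTE: the simulation cell below reads `pnl_data` (columns 'spot_pnl' and
# 'hedge_pnl'), which is not defined anywhere else in the visible notebook.
# Load it before running that cell, e.g. copy the P&L table to the clipboard
# and uncomment the line below.
#pnl_data = pd.read_clipboard(index_col=0)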


In [None]:
# Bootstrap of one-year outcomes from rolling one-week P&L.
# Requires `pnl_data` with 'spot_pnl' and 'hedge_pnl' columns
# (assumed to hold one row per hour — TODO confirm).
chunk_size = 24*7  # 7-day chunks
pnl_notional = 3_000_000  # P&L is divided by this to get a return (NOTE(review): the baskets use 5_000_000)
spot_array = (pnl_data['spot_pnl'] / pnl_notional).values
hedge_array = (pnl_data['hedge_pnl'] / pnl_notional).values

max_start = len(spot_array) - chunk_size + 1  # +1 to include last possible chunk
if max_start <= 0:
    raise ValueError(
        f"pnl_data has {len(spot_array)} rows; at least {chunk_size} are needed to form one chunk"
    )

# Rolling (overlapping) one-week sums. Consecutive chunks share most of their
# rows, so treating draws as independent understates serial dependence.
spot_chunks = np.array([spot_array[i:i+chunk_size].sum() for i in range(max_start)])
hedge_chunks = np.array([hedge_array[i:i+chunk_size].sum() for i in range(max_start)])
combined_chunks = spot_chunks + hedge_chunks

n_sims = 10000
chunks_per_year = 52  # weekly chunks -> 52 draws per simulated year
seed = 42  # fixed so the percentile table is reproducible across runs
rng = np.random.default_rng(seed)

# Each row is one simulated year: 52 chunk indices drawn with replacement.
# The same indices are used for spot and hedge so their weekly pairing is kept.
idx = rng.integers(0, len(spot_chunks), size=(n_sims, chunks_per_year))

# Compound the sampled weekly returns into an annual return per simulation.
spot_annual = np.prod(1 + spot_chunks[idx], axis=1) - 1
hedge_annual = np.prod(1 + hedge_chunks[idx], axis=1) - 1
combined_annual = np.prod(1 + combined_chunks[idx], axis=1) - 1

# Summary percentiles
pct_levels = [.99, .95, .9, .75, .5, .25, .1, .05, .01]
percentile_points = [p * 100 for p in pct_levels]

summary_df = pd.DataFrame({
    'percentile': pct_levels,
    'combined': np.percentile(combined_annual, percentile_points),
    #'spot_only': np.percentile(spot_annual, percentile_points),
    #'hedge_only': np.percentile(hedge_annual, percentile_points),
})

summary_df.to_clipboard(index=False)
summary_df.style.format("{:.1%}")

In [None]:
# Derived here so the cell does not depend on `combined_chunks` having been
# defined elsewhere: the combined weekly return is spot plus hedge.
combined_chunks = spot_chunks + hedge_chunks

for label, chunks in (('spot', spot_chunks), ('hedge', hedge_chunks), ('combined', combined_chunks)):
    plt.hist(chunks, bins=50, alpha=0.5, label=label)
plt.legend()
plt.title("Distribution of Weekly Returns")
plt.show()