In [None]:
%load_ext autoreload
%autoreload 2

from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import sys
from tqdm import tqdm

import matplotlib.pyplot as plt

import ccxt

In [None]:
# ccxt client for the Hyperliquid exchange, created with default settings
# (no credentials or options are passed).
hyperliquid = ccxt.hyperliquid()
# Market metadata; later cells iterate it as a {symbol: details} mapping.
markets = hyperliquid.load_markets()

In [None]:
# A market is kept only when every key below matches its ccxt entry exactly.
filters = {
    'swap': True,
    'active': True,
}

volume_limit = 100

perp_markets = []
usd_volume_1d, open_interest, mid_px = {}, {}, {}

for symbol, details in markets.items():
    # Guard clause: drop the market as soon as one filter value disagrees.
    if any(details.get(key) != wanted for key, wanted in filters.items()):
        continue

    perp_markets.append({symbol: details})

    # Key the per-market figures by the base ticker (symbol minus the quote suffix).
    base = symbol.replace('/USDC:USDC', '')
    info = details['info']
    usd_volume_1d[base] = info['dayNtlVlm']
    open_interest[base] = info['openInterest']
    mid_px[base] = info['midPx']

# One row per ticker, numeric, most-traded first; open interest is converted
# to a USD figure by multiplying it with the mid price.
liquidity_df = (
    pd.DataFrame({
        'usd_volume_1d': usd_volume_1d,
        'open_interest': open_interest,
        'mid_px': mid_px,
    })
    .astype(float)
    .sort_values(by='usd_volume_1d', ascending=False)
    .assign(open_interest_usd=lambda frame: frame['open_interest'] * frame['mid_px'])
)

# Minimum liquidity a ticker needs to enter the candidate universe.
volume_thresh = 5 * 1_000_000
open_int_thresh = 10 * 1_000_000

liquid_rows = liquidity_df.query(f'open_interest_usd > {open_int_thresh} and usd_volume_1d > {volume_thresh}')
# BERA is excluded here; later cells download it separately as bera_df.
symbols = [name for name in liquid_rows.index.tolist() if name != 'BERA']

# Displayed in millions. Note that every column is scaled, mid_px included.
liquidity_df.loc[symbols] / 1_000_000

In [None]:
def historical_funding_rate(ticker, start=None, end=None):
    """Download the funding-rate history of ``ticker`` page by page.

    Parameters
    ----------
    ticker : str
        Base asset name; the symbol requested is ``f"{ticker}/USDC:USDC"``.
    start : int, optional
        Epoch milliseconds passed to the exchange as ``since``. When ``None``
        the first request is sent with ``since=None`` and the exchange chooses
        its own starting point.
    end : int, optional
        Inclusive upper bound in epoch milliseconds. Defaults to the current
        time.

    Returns
    -------
    dict
        ``{timestamp: fundingRate}`` for every entry with ``timestamp <= end``.
    """
    if end is None:
        end = int(datetime.now().timestamp() * 1000)

    symbol = f"{ticker}/USDC:USDC"
    collected = []
    cursor = start
    while cursor is None or cursor < end:
        page = hyperliquid.fetch_funding_rate_history(symbol=symbol, since=cursor, limit=500)
        if not page:
            break

        # The last page can run past the requested window; trim it.
        collected.extend(entry for entry in page if entry['timestamp'] <= end)

        # Resume just after the newest entry received. If that does not move
        # the cursor forward the exchange is repeating itself, so stop rather
        # than loop forever.
        next_cursor = page[-1]['timestamp'] + 1
        if cursor is not None and next_cursor <= cursor:
            break
        cursor = next_cursor

    return {entry['timestamp']: entry['fundingRate'] for entry in collected}


def historical_close_volume(ticker, start=None, end=None, timeframe='1h'):
    """Download close price and volume candles of ``ticker`` page by page.

    Parameters
    ----------
    ticker : str
        Base asset name; the symbol requested is ``f"{ticker}/USDC:USDC"``.
    start : int, optional
        Epoch milliseconds passed to the exchange as ``since``. When ``None``
        the first request is sent with ``since=None`` and the exchange chooses
        its own starting point.
    end : int, optional
        Inclusive upper bound in epoch milliseconds. Defaults to the current
        time.
    timeframe : str, optional
        Candle interval forwarded to ``fetch_ohlcv``; ``'1h'`` by default.

    Returns
    -------
    dict
        ``{timestamp: {'close': ..., 'volume': ...}}`` for every candle with
        ``timestamp <= end``.
    """
    if end is None:
        end = int(datetime.now().timestamp() * 1000)

    symbol = f"{ticker}/USDC:USDC"
    candles = []
    cursor = start
    while cursor is None or cursor < end:
        page = hyperliquid.fetch_ohlcv(symbol=symbol, timeframe=timeframe, since=cursor, limit=500)
        if not page:
            break

        # The last page can run past the requested window; trim it.
        candles.extend(candle for candle in page if candle[0] <= end)

        # Resume just after the newest candle received. If that does not move
        # the cursor forward the exchange is repeating itself, so stop rather
        # than loop forever.
        next_cursor = page[-1][0] + 1
        if cursor is not None and next_cursor <= cursor:
            break
        cursor = next_cursor

    # Each candle is a sequence: index 0 = timestamp, 4 = close, 5 = volume.
    return {candle[0]: {'close': candle[4], 'volume': candle[5]} for candle in candles}


def get_historical_data(ticker, start, end):
    """Build one hourly frame of funding, close and volume for ``ticker``.

    ``start`` and ``end`` are forwarded unchanged to ``historical_funding_rate``
    and ``historical_close_volume``. The millisecond timestamps those return
    become a UTC ``DatetimeIndex``; the frame is then resampled to hourly
    buckets, keeping the last observation of each column per bucket.

    Returns a DataFrame with columns ``funding``, ``close`` and ``volume``.
    """
    funding_by_ts = historical_funding_rate(ticker, start, end)
    bars_by_ts = historical_close_volume(ticker, start, end)

    # One {timestamp: value} mapping per output column.
    columns = {'funding': funding_by_ts}
    for field in ('close', 'volume'):
        columns[field] = {ts: bar[field] for ts, bar in bars_by_ts.items()}

    frame = pd.DataFrame(columns)
    frame = frame.sort_index()
    frame.index = pd.to_datetime(frame.index, unit='ms', utc=True)
    return frame.resample('h').last()

In [None]:
# Look-back window as epoch milliseconds (datetime.timestamp() is in seconds,
# hence the * 1000): from 90 days ago up to the moment this cell runs.
start = int((datetime.now() - timedelta(days=90)).timestamp() * 1000)
end = int(datetime.now().timestamp() * 1000)

# Hourly funding/close/volume history for BERA over that window.
bera_df = get_historical_data('BERA', start, end)
bera_df.head()

In [None]:
# symbol -> DataFrame returned by get_historical_data
data = {}

# Re-anchor the window at BERA's first timestamp (converted back to epoch
# milliseconds) so every symbol is requested from the same starting point.
# `end` is reused from the cell that built bera_df.
start = int(bera_df.index[0].timestamp() * 1000)
for symbol in tqdm(symbols):
    data[symbol] = get_historical_data(symbol, start, end)

In [None]:
# Pivot the per-symbol frames into one wide frame per field
# (index = timestamp, one column per symbol).
funding_df = pd.DataFrame({k: df['funding'] for k, df in data.items()})
price_df = pd.DataFrame({k: df['close'] for k, df in data.items()})
volume_df = pd.DataFrame({k: df['volume'] for k, df in data.items()})
# Close price times volume.
# NOTE(review): this is a USD notional only if `volume` is in base units - confirm.
notional_df = price_df * volume_df

# Only a cell's last expression is rendered automatically, so the first two
# previews need an explicit display() (provided by the IPython kernel).
display(funding_df.head())
display(price_df.head())
notional_df.head()

In [None]:
# Start of the window used for return and correlation estimates.
start = '2025-02-15 00:00:00+00:00'

# fill_method=None: do not forward-fill missing prices before differencing.
# The implicit pad is deprecated in pandas and turns missing hours into
# artificial zero returns; with None those hours become NaN and are dropped.
bera_returns = bera_df['close'].loc[start:].pct_change(fill_method=None).dropna()
asset_returns = price_df.loc[start:].pct_change(fill_method=None).dropna()
# Keep only timestamps present in both, so rows line up one-to-one.
bera_returns, asset_returns = bera_returns.align(asset_returns, join='inner', axis=0)

mean_returns = asset_returns.mean()
cov_matrix = asset_returns.cov()

# Correlation of each candidate asset's hourly returns with BERA's.
correlations = asset_returns.corrwith(bera_returns)
correlations.sort_values(ascending=False)

In [None]:
import cvxpy as cp
import numpy as np

# Largest share of the basket any single asset may take.
max_weight = 0.5

# One weight per candidate asset (column of asset_returns).
num_assets = asset_returns.shape[1]
weights = cp.Variable(num_assets)

# Basket return per timestamp as a linear expression in the weights.
portfolio_return = asset_returns.values @ weights

# Tracking error: norm of the return gap against BERA. Plain numpy values are
# used on both sides so the expression never depends on pandas index handling.
tracking_error = cp.norm(portfolio_return - bera_returns.values, 'fro')

objective = cp.Minimize(tracking_error)

constraints = [
    cp.sum(weights) == 1,       # fully invested
    weights >= 0,               # long-only basket
    weights <= max_weight,      # concentration cap per asset
]

problem = cp.Problem(objective, constraints)
problem.solve()

# weights.value stays None when the solver finds no solution (e.g. the
# constraints are infeasible). Fail here instead of in a later cell.
if weights.value is None:
    raise RuntimeError(f"Tracking-error optimisation produced no weights (status: {problem.status})")

optimal_weights = weights.value

In [None]:
# Realised return of the optimised basket at each timestamp.
portfolio_returns = asset_returns @ optimal_weights

# Restrict both series to their common timestamps before comparing them.
portfolio_returns, bera_returns = portfolio_returns.align(bera_returns, join='inner')

# Per-period gap between basket and benchmark; its population standard
# deviation (ddof=0) is the tracking error.
return_differences = portfolio_returns.sub(bera_returns)
tracking_error = return_differences.std(ddof=0)

# Cell output: correlation between BERA and the basket.
bera_returns.corr(portfolio_returns)

In [None]:
# Label the solver output with the asset tickers and rank it, largest first.
asset_names = asset_returns.columns
portfolio_weights = (
    pd.Series(optimal_weights, index=asset_names, name='Weight')
    .sort_values(ascending=False)
)
portfolio_weights.plot.bar()

In [None]:
# Additive cumulation: running sum of the hourly returns, not compounded growth.
# These are the series the plot has always shown; the compounded versions that
# used to be computed first were overwritten before being used.
cumulative_bera_returns = bera_returns.cumsum()
cumulative_portfolio_returns = portfolio_returns.cumsum()

fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(cumulative_bera_returns, label='BERA Asset Cumulative Returns', color='blue')
ax.plot(cumulative_portfolio_returns, label='Optimized Portfolio Cumulative Returns', color='orange')
ax.set_title('Cumulative Returns: BERA Asset vs. Optimized Portfolio')
ax.set_xlabel('Time')
ax.set_ylabel('Cumulative Return')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Entry time of the simulated trade.
start = '2025-03-09 00:00:00+00:00'

# USD size of the position, converted to BERA units at the entry close.
position_size = 1_000_000
bera_entry_px = bera_df['close'].loc[start]
position_size_bera = position_size / bera_entry_px

bera_close = bera_df['close'].loc[start:]

# Long spot leg and the mirror-image short perp leg.
bera_spot_pnl = (bera_close - bera_entry_px) * position_size_bera
bera_perp_pnl = -1 * (bera_close - bera_entry_px) * position_size_bera

# Funding accrues on the position's current notional, i.e. units * price.
# For the short leg this equals position_size - bera_perp_pnl (not "+").
# NOTE(review): sign convention assumed - a positive funding rate is added to
# the short's PnL. Confirm against the exchange's funding documentation.
bera_perp_notional = position_size_bera * bera_close
bera_funding_costs = bera_df['funding'].loc[start:] * bera_perp_notional

# Proxy hedge: the same USD size split across the basket by portfolio_weights,
# converted to units of each asset at its entry price.
portfolio_position_sizes = position_size * portfolio_weights[price_df.columns]
portfolio_sizes_quote = portfolio_position_sizes / price_df[portfolio_position_sizes.index].loc[start]

portfolio_prices = price_df.loc[start:]
portfolio_perp_pnl = (-1 * (portfolio_prices - price_df.loc[start]) * portfolio_sizes_quote)
# Same notional rule as for BERA: units held times current price, per asset.
portfolio_perp_notional = portfolio_prices * portfolio_sizes_quote
portfolio_funding_costs = funding_df.loc[start:] * portfolio_perp_notional

# Total PnL of long BERA spot hedged with (a) the BERA perp, (b) the basket.
pd.DataFrame({
    'BERA Perp': bera_spot_pnl + bera_perp_pnl + bera_funding_costs.fillna(0).cumsum(),
    'Portfolio Perp': (bera_spot_pnl + portfolio_perp_pnl.sum(1) + portfolio_funding_costs.sum(1).cumsum()).dropna(),
}).plot()

# NOTE(review): the factor 12 presumably annualises a roughly one-month window - confirm.
bera_funding_costs.sum() * 12

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Hourly returns of every candidate asset. fill_method=None avoids the
# deprecated implicit forward-fill, which would turn missing hours into
# artificial zero returns; rows with any missing return are dropped instead.
returns_df = price_df.pct_change(fill_method=None).dropna()

# Standardise each asset's returns so no single volatile asset dominates the PCA.
scaler = StandardScaler()
returns_scaled = scaler.fit_transform(returns_df)

# Fit PCA with all components and project the returns onto them.
pca = PCA()
pca.fit(returns_scaled)
principal_components = pca.transform(returns_scaled)

# Component scores indexed by timestamp (column 0 = first component).
pc_df = pd.DataFrame(data=principal_components, index=returns_df.index)

# How closely the first component tracks BERA. Series.corr aligns the two
# series on their shared timestamps.
target_returns = bera_df['close'].pct_change(fill_method=None).dropna()
correlation_with_pc1 = pc_df.iloc[:, 0].corr(target_returns)
print(f"Correlation with PC1: {correlation_with_pc1}")

In [None]:
# Rows of pca.components_ are the fitted components; row 0 is the first one.
loadings = pca.components_
pc1_loadings = loadings[0]

# One row per asset: its PC1 loading, and that loading as a share of the total
# so the shares add up to 1.
loadings_df = pd.Series(pc1_loadings, index=returns_df.columns, name='PC1 Loading').to_frame()
loadings_df['Adjusted Weight'] = loadings_df['PC1 Loading'].div(loadings_df['PC1 Loading'].sum())

print(loadings_df)

In [None]:
# Start from an independent copy of the PC1-derived weights. Normalising in
# place on the column taken from loadings_df could write back into that frame
# on pandas versions without copy-on-write.
optimized_weights = loadings_df['Adjusted Weight'].copy()

# Optional: forbid short positions by flooring the weights at zero.
#optimized_weights = optimized_weights.clip(lower=0)

# Rescale so the weights sum to 1 (only changes anything when the clip above is enabled).
# NOTE(review): if loadings of mixed sign nearly cancel, the sum is close to
# zero and the weights blow up - confirm this cannot happen for this universe.
optimized_weights = optimized_weights / optimized_weights.sum()

print("Optimized Portfolio Weights:")
print(optimized_weights)

In [None]:
# Scale each asset's return column by its weight, then add across assets to
# get the basket return per timestamp.
weighted_returns = returns_df.mul(optimized_weights, axis='columns')
optimized_portfolio_returns = weighted_returns.sum(axis='columns')

# How closely the PCA-weighted basket follows the target asset.
optimized_correlation = optimized_portfolio_returns.corr(target_returns)
print(f"Optimized Correlation with Target Asset: {optimized_correlation}")

In [None]:
from sklearn.decomposition import PCA

# Step 1: PCA on the asset returns. The component count is capped by the data
# shape so the cell also runs with fewer than 5 assets or observations.
n_components = min(5, *asset_returns.shape)  # you can tune this
pca = PCA(n_components=n_components)
pca_factors = pca.fit_transform(asset_returns)

# Step 2: rebuild the returns from the retained components only (denoising).
cleaned_returns = pca.inverse_transform(pca_factors)
cleaned_returns_df = pd.DataFrame(cleaned_returns, index=asset_returns.index, columns=asset_returns.columns)

# Align into new names. `asset_returns` and `bera_returns` are deliberately
# left untouched, so re-running this cell gives the same result and later
# cells evaluate the weights on actual returns rather than denoised ones.
bera_aligned, cleaned_aligned = bera_returns.align(cleaned_returns_df, join='inner', axis=0)

# Plain numpy inputs for the solver.
R = cleaned_aligned.values
r_bera = bera_aligned.values

n_assets = R.shape[1]
w = cp.Variable(n_assets)

# Basket return per timestamp as a linear expression in the weights.
r_portfolio = R @ w

# Objective: minimise the squared tracking error against BERA.
objective = cp.Minimize(cp.sum_squares(r_portfolio - r_bera))

# Constraints: fully invested, long-only.
constraints = [cp.sum(w) == 1, w >= 0]

problem = cp.Problem(objective, constraints)
problem.solve()

# w.value stays None when the solver finds no solution; fail here instead of
# building a Series of NaN weights.
if w.value is None:
    raise RuntimeError(f"Tracking optimisation produced no weights (status: {problem.status})")

# Weights labelled by asset ticker.
optimal_weights = pd.Series(w.value, index=cleaned_aligned.columns)
optimal_weights

In [None]:
# Basket return per timestamp under the optimised weights.
tracked_returns = asset_returns @ optimal_weights

# How closely the basket follows BERA.
correlation = tracked_returns.corr(bera_returns)
print(f"Optimized correlation: {correlation:.5f}")

# Compounded cumulative return of both series, drawn on the same axes.
for series, legend_name in ((bera_returns, "BERA"), (tracked_returns, "Optimized Basket")):
    series.add(1).cumprod().sub(1).plot(label=legend_name, linewidth=2)

plt.title("Tracking Optimization vs. BERA")
plt.legend()
plt.grid(True)
plt.show()