# In [4]:  (Jupyter notebook cell prompt, kept as a comment so the file parses as Python)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from scipy.optimize import minimize

# ------------------------------
# Configuration & Sample Data
# ------------------------------

# Credit indices considered as hedge instruments; the order here fixes the
# column order of the simulated spread table below.
TICKERS = ['CDX IG', 'CDX HY', 'iTraxx Main', 'iTraxx Xover']
# Per-index coupon. It is multiplied by 0.0001 where it is consumed, i.e. it
# is treated as a basis-point quantity.
COUPON_RATES = dict(zip(TICKERS, (100, 500, 100, 500)))
CS01_LIMITS = dict(zip(TICKERS, (0.25, 0.10, 0.25, 0.10)))  # in millions
NOTIONAL = 10  # in millions
FREQ = 'D'  # Change to 'W-FRI' for weekly analytics

# Fixed seed so every run sees the same simulated series.
np.random.seed(42)
dates = pd.date_range(start='2021-01-01', end='2025-01-01', freq=FREQ)

# Simulate desk PnL and CDS spreads.
# Draw order matters for reproducibility: desk PnL first, then one spread
# series per ticker in TICKERS order.
desk_pnl = pd.Series(
    np.random.normal(loc=0, scale=0.01 if FREQ == 'D' else 0.05, size=len(dates)),
    index=dates,
)
cds_data = pd.DataFrame(
    dict(
        (ticker, np.random.normal(loc=100, scale=10, size=len(dates)))
        for ticker in TICKERS
    ),
    index=dates,
)

# ------------------------------
# Utility Functions
# ------------------------------

def calculate_cs01(spread, notional=NOTIONAL, duration=5):
    """Approximate a position's CS01 as notional x duration x one basis point.

    Args:
        spread: Accepted for interface compatibility; this approximation
            does not use it.
        notional: Position size; defaults to the module-level NOTIONAL.
        duration: Duration multiplier applied to the notional.

    Returns:
        ``notional * duration * 0.0001``.
    """
    one_basis_point = 0.0001
    return notional * duration * one_basis_point

def train_pnl_model(features, target):
    """Fit a random-forest regressor of ``target`` on ``features``.

    The rows are split 80/20 with a fixed seed and only the 80% portion is
    used for fitting; the held-out 20% is discarded without being scored.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    fit_X, _held_out_X, fit_y, _held_out_y = train_test_split(
        features,
        target,
        test_size=0.2,
        random_state=42,
    )
    regressor = RandomForestRegressor(n_estimators=100, random_state=42)
    regressor.fit(fit_X, fit_y)
    return regressor

def train_signal_model(features, pnl):
    """Fit a random-forest classifier that predicts whether PnL is positive.

    The training label is 1 where ``pnl > 0`` and 0 otherwise. All rows of
    ``features`` are used for fitting (no hold-out split).

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    is_gain = (pnl > 0).astype(int)
    classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    classifier.fit(features, is_gain)
    return classifier

def optimize_hedge(expected_pnl, spreads, cs01_values, cs01_limits, tickers):
    """Choose non-negative hedge weights that maximise expected PnL net of carry.

    The objective is ``weights . (expected_pnl - total_cost)``, where
    ``total_cost`` is a roll cost derived from ``spreads`` plus a coupon cost
    looked up in COUPON_RATES for each entry of ``tickers``. Each weight is
    bounded below by zero and constrained so that
    ``weight * cs01_values[i] <= cs01_limits[i]``.

    Args:
        expected_pnl: Expected hedge PnL per instrument (1-D array-like).
        spreads: Object exposing ``.values`` (e.g. a pandas Series) with one
            spread per instrument, in the same order as ``expected_pnl``.
        cs01_values: CS01 per unit weight for each instrument.
        cs01_limits: CS01 cap for each instrument.
        tickers: Instrument names used to look up coupon rates.

    Returns:
        The optimiser's weight vector, or an all-zero vector if the
        optimiser does not report success.
    """
    # Scale the annual carry assumptions to the sampling frequency.
    if FREQ == 'D':
        roll_multiplier, coupon_multiplier = 0.2 / 252, 1/252
    else:
        roll_multiplier, coupon_multiplier = 0.2 / 52, 1/52

    n_assets = len(expected_pnl)

    roll_cost = roll_multiplier * spreads.values * NOTIONAL * 0.0001
    coupon_cost = np.array(
        [COUPON_RATES[name] * NOTIONAL * coupon_multiplier * 0.0001 for name in tickers]
    )
    total_cost = roll_cost + coupon_cost
    net_edge = expected_pnl - total_cost

    def objective(weights):
        # minimize() minimises, so negate the quantity we want to maximise.
        return -np.dot(weights, net_edge)

    def make_headroom(position):
        # A factory binds `position` per constraint, avoiding the
        # late-binding closure pitfall.
        def headroom(w):
            return cs01_limits[position] - w[position] * cs01_values[position]
        return headroom

    constraints = [
        {'type': 'ineq', 'fun': make_headroom(position)}
        for position in range(n_assets)
    ]
    bounds = [(0, None)] * n_assets
    initial_guess = np.zeros(n_assets)

    result = minimize(objective, initial_guess, bounds=bounds, constraints=constraints)
    if not result.success:
        return np.zeros(n_assets)
    return result.x

# ------------------------------
# Strategy Execution
# ------------------------------

def run_model(desk_pnl, cds_data):
    """Pick the best two-index hedge for the desk PnL and derive trade signals.

    Steps:
      1. Build features as first differences of ``cds_data``.
      2. Fit a PnL model on those features and predict PnL on the same rows.
      3. For every pair of TICKERS, size the hedge with ``optimize_hedge``
         and compute the net PnL series and its mean/std ratio ("sharpe").
      4. Keep the pair with the highest finite sharpe and fit a signal
         classifier on that pair's features.

    Args:
        desk_pnl: Series of desk PnL indexed like ``cds_data``.
        cds_data: DataFrame of spreads with one column per entry in TICKERS.

    Returns:
        dict with keys ``combo``, ``weights``, ``net_pnl``, ``signals``,
        ``features``, ``hedge_pnl``, ``desk_pnl`` and ``full_hedge_pnl``.

    Raises:
        ValueError: If no complete feature rows remain after differencing.
        KeyError: If ``desk_pnl`` lacks labels present in the feature index.
    """
    features = cds_data.diff().dropna()
    if features.empty:
        # Fail early with an actionable message instead of an opaque shape
        # error from the downstream libraries.
        raise ValueError(
            "cds_data must contain at least two complete rows to build "
            "spread-change features; none remain after diff().dropna()."
        )

    # Align the target to the surviving feature rows (diff drops the first).
    desk_pnl = desk_pnl.loc[features.index]
    pnl_model = train_pnl_model(features, desk_pnl)
    # NOTE: predictions are made on the same rows the model was fitted from,
    # so they are in-sample.
    predicted_pnl = pd.Series(pnl_model.predict(features), index=features.index)

    # Loop-invariant carry multipliers, scaled to the sampling frequency.
    roll_multiplier = 0.2 / 252 if FREQ == 'D' else 0.2 / 52
    coupon_multiplier = 1/252 if FREQ == 'D' else 1/52
    latest_row = cds_data.iloc[-1]

    results = []
    for combo in combinations(TICKERS, 2):
        pair = list(combo)
        hedge_pnl = -features[pair] * NOTIONAL * 0.5
        avg_hedge_pnl = hedge_pnl.mean().values
        latest_spreads = latest_row[pair]
        cs01_vals = [calculate_cs01(latest_spreads[t]) for t in combo]
        cs01_lims = [CS01_LIMITS[t] for t in combo]
        weights = optimize_hedge(avg_hedge_pnl, latest_spreads, cs01_vals, cs01_lims, combo)

        roll_cost = roll_multiplier * latest_spreads.values * NOTIONAL * 0.0001
        coupon_cost = np.array(
            [COUPON_RATES[t] * NOTIONAL * coupon_multiplier * 0.0001 for t in combo]
        )
        total_cost = roll_cost + coupon_cost

        weighted_hedge_pnl = hedge_pnl @ weights
        net_pnl = predicted_pnl + weighted_hedge_pnl - (weights @ total_cost)
        sharpe = net_pnl.mean() / net_pnl.std()

        results.append({
            'combo': combo,
            'weights': weights,
            'net_pnl': net_pnl,
            'sharpe': sharpe,
            'hedge_pnl': weighted_hedge_pnl,
            'full_hedge_pnl': hedge_pnl,
        })

    def _rank(candidate):
        # NaN/inf ratios (zero or undefined std) make max() order-dependent;
        # rank them last so a well-defined candidate always wins.
        score = candidate['sharpe']
        return score if np.isfinite(score) else -np.inf

    best = max(results, key=_rank)
    best_features = features[list(best['combo'])]
    signal_model = train_signal_model(best_features, best['net_pnl'])
    predicted_signals = pd.Series(signal_model.predict(best_features), index=features.index)

    return {
        'combo': best['combo'],
        'weights': best['weights'],
        'net_pnl': best['net_pnl'],
        'signals': predicted_signals,
        'features': best_features,
        'hedge_pnl': best['hedge_pnl'],
        'desk_pnl': desk_pnl,
        'full_hedge_pnl': best['full_hedge_pnl'],
    }

# ------------------------------
# Visualization
# ------------------------------

def visualize_results(results):
    """Plot and print a summary of the dict returned by ``run_model``.

    Reads the ``combo``, ``weights``, ``net_pnl``, ``signals`` and
    ``features`` entries. Shows a cumulative net-PnL line chart, prints the
    last ten signal/PnL rows, shows a feature-correlation heatmap, and prints
    the chosen pair, its weights and the implied hedge notionals.
    """
    combo = results['combo']
    weights = results['weights']
    pnl_series = results['net_pnl']
    signal_series = results['signals']

    # Weight x NOTIONAL, labelled by instrument, for the final printout.
    hedge_notional = pd.Series(weights * NOTIONAL, index=combo)

    # --- Cumulative net PnL chart ---
    plt.figure(figsize=(12, 5))
    cumulative_pnl = pnl_series.cumsum()
    plt.plot(cumulative_pnl, label='Net PnL')
    plt.title("Net PnL Over Time")
    plt.xlabel("Date")
    plt.ylabel("Cumulative PnL (MM USD)")
    plt.legend()
    plt.grid(True)
    plt.show()

    # --- Most recent signals next to the PnL they were trained on ---
    signal_labels = signal_series.replace({1: 'BUY', 0: 'HOLD'})
    recent_view = pd.DataFrame({'Signal': signal_labels, 'Net PnL': pnl_series})
    print(recent_view.tail(10))

    # --- Correlation between the selected pair's features ---
    plt.figure(figsize=(6, 4))
    feature_corr = results['features'].corr()
    sns.heatmap(feature_corr, annot=True, cmap='coolwarm')
    plt.title("Feature Correlation")
    plt.show()

    print(f"Best Combo: {combo}")
    print(f"Weights: {weights}")
    print(f"Hedge Notionals (MM): {hedge_notional.to_dict()}")

# ------------------------------
# Run All
# ------------------------------

# Run the full pipeline on the simulated series: select the best hedge pair
# and fit the signal model, then plot and print the outcome. `results` is
# kept at module level so it can be inspected afterwards.
results = run_model(desk_pnl, cds_data)
visualize_results(results)


# Notebook output captured with this cell:
#   ValueError: Shape of passed values is (0, 1), indices imply (0, 4)