In [None]:
import pandas as pd
import numpy as np
import streamlit as st
from itertools import combinations
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import seaborn as sns
from xbbg import blp

# ----------------------------------
# Step 1: Config and Setup
# ----------------------------------

# Index identifiers requested through blp.bdh, keyed by the short display
# name used as the column label everywhere else in this file.
TICKERS = {
    "CDX IG": "CDX IG CDSI GEN 5Y Corp",
    "CDX HY": "CDX HY CDSI GEN 5Y Corp",
    "iTraxx Main": "ITRAXX EUR CDSI GEN 5Y Corp",
    "iTraxx Xover": "ITRAXX XOVER CDSI GEN 5Y Corp",
}

# Running coupon of each index, in basis points.
COUPON_RATES = {
    "CDX IG": 100,
    "CDX HY": 500,
    "iTraxx Main": 100,
    "iTraxx Xover": 500,
}

# Upper bound on weight * CS01 enforced per index by the optimiser.
CS01_LIMITS = {
    "CDX IG": 250_000,
    "CDX HY": 100_000,
    "iTraxx Main": 250_000,
    "iTraxx Xover": 100_000,
}

# Reference notional per unit of hedge weight.
NOTIONAL = 10_000_000

# History window requested from Bloomberg (ISO dates); the end is "today".
START_DATE = "2023-01-01"
END_DATE = pd.Timestamp.today().date().isoformat()

# ----------------------------------
# Step 2: Data Loading
# ----------------------------------

def load_data(pnl_file):
    """Load desk PnL, aggregate it to weekly, and align it with CDS history.

    Args:
        pnl_file: Path or file-like object of a CSV that has a ``date``
            column; the first remaining column is used as the desk PnL.

    Returns:
        Tuple ``(desk_pnl, cds_data)``: the weekly PnL as a Series and the
        fetched CDS columns as a DataFrame, both restricted to the dates on
        which every PnL and CDS column has a value.

    Raises:
        ValueError: If the CSV has no PnL column, no CDS data could be
            fetched, or the PnL and CDS histories share no dates.
    """
    # Load PNL data from CSV
    pnl_data = pd.read_csv(pnl_file, parse_dates=['date'])
    pnl_data = pnl_data.set_index('date')
    if pnl_data.shape[1] == 0:
        raise ValueError("PNL file must contain a PnL column besides 'date'")

    # Resample daily PNL to weekly (weeks ending on Friday)
    weekly_pnl = pnl_data.resample('W-FRI').sum()

    # Fetch CDS data from Bloomberg
    cds_data = fetch_cds_data(TICKERS, START_DATE, END_DATE)
    if cds_data.empty:
        raise ValueError("No CDS data could be fetched from Bloomberg")

    # Align dates between PNL and CDS data
    aligned_data = pd.concat([weekly_pnl, cds_data], axis=1).dropna()
    if aligned_data.empty:
        raise ValueError("PNL data and CDS data have no dates in common")

    # Split by the number of PnL columns rather than assuming there is
    # exactly one, so extra PnL columns never end up among the CDS features.
    n_pnl_cols = weekly_pnl.shape[1]
    return aligned_data.iloc[:, 0], aligned_data.iloc[:, n_pnl_cols:]

def fetch_cds_data(tickers, start_date, end_date):
    """Download weekly ``PX_LAST`` history for each index via ``blp.bdh``.

    Fetching is best effort: a ticker that fails or returns nothing is
    reported with ``print`` and skipped.

    Args:
        tickers: Mapping of short display name -> Bloomberg ticker.
        start_date: Start of the requested history.
        end_date: End of the requested history.

    Returns:
        DataFrame with one column per successfully fetched ticker, labelled
        with its short display name and limited to dates on which every
        fetched series has a value. Empty when nothing could be fetched.
    """
    fetched = []
    for name, bbg_ticker in tickers.items():
        try:
            df = blp.bdh(bbg_ticker, 'PX_LAST', start_date, end_date, Per='W')
        except Exception as e:
            # Deliberately broad: one bad ticker must not abort the others.
            print(f"Failed to fetch {bbg_ticker}: {e}")
            continue
        if df.empty:
            print(f"Failed to fetch {bbg_ticker}: no data returned")
            continue

        # One ticker and one field were requested, so the frame holds a single
        # column. Select it by position instead of renaming 'PX_LAST'.
        # NOTE(review): xbbg appears to label bdh columns as (ticker, field)
        # pairs, in which case a rename keyed on 'PX_LAST' would not produce
        # a flat column called `name` - confirm against the xbbg docs.
        series = df.iloc[:, 0].rename(name)
        # NOTE(review): presumably the index may hold plain date objects;
        # normalising to datetimes keeps it alignable with the PnL index.
        series.index = pd.to_datetime(series.index)
        fetched.append(series)

    if not fetched:
        return pd.DataFrame()
    # Concatenate once rather than growing a frame inside the loop.
    return pd.concat(fetched, axis=1).dropna()

# ----------------------------------
# Step 3: Modeling & Optimization
# ----------------------------------

def calculate_cs01(spread, notional, duration=5):
    """Approximate CS01 as ``notional * duration`` scaled by one basis point.

    Args:
        spread: Accepted for interface compatibility; it does not enter the
            calculation.
        notional: Position notional.
        duration: Duration multiplier applied to the notional.

    Returns:
        ``notional * duration * 0.0001``.
    """
    one_basis_point = 0.0001
    duration_weighted_notional = notional * duration
    return duration_weighted_notional * one_basis_point

def train_pnl_model(features, target):
    """Fit a random-forest regressor of ``target`` on ``features``.

    Only the training part of a seeded 80/20 split is used for fitting; the
    held-out part is discarded.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    split = train_test_split(features, target, test_size=0.2, random_state=42)
    # train_test_split returns (X_train, X_test, y_train, y_test).
    train_features, train_target = split[0], split[2]

    regressor = RandomForestRegressor(n_estimators=100, random_state=42)
    regressor.fit(train_features, train_target)
    return regressor

def train_signal_model(features, pnl):
    """Fit a random-forest classifier that predicts whether ``pnl`` is positive.

    Args:
        features: Model inputs, row-aligned with ``pnl``.
        pnl: PnL values; rows with ``pnl > 0`` are labelled 1, all others 0.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    is_profitable = (pnl > 0).astype(int)
    classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    classifier.fit(features, is_profitable)
    return classifier

def optimize_hedge(expected_pnl, spreads, cs01_values, cs01_limits, tickers):
    """Choose non-negative hedge weights maximising expected PnL net of cost.

    Args:
        expected_pnl: Expected weekly hedge PnL per instrument.
        spreads: Series of current spreads, one per instrument.
        cs01_values: CS01 per unit weight for each instrument.
        cs01_limits: Cap on ``weight * cs01`` for each instrument.
        tickers: Instrument names, used to look up ``COUPON_RATES``.

    Returns:
        Array of weights, or all zeros if the optimiser reports failure.
    """
    n_assets = len(expected_pnl)

    # Weekly cost per instrument: roll component plus running coupon.
    roll_component = 0.2 * spreads.values * NOTIONAL / 52 * 0.0001
    coupon_component = np.array(
        [COUPON_RATES[t] * NOTIONAL * 1/52 * 0.0001 for t in tickers]
    )
    net_edge = expected_pnl - (roll_component + coupon_component)

    def negative_net_pnl(weights):
        # minimize() minimises, so negate the quantity to be maximised.
        return -np.dot(weights, net_edge)

    def cs01_headroom(position):
        # Inequality constraints are satisfied while the value is >= 0.
        def headroom(w):
            return cs01_limits[position] - w[position] * cs01_values[position]
        return headroom

    cs01_constraints = [
        {'type': 'ineq', 'fun': cs01_headroom(k)} for k in range(n_assets)
    ]
    long_only = [(0, None)] * n_assets

    outcome = minimize(
        negative_net_pnl,
        np.zeros(n_assets),
        bounds=long_only,
        constraints=cs01_constraints,
    )
    if not outcome.success:
        return np.zeros(n_assets)
    return outcome.x

# ----------------------------------
# Step 4: Backtesting
# ----------------------------------

def backtest_strategy(signals, hedge_pnl_df, desk_pnl, max_hold_weeks=10, stop_loss=-50000):
    """Replay weekly hedge signals and accumulate hedged vs. unhedged PnL.

    A hedge is opened when flat and the signal is 1. An open hedge is closed,
    before that week's hedge PnL is booked, when the signal is 0, when it has
    already been held for ``max_hold_weeks`` weeks, or when its accumulated
    hedge PnL is below ``stop_loss``. A hedge closed in a given week can be
    reopened the following week at the earliest.

    Args:
        signals: Series of 0/1 signals, one per week.
        hedge_pnl_df: Weekly hedge PnL per instrument, row-aligned with
            ``signals``; the columns are summed for each week.
        desk_pnl: Series of weekly desk PnL, row-aligned with ``signals``.
        max_hold_weeks: Maximum number of weeks one hedge stays open.
        stop_loss: Accumulated hedge PnL below which an open hedge is closed.

    Returns:
        Tuple ``(cum_unhedged, cum_hedged, pnl_history)``: two Series indexed
        like ``signals`` with cumulative desk PnL and cumulative desk + hedge
        PnL, and the list of weekly desk + hedge PnL values.

    Raises:
        ValueError: If ``hedge_pnl_df`` or ``desk_pnl`` has fewer rows than
            ``signals``.
    """
    n_weeks = len(signals)
    if len(hedge_pnl_df) < n_weeks or len(desk_pnl) < n_weeks:
        raise ValueError("hedge_pnl_df and desk_pnl must cover every signal row")

    in_position = False
    entry_idx = -1
    open_pnl = 0
    running_total = 0
    pnl_history = []
    cum_hedged = []

    for i, signal in enumerate(signals):
        if not in_position:
            if signal == 1:
                in_position = True
                entry_idx = i
                open_pnl = 0
        else:
            # Weeks whose hedge PnL has already been booked for this trade.
            weeks_held = i - entry_idx
            # '>=' so a hedge earns at most max_hold_weeks weekly PnLs; a
            # strict '>' would keep it open for one extra week.
            if signal == 0 or weeks_held >= max_hold_weeks or open_pnl < stop_loss:
                in_position = False
                entry_idx = -1
                open_pnl = 0

        hedge_val = hedge_pnl_df.iloc[i].sum() if in_position else 0
        total_pnl = desk_pnl.iloc[i] + hedge_val
        open_pnl += hedge_val
        pnl_history.append(total_pnl)

        # Running total instead of re-summing the whole history every week.
        running_total += total_pnl
        cum_hedged.append(running_total)

    # Missing desk values count as zero in the cumulative unhedged series.
    cum_unhedged = desk_pnl.iloc[:n_weeks].fillna(0).cumsum().to_numpy()

    return (
        pd.Series(cum_unhedged, index=signals.index),
        pd.Series(cum_hedged, index=signals.index),
        pnl_history,
    )

# ----------------------------------
# Step 5: Run Model
# ----------------------------------

def _weekly_hedge_cost(latest_spreads, combo):
    """Return the weekly roll + coupon cost per unit weight for ``combo``."""
    roll_cost = 0.2 * latest_spreads.values * NOTIONAL / 52 * 0.0001
    coupon_cost = np.array([COUPON_RATES[t] * NOTIONAL * 1/52 * 0.0001 for t in combo])
    return roll_cost + coupon_cost


def _sharpe_ratio(pnl):
    """Return mean/std of ``pnl``, or ``-inf`` when the ratio is undefined."""
    volatility = pnl.std()
    if not np.isfinite(volatility) or volatility == 0:
        return float('-inf')
    ratio = pnl.mean() / volatility
    return ratio if np.isfinite(ratio) else float('-inf')


def run_model(pnl_file):
    """Pick the best one- or two-index hedge and train its signal model.

    Loads and aligns the data, fits the PnL model on weekly spread changes,
    evaluates every single index and pair of indices by the mean/std ratio of
    the resulting net PnL, and trains a classifier on the winning combination.

    Args:
        pnl_file: Path or file-like object passed through to ``load_data``.

    Returns:
        Dict with keys ``combo``, ``weights``, ``net_pnl``, ``signals``,
        ``features``, ``hedge_pnl``, ``desk_pnl`` and ``full_hedge_pnl``
        describing the best combination.

    Raises:
        ValueError: If none of the configured indices has data.
    """
    desk_pnl, cds_data = load_data(pnl_file)
    features = cds_data.pct_change().dropna()
    desk_pnl = desk_pnl.loc[features.index]  # Align dates

    # fetch_cds_data skips tickers it cannot download, so build candidates
    # only from the indices that are actually present.
    available = [name for name in TICKERS if name in features.columns]
    if not available:
        raise ValueError("No CDS index data available to build hedge candidates")

    pnl_model = train_pnl_model(features, desk_pnl)
    predicted_pnl = pd.Series(pnl_model.predict(features), index=features.index)
    latest_row = cds_data.iloc[-1]

    results = []
    for r in (1, 2):
        for combo in combinations(available, r):
            columns = list(combo)
            hedge_pnl = -features[columns] * NOTIONAL * 0.5
            latest_spreads = latest_row[columns]
            cs01_vals = [calculate_cs01(latest_spreads[t], NOTIONAL) for t in combo]
            cs01_lims = [CS01_LIMITS[t] for t in combo]
            weights = optimize_hedge(
                hedge_pnl.mean().values, latest_spreads, cs01_vals, cs01_lims, combo
            )

            total_cost = _weekly_hedge_cost(latest_spreads, combo)
            weighted_hedge_pnl = hedge_pnl @ weights
            net_pnl = predicted_pnl + weighted_hedge_pnl - (weights @ total_cost)

            results.append({
                'combo': combo,
                'weights': weights,
                'net_pnl': net_pnl,
                # -inf for an undefined ratio keeps max() well-ordered; a NaN
                # key would make the selection depend on iteration order.
                'sharpe': _sharpe_ratio(net_pnl),
                'hedge_pnl': weighted_hedge_pnl,
                'full_hedge_pnl': hedge_pnl
            })

    best = max(results, key=lambda x: x['sharpe'])
    best_features = features[list(best['combo'])]
    signal_model = train_signal_model(best_features, best['net_pnl'])
    predicted_signals = pd.Series(signal_model.predict(best_features), index=features.index)

    return {
        'combo': best['combo'],
        'weights': best['weights'],
        'net_pnl': best['net_pnl'],
        'signals': predicted_signals,
        'features': best_features,
        'hedge_pnl': best['hedge_pnl'],
        'desk_pnl': desk_pnl,
        'full_hedge_pnl': best['full_hedge_pnl']
    }

# ----------------------------------
# Step 6: Streamlit Dashboard
# ----------------------------------

def display_dashboard(results):
    """Render the hedge recommendation, diagnostics and backtest in Streamlit.

    Page configuration is not set here; the script entry point sets it before
    any other Streamlit call.

    Args:
        results: Dict as returned by ``run_model``; the keys ``combo``,
            ``weights``, ``net_pnl``, ``signals``, ``features``,
            ``full_hedge_pnl`` and ``desk_pnl`` are read.
    """
    st.title("📊 CDS Hedging Dashboard with Backtest")

    combo = results['combo']
    weights = results['weights']
    net_pnl = results['net_pnl']
    signals = results['signals']
    hedge_notional = pd.Series(weights * NOTIONAL, index=combo)

    st.markdown(f"""
    ### 🔎 Best Hedge Instruments:
    - **{' and '.join(combo)}**
    - Optimal Weights: {weights}
    - Notional Amounts: {hedge_notional.to_dict()}
    """)

    st.markdown("""
    ### 💰 Cost Structure
    - **Roll Cost**: 20% of spread annually (weekly: 0.2 * spread * notional / 52 * 0.0001)
    - **Coupon Cost**: 
      - IG/Main: 100bps (weekly: 100 * notional / 52 * 0.0001)
      - HY/Xover: 500bps (weekly: 500 * notional / 52 * 0.0001)
    """)

    st.subheader("📈 Net PnL Over Time")
    st.line_chart(net_pnl.rename("Net PnL"))

    st.subheader("📏 Hedge Notional Bar Chart")
    st.bar_chart(hedge_notional.rename("Notional"))

    st.subheader("🛎️ Weekly Hedge Trade Signals")
    signal_df = pd.DataFrame({
        "Date": net_pnl.index,
        "Signal": signals.replace({1: "BUY", 0: "HOLD"}),
        "Net PnL": net_pnl.values
    }).set_index("Date")
    # Highlight only the numeric column; the maximum of the text labels in
    # "Signal" carries no meaning.
    st.dataframe(signal_df.style.highlight_max(subset=["Net PnL"], axis=0))

    st.subheader("🔬 Feature Correlation Heatmap")
    corr = results['features'].corr()
    fig, ax = plt.subplots()
    sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
    st.pyplot(fig)
    # Release the figure so repeated script runs do not accumulate open figures.
    plt.close(fig)

    st.subheader("🔁 Strategy Backtest")

    # Scale each instrument's hedge PnL by its optimised weight so the
    # backtest reflects the weights and notionals reported above rather than
    # one unit of every instrument.
    weighted_hedge_pnl = results['full_hedge_pnl'] * weights

    cum_unhedged, cum_hedged, hedged_pnl = backtest_strategy(
        results['signals'],
        weighted_hedge_pnl,
        results['desk_pnl']
    )

    chart_df = pd.DataFrame({
        'Unhedged Cumulative PnL': cum_unhedged,
        'Hedged Cumulative PnL': cum_hedged
    })
    st.line_chart(chart_df)

    st.markdown(f"""
    **Backtest Summary**
    - Total Unhedged PnL: ${cum_unhedged.iloc[-1]:,.0f}
    - Total Hedged PnL: ${cum_hedged.iloc[-1]:,.0f}
    - Strategy Gain: ${cum_hedged.iloc[-1] - cum_unhedged.iloc[-1]:,.0f}
    """)

# ----------------------------------
# MAIN RUN
# ----------------------------------

if __name__ == "__main__":
    # Streamlit releases that require set_page_config to be the first
    # Streamlit command raise if it follows the sidebar widgets, so it is
    # issued here, once, before anything else is drawn.
    st.set_page_config(layout="wide")

    # Add file uploader for PNL CSV
    st.sidebar.title("PNL Data Upload")
    pnl_file = st.sidebar.file_uploader("Upload your PNL CSV file", type=["csv"])

    if pnl_file is not None:
        try:
            results = run_model(pnl_file)
            display_dashboard(results)
        except Exception as e:
            # Top-level boundary: surface any failure in the UI.
            st.error(f"Error processing file: {str(e)}")
    else:
        st.info("Please upload a PNL CSV file to begin analysis")