# Multi-Tenor CDX Tranche Pricing & Correlation Analysis

This notebook implements pricing of CDX.NA.IG.45 tranches across multiple maturities (1Y, 2Y, 3Y, 5Y, 7Y, 10Y) using:
1. **Gaussian Copula Model**: One-factor Li (2000) model
2. **G-VG Copula Model**: Mixed Gaussian-Variance Gamma copula

We analyze:
- Correlation term structure
- Base correlation surface
- Model performance across different tenors

In [1]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
from scipy.optimize import minimize_scalar, minimize
from scipy.interpolate import interp1d
import warnings
# NOTE(review): this silences every warning for the whole kernel session,
# including numerical RuntimeWarnings (divide-by-zero, overflow) that the
# pricing functions below could emit. Consider narrowing the filter.
warnings.filterwarnings('ignore')

# Set plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

## 1. Load Multi-Tenor Data

In [2]:
# Load constituent CDS spreads
# One row per index constituent; the columns printed below ('Company',
# 'Recovery rate', 'Running Spread' and the per-tenor spread columns) are
# expected to be present in this file.
constituents = pd.read_csv('../data/cdx_constituents_multi_tenor.csv')

# Load multi-tenor tranche market data
# The top-level JSON keys are the tenor labels (they are listed below).
with open('../data/cdx_market_data_multi_tenor.json', 'r') as f:
    market_data_multi = json.load(f)

# Load OIS curve
# get_discount_factor() reads the 'Tenor' and 'Mid_Yield' columns of this frame.
ois_curve = pd.read_csv('../data/ois_curve.csv')

# Summary of what was loaded, as a quick sanity check on the inputs.
print("="*60)
print("DATA LOADED")
print("="*60)
print(f"\nConstituents: {len(constituents)} companies")
print(f"Tenors available: {list(market_data_multi.keys())}")
print(f"\nTenor spreads available for each company:")
# Spread columns are recognised by the 'Yr' / 'Mo' substring in their names.
print([col for col in constituents.columns if 'Yr' in str(col) or 'Mo' in str(col)])
print(f"\nOIS Curve points: {len(ois_curve)}")
print("\nFirst 3 constituents:")
print(constituents[['Company', 'Recovery rate', 'Running Spread', '1 Yr', '2 Yr', '3 Yr', '5 Yr', '7 Yr', '10 Yr']].head(3))

DATA LOADED

Constituents: 125 companies
Tenors available: ['1Y', '2Y', '3Y', '5Y', '7Y', '10Y']

Tenor spreads available for each company:
['6 Mo', '1 Yr', '2 Yr', '3 Yr', '4 Yr', '5 Yr', '7 Yr', '10 Yr']

OIS Curve points: 31

First 3 constituents:
              Company  Recovery rate  Running Spread   1 Yr   2 Yr    3 Yr  \
0    Advanced Micro D            0.4             500  18.44  22.87   28.62   
1  Ally Financial Inc            0.4             500  58.23  83.18  101.84   
2    Altria Group Inc            0.4             100  15.49  20.33   27.20   

     5 Yr    7 Yr   10 Yr  
0   36.40   62.36   79.54  
1  147.35  193.07  224.75  
2   39.40   60.80   76.15  


## 2. Core Pricing Functions

### 2.1 Survival Probability Bootstrap

In [3]:
def get_discount_factor(t, ois_curve_df):
    """
    Get the discount factor at time t from the OIS curve.

    The rate at ``t`` is linearly interpolated (and linearly extrapolated
    outside the quoted range) from the curve, then continuously compounded:
    DF(t) = exp(-r(t) * t).

    Args:
        t: Time in years.
        ois_curve_df: DataFrame with a 'Tenor' column of strings such as
            '1W', '6M' or '10Y' and a 'Mid_Yield' column quoted in percent.
            The frame is only read; it is never modified by this function.

    Returns:
        Discount factor at time t.
    """
    # Parse tenor strings to years
    def tenor_to_years(tenor):
        if 'W' in tenor:
            return int(tenor.replace('W', '')) / 52
        elif 'M' in tenor and 'Y' not in tenor:
            return int(tenor.replace('M', '')) / 12
        elif 'Y' in tenor:
            return int(tenor.replace('Y', ''))
        return 0

    # Work on local arrays instead of writing a 'Years' column into the
    # caller's DataFrame: this function is called once per payment date and
    # must not have side effects on the shared curve.
    years = ois_curve_df['Tenor'].apply(tenor_to_years).to_numpy(dtype=float)
    rates = ois_curve_df['Mid_Yield'].to_numpy(dtype=float) / 100

    order = np.argsort(years, kind='stable')

    # Linear interpolation of rates
    interp_func = interp1d(years[order], rates[order],
                           kind='linear', fill_value='extrapolate')
    rate = float(interp_func(t))

    return np.exp(-rate * t)

def bootstrap_survival_probability(spread, recovery, maturity, dt=0.25):
    """
    Build a flat-hazard survival curve from a single CDS spread.

    The hazard rate is taken from the credit-triangle relation
    λ = spread / (1 - recovery) and held constant, so Q(t) = exp(-λ t).

    Args:
        spread: CDS spread in basis points
        recovery: Recovery rate (0-1)
        maturity: Time to maturity in years (not used by this simplified
            version; kept so the signature stays compatible)
        dt: Time step (not used by this simplified version; kept for
            compatibility)

    Returns:
        Callable Q(t) giving the survival probability to time t (in years)
    """
    # bps -> decimal, then divide by loss-given-default to get the hazard rate
    hazard_rate = (spread / 10000.0) / (1 - recovery)

    def survival(t):
        # Exponential survival under a constant hazard rate
        return np.exp(-hazard_rate * t)

    return survival

### 2.2 Gaussian Copula Loss Distribution

In [4]:
def gaussian_copula_loss_distribution(survival_probs, recoveries, correlation, maturity, M=1000):
    """
    Calculate loss distribution using Gaussian copula (Large Homogeneous Portfolio approximation)

    The market factor Z is discretised on an evenly spaced grid over [-5, 5].
    For each grid point the portfolio loss is approximated by its conditional
    expectation given Z (sum over names of LGD/N times the conditional default
    probability), and the grid point is weighted by norm.pdf(z) * dz.

    Args:
        survival_probs: List of survival probability functions Q_i(t)
        recoveries: List of recovery rates, one per name
        correlation: Asset correlation (rho)
        maturity: Time horizon in years
        M: Number of grid points for market factor

    Returns:
        loss_grid: Array of length M with the conditional expected portfolio
            loss (as fraction of portfolio) at each market-factor grid point
        loss_prob: Array of length M with the probability weight of each
            grid point
    """
    N = len(survival_probs)
    # Loss given default per name, as a fraction of the equally weighted portfolio
    lgd = (1 - np.asarray(recoveries, dtype=float)) / N

    # Market factor grid
    z_grid = np.linspace(-5, 5, M)
    dz = z_grid[1] - z_grid[0]

    # Default thresholds depend only on the name, not on z, so compute them
    # once per name instead of once per (name, grid point). Survival
    # probabilities are clamped to [0.001, 0.999] so norm.ppf stays finite.
    q_T = np.array(
        [max(0.001, min(0.999, sp(maturity))) for sp in survival_probs],
        dtype=float,
    )
    thresholds = norm.ppf(1 - q_T)

    # Conditional default probability P(default_i | Z = z) for every
    # (grid point, name) pair: shape (M, N)
    cond_default_probs = norm.cdf(
        (thresholds[np.newaxis, :] - np.sqrt(correlation) * z_grid[:, np.newaxis])
        / np.sqrt(1 - correlation)
    )

    # Expected loss given market factor z (LHP approximation)
    loss_grid = cond_default_probs @ lgd

    # Probability weight of each market factor grid point
    loss_prob = norm.pdf(z_grid) * dz

    return loss_grid, loss_prob


### 2.3 Tranche Pricing

In [5]:
def price_tranche(loss_grid, loss_prob, attachment, detachment, recovery=0.4,
                   running_spread=500, ois_curve_df=None, maturity=5.0, dt=0.25):
    """
    Price a CDO tranche given loss distribution

    The default leg is the expected tranche loss at maturity discounted with
    DF(maturity). The premium leg (RPV01) sums DF(t) * dt * expected surviving
    notional over the payment dates, assuming the expected tranche loss builds
    up linearly in time from 0 to its maturity value (a simplification).

    Args:
        loss_grid: Portfolio loss levels (fraction of portfolio), one per scenario
        loss_prob: Probability weight of each scenario
        attachment: Tranche attachment point (fraction of portfolio)
        detachment: Tranche detachment point (fraction of portfolio)
        recovery: Not used by this function; kept for signature compatibility
        running_spread: Contractual running spread in bps, used for the upfront
        ois_curve_df: OIS curve passed to get_discount_factor (required)
        maturity: Tranche maturity in years
        dt: Premium payment interval in years

    Returns:
        fair_spread: Fair running spread in bps
        upfront_pct: Upfront payment as % of tranche notional
        expected_loss: Expected tranche loss at maturity, as a fraction of
            tranche notional

    Raises:
        ValueError: If the OIS curve is missing, the tranche has non-positive
            width, or maturity/dt are not positive.
    """
    if ois_curve_df is None:
        raise ValueError("ois_curve_df is required to discount tranche cash flows")
    if detachment <= attachment:
        raise ValueError("detachment must be greater than attachment")
    if maturity <= 0 or dt <= 0:
        raise ValueError("maturity and dt must be positive")

    tranche_size = detachment - attachment

    # Build the payment grid from an integer period count. np.arange with a
    # float step can emit one extra point because of rounding; the small
    # tolerance keeps an exact multiple of dt from gaining a spurious period.
    n_periods = int(np.ceil(maturity / dt - 1e-9))
    times = dt * np.arange(1, n_periods + 1)

    # Calculate tranche loss for each scenario
    tranche_losses = np.maximum(
        0, np.minimum(np.asarray(loss_grid) - attachment, tranche_size)
    ) / tranche_size

    # Expected tranche loss (default leg)
    expected_loss = np.sum(tranche_losses * loss_prob)

    # Calculate RPV01 (premium leg)
    rpv01 = 0
    for t in times:
        df = get_discount_factor(t, ois_curve_df)
        # Expected tranche survival = 1 - expected loss up to time t
        expected_survival = 1 - expected_loss * (t / maturity)  # Simplified
        rpv01 += df * dt * expected_survival

    # Fair spread: Expected Loss / RPV01
    df_maturity = get_discount_factor(maturity, ois_curve_df)
    fair_spread = (expected_loss * df_maturity / rpv01) * 10000  # in bps

    # Upfront payment
    upfront_pct = ((fair_spread - running_spread) / 10000) * rpv01 * 100  # as percentage

    return fair_spread, upfront_pct, expected_loss



## 3. Gaussian Copula Model - Multi-Tenor Pricing

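We price all six tenors (1Y, 2Y, 3Y, 5Y, 7Y, 10Y) and, for each tenor, calibrate a single flat correlation by weighted least squares against the five quoted tranche spreads (the equity tranche carries double weight).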

In [6]:
def calibrate_gaussian_correlation_for_tenor(tenor, constituents_df, market_data, ois_curve_df):
    """
    Calibrate Gaussian copula correlation for a specific tenor

    A single correlation is fitted to all five tranche quotes of the tenor by
    minimising the weighted sum of squared spread differences (in units of
    100 bps), with the equity tranche weighted 2x.

    Args:
        tenor: Tenor label such as '5Y'; the maturity in years and the spread
            column name (e.g. '5 Yr') are derived from it
        constituents_df: DataFrame with a 'Recovery rate' column and one
            spread column per tenor
        market_data: Dict keyed by tenor label; each value holds the tranche
            quotes 'equity_0_3_running', 'mezz_3_7', 'mezz_7_10',
            'senior_10_15' and 'senior_15_100'
        ois_curve_df: OIS curve forwarded to price_tranche

    Returns:
        optimal_rho: Calibrated correlation
        survival_probs: List of per-name survival probability functions
        recoveries: List of per-name recovery rates
    """
    print(f"\nCalibrating Gaussian Copula for {tenor}...")

    maturity = float(tenor.replace('Y', ''))
    spread_col = tenor.replace('Y', ' Yr')

    # Bootstrap survival probabilities using simplified approach
    # (consistent with other notebooks)
    survival_probs = []
    recoveries = []
    for recovery, cds_spread in zip(constituents_df['Recovery rate'],
                                    constituents_df[spread_col]):
        survival_probs.append(
            bootstrap_survival_probability(cds_spread, recovery, maturity)
        )
        recoveries.append(recovery)

    market_tranches = market_data[tenor]

    # (attachment, detachment, market spread, weight). The spec does not
    # depend on rho, so it is built once rather than on every objective call.
    # Weight equity tranche more but don't ignore others.
    tranches = [
        (0.00, 0.03, market_tranches['equity_0_3_running'], 2.0),
        (0.03, 0.07, market_tranches['mezz_3_7'], 1.0),
        (0.07, 0.10, market_tranches['mezz_7_10'], 1.0),
        (0.10, 0.15, market_tranches['senior_10_15'], 1.0),
        (0.15, 1.00, market_tranches['senior_15_100'], 1.0),
    ]

    # Track pricing failures so they are reported instead of silently
    # disappearing into the penalty term.
    failure_count = 0
    first_failure = None

    def objective(rho):
        nonlocal failure_count, first_failure

        rho = max(0.01, min(0.99, rho))
        loss_grid, loss_prob = gaussian_copula_loss_distribution(
            survival_probs, recoveries, rho, maturity
        )

        total_error = 0
        for attach, detach, mkt_spread, weight in tranches:
            try:
                model_spread, _, _ = price_tranche(
                    loss_grid, loss_prob, attach, detach,
                    running_spread=500 if attach == 0 else 100,
                    ois_curve_df=ois_curve_df, maturity=maturity
                )
                # Normalize error by dividing by 100 to put on same scale
                error = ((model_spread - mkt_spread) / 100) ** 2
                total_error += weight * error
            except Exception as exc:
                # Penalise the failed tranche but keep the optimiser running.
                # Catching Exception (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate.
                failure_count += 1
                if first_failure is None:
                    first_failure = exc
                total_error += 1e6

        return total_error

    # Expand bounds to allow wider range
    result = minimize_scalar(objective, bounds=(0.05, 0.95), method='bounded')
    optimal_rho = result.x

    if failure_count:
        print(f"  WARNING: {failure_count} tranche pricing call(s) failed during "
              f"calibration; first error: {first_failure!r}")

    print(f"  Optimal correlation: {optimal_rho:.4f} ({optimal_rho*100:.2f}%)")
    return optimal_rho, survival_probs, recoveries

### 3.1 Calibrate for All Tenors

In [7]:
# Tenor labels to calibrate; each must be a key of market_data_multi and map
# to a '<n> Yr' spread column in the constituents table.
tenors = ['1Y', '2Y', '3Y', '5Y', '7Y', '10Y']
# Per-tenor calibration output: correlation, survival curves and recoveries.
gaussian_results = {}

for tenor in tenors:
    # Each call runs a bounded scalar optimisation and prints its own progress.
    optimal_rho, surv_probs, recoveries = calibrate_gaussian_correlation_for_tenor(
        tenor, constituents, market_data_multi, ois_curve
    )
    gaussian_results[tenor] = {
        'correlation': optimal_rho,
        'survival_probs': surv_probs,
        'recoveries': recoveries
    }

# Summary table of the calibrated correlation per tenor.
print("\n" + "="*60)
print("GAUSSIAN COPULA - CORRELATION TERM STRUCTURE")
print("="*60)
for tenor in tenors:
    print(f"{tenor}: ρ = {gaussian_results[tenor]['correlation']:.4f}")


Calibrating Gaussian Copula for 1Y...
  Optimal correlation: 0.5711 (57.11%)

Calibrating Gaussian Copula for 2Y...
  Optimal correlation: 0.5748 (57.48%)

Calibrating Gaussian Copula for 3Y...
  Optimal correlation: 0.5809 (58.09%)

Calibrating Gaussian Copula for 5Y...
  Optimal correlation: 0.6085 (60.85%)

Calibrating Gaussian Copula for 7Y...
  Optimal correlation: 0.6171 (61.71%)

Calibrating Gaussian Copula for 10Y...
  Optimal correlation: 0.6211 (62.11%)

GAUSSIAN COPULA - CORRELATION TERM STRUCTURE
1Y: ρ = 0.5711
2Y: ρ = 0.5748
3Y: ρ = 0.5809
5Y: ρ = 0.6085
7Y: ρ = 0.6171
10Y: ρ = 0.6211
