In [12]:
import pandas as pd
import numpy as np
from scipy.stats import norm, kendalltau
import matplotlib.pyplot as plt
import seaborn as sns

def fit_and_compare_copulas(df: pd.DataFrame, var1: str, var2: str) -> dict:
    """
    Summarise the rank dependence between two columns with copula-style correlations.

    Rows where either column is missing are dropped, each column is converted to
    pseudo-observations ``rank / (n + 1)``, and two correlation estimates are
    derived from them:

    * Gaussian copula: Pearson correlation of the normal scores ``norm.ppf(u)``.
    * Student-t copula (approximation): ``sin(pi / 2 * tau)``, where ``tau`` is
      Kendall's tau. No degrees-of-freedom parameter is estimated here.

    Side effects: prints a short summary and saves a scatter plot of the
    pseudo-observations to ``copula_scatter_<var1>_<var2>.png`` (spaces in the
    column names replaced by underscores), using a relative path.

    Args:
        df: Data containing both columns.
        var1: Name of the first column.
        var2: Name of the second column.

    Returns:
        A dict with the keys ``"Variable 1"``, ``"Variable 2"``,
        ``"Kendall's Tau"``, ``"Gaussian Rho"`` and ``"Student-t Rho (approx)"``.

    Raises:
        ValueError: If fewer than two rows have both values present, since no
            correlation can be estimated from them.
    """
    # Keep only rows where both variables are observed.
    pairs = df[[var1, var2]].dropna()
    n_obs = len(pairs)
    if n_obs < 2:
        raise ValueError(
            f"Need at least 2 complete observations of {var1!r} and {var2!r} "
            f"to estimate a correlation, got {n_obs}."
        )

    # Pseudo-observations from the empirical CDF. Dividing by n + 1 (rather
    # than n) keeps every value strictly inside (0, 1), so norm.ppf below
    # never returns +/-inf.
    u1 = pairs[var1].rank(method='average') / (n_obs + 1)
    u2 = pairs[var2].rank(method='average') / (n_obs + 1)

    # Gaussian copula correlation via normal scores.
    rho_gaussian = np.corrcoef(norm.ppf(u1), norm.ppf(u2))[0, 1]

    # Student-t approximation: map Kendall's tau to rho with sin(pi/2 * tau).
    tau, _ = kendalltau(u1, u2)
    rho_t_approx = np.sin(np.pi / 2 * tau)

    # Display results
    print("\n=== Copula Correlation Summary ===")
    print(f"Gaussian Copula Correlation (ρ): {rho_gaussian:.4f}")
    print(f"Student-t Copula Approx. (ρ):     {rho_t_approx:.4f}")
    print(f"Kendall’s Tau:                    {tau:.4f}")

    # Visualization. Explicit figure/axes handles plus try/finally make sure
    # the figure is released even when plotting or saving fails.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.scatterplot(x=u1, y=u2, alpha=0.4, s=20, ax=ax)
        ax.set_title(f"Pseudo-Observations: {var1} vs {var2}")
        ax.set_xlabel(f"Empirical CDF of {var1}")
        ax.set_ylabel(f"Empirical CDF of {var2}")
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(f"copula_scatter_{var1.replace(' ', '_')}_{var2.replace(' ', '_')}.png")
    finally:
        plt.close(fig)

    return {
        "Variable 1": var1,
        "Variable 2": var2,
        "Kendall's Tau": tau,
        "Gaussian Rho": rho_gaussian,
        "Student-t Rho (approx)": rho_t_approx
    }


# Run the copula comparison on df_cleaned, pairing per-order profit
# (cost/benefit proxy) with shipping delay in days (disruption or
# recovery-time proxy), then show the returned summary dict.
copula_result = fit_and_compare_copulas(
    df_cleaned, var1="order_profit_per_order", var2="shipping_delay_days"
)
print(copula_result)



=== Copula Correlation Summary ===
Gaussian Copula Correlation (ρ): -0.0030
Student-t Copula Approx. (ρ):     -0.0025
Kendall’s Tau:                    -0.0016
{'Variable 1': 'order_profit_per_order', 'Variable 2': 'shipping_delay_days', "Kendall's Tau": -0.0015846838521709975, 'Gaussian Rho': -0.0030472506401699917, 'Student-t Rho (approx)': -0.0024892130035116793}
