In [None]:
# Install/upgrade yfinance into the kernel's environment, bypassing pip's download cache:
%pip install yfinance --upgrade --no-cache-dir

In [None]:
from IPython.display import display, Latex
import pandas as pd
import numpy as np
import yfinance as yf
from scipy.optimize import minimize

In [None]:
# Reference lookup: sector name -> sector ETF ticker.
sector_tickers = dict([
    ('Materials', 'XLB'),
    ('Communication Services', 'XLC'),
    ('Energy', 'XLE'),
    ('Financials', 'XLF'),
    ('Industrials', 'XLI'),
    ('Information Technology', 'XLK'),
    ('Consumer Staples', 'XLP'),
    ('Real Estate', 'XLRE'),
    ('Utilities', 'XLU'),
    ('Health Care', 'XLV'),
    ('Consumer Discretionary', 'XLY'),
])

# Gather known data:

As of 2/18/2025

**Weight vector of the 11 sector ETFs**

In [None]:
# Latex() emits its argument verbatim, so the math must carry its own $...$ delimiters to be typeset:
display(Latex('$w$'))

<IPython.core.display.Latex object>

From the "Sector Allocation" section of SPY's website: https://www.ssga.com/us/en/individual/etfs/spdr-sp-500-etf-trust-spy

In [None]:
# SPY sector weights, keyed by the corresponding sector ETF ticker:
weight_series = pd.Series({
    'XLB': 0.0198,
    'XLC': 0.0966,
    'XLE': 0.0321,
    'XLF': 0.1411,
    'XLI': 0.0822,
    'XLK': 0.3182,
    'XLP': 0.0559,
    'XLRE': 0.0208,
    'XLU': 0.0235,
    'XLV': 0.1014,
    'XLY': 0.1084,
})
weight_series

Unnamed: 0,0
XLB,0.0198
XLC,0.0966
XLE,0.0321
XLF,0.1411
XLI,0.0822
XLK,0.3182
XLP,0.0559
XLRE,0.0208
XLU,0.0235
XLV,0.1014


In [None]:
# Ticker symbols in the same order as the weights, plus how many there are:
sector_etfs = list(weight_series.index)
n_sectors = len(weight_series)

In [None]:
# This is the weight vector we'll be working with:
# (a plain NumPy array in the same order as weight_series, used in the matrix products below)
w = weight_series.to_numpy()

**Implied volatility of SPY**

In [None]:
# Raw string: '\m' is not a valid Python escape sequence and warns in a normal string.
# Latex() emits its argument verbatim, so the math must carry its own $...$ delimiters to be typeset.
display(Latex(r'$\sigma_{\mathrm{SPY}}$'))

<IPython.core.display.Latex object>

30-day IV from Market Chameleon to keep things simple: https://marketchameleon.com/Overview/SPY/DailyHistory

In [None]:
# 30-day implied volatility of SPY, entered by hand from the source linked above:
implied_vol_SPY = 0.116

**Diagonal matrix of the sector implied volatilities**

In [None]:
# Latex() emits its argument verbatim, so the math must carry its own $...$ delimiters to be typeset:
display(Latex('$V$'))

<IPython.core.display.Latex object>

30-day IVs from Market Chameleon to keep things simple

In [None]:
# 30-day implied volatility of each sector ETF, in the same ticker order as the weights:
implied_vol_series = pd.Series({
    'XLB': 0.15,
    'XLC': 0.127,
    'XLE': 0.188,
    'XLF': 0.136,
    'XLI': 0.127,
    'XLK': 0.19,
    'XLP': 0.117,
    'XLRE': 0.161,
    'XLU': 0.158,
    'XLV': 0.122,
    'XLY': 0.171,
})
implied_vol_series

Unnamed: 0,0
XLB,0.15
XLC,0.127
XLE,0.188
XLF,0.136
XLI,0.127
XLK,0.19
XLP,0.117
XLRE,0.161
XLU,0.158
XLV,0.122


In [None]:
# Diagonal matrix with the sector implied volatilities on the diagonal:
V = np.diag(implied_vol_series.to_numpy())

**Benchmark correlation matrix**

In [None]:
# Latex() emits its argument verbatim, so the math must carry its own $...$ delimiters to be typeset:
display(Latex('$C_B$'))

<IPython.core.display.Latex object>

Historical correlation matrix to keep things simple

In [None]:
# Gather daily returns over the past month:
# - Prices are left unrounded: rounding=True would quantise them to 2 decimals
#   before the returns are computed, adding noise to the small daily returns.
# - Columns are explicitly selected in sector_etfs order so the correlation matrix
#   lines up with w and V (and a missing ticker raises a KeyError instead of
#   silently misaligning the matrices).
close_prices = yf.download(sector_etfs, start='2025-01-16', end='2025-02-19', progress=False)['Close']
return_df = close_prices[sector_etfs].pct_change().dropna()
return_df

Ticker,XLB,XLC,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2025-01-17,0.006495,0.0087,0.007722,0.008432,0.005988,0.015662,0.005729,-0.000243,0.001397,-0.006731,0.014023
2025-01-21,0.012793,0.009036,-0.005109,0.008362,0.020544,0.008329,0.004661,0.018257,0.015348,0.01655,0.007329
2025-01-22,-0.008048,0.009973,-0.018186,-0.004738,-0.002347,0.022579,-0.004897,-0.017452,-0.021611,-0.002246,-0.003768
2025-01-23,0.005522,0.00262,0.004576,0.005753,0.010125,0.001781,0.003626,0.008029,0.004724,0.013363,0.006043
2025-01-24,-0.002578,0.009044,-0.009653,0.002959,-0.004235,-0.010421,0.005161,0.002896,0.010166,0.001596,-0.006093
2025-01-27,0.000562,0.005179,-0.010404,0.011013,-0.013398,-0.049018,0.027086,0.009868,-0.023148,0.022452,0.005695
2025-01-28,-0.003257,0.004855,-0.009517,-0.001751,-0.00661,0.026673,-0.015623,-0.012154,-0.01378,-0.006981,0.00121
2025-01-29,-0.001803,0.004437,0.002235,-0.000779,-0.003399,-0.008132,0.002412,-0.01158,0.002089,-0.00587,-0.00285
2025-01-30,0.009481,0.000982,0.005128,0.009555,0.011176,0.001856,0.00988,0.01318,0.02085,0.011534,0.009786
2025-01-31,-0.007267,0.004021,-0.02795,-0.005795,-0.007034,-0.005858,-0.009407,-0.002168,-0.005872,-0.00319,-0.004417


In [None]:
# Pairwise Pearson correlation of the daily returns (pandas' default method, spelled out):
hist_corr_matrix = return_df.corr(method='pearson')
hist_corr_matrix

Ticker,XLB,XLC,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
XLB,1.0,0.15303,0.535379,0.599706,0.653714,0.206573,0.463762,0.644107,0.520719,0.357438,0.604976
XLC,0.15303,1.0,-0.086916,-0.116191,0.014299,0.097612,-0.08266,-0.220578,-0.309697,-0.090116,0.572484
XLE,0.535379,-0.086916,1.0,0.083319,0.385097,0.239254,0.164225,0.283773,0.410881,0.021961,0.180012
XLF,0.599706,-0.116191,0.083319,1.0,0.376821,-0.136931,0.597102,0.721625,0.275404,0.505862,0.404931
XLI,0.653714,0.014299,0.385097,0.376821,1.0,0.468872,0.100547,0.526084,0.659575,0.204593,0.415211
XLK,0.206573,0.097612,0.239254,-0.136931,0.468872,1.0,-0.566091,-0.186815,0.159124,-0.428368,0.2293
XLP,0.463762,-0.08266,0.164225,0.597102,0.100547,-0.566091,1.0,0.618028,0.218253,0.666901,0.164891
XLRE,0.644107,-0.220578,0.283773,0.721625,0.526084,-0.186815,0.618028,1.0,0.576828,0.710839,0.244409
XLU,0.520719,-0.309697,0.410881,0.275404,0.659575,0.159124,0.218253,0.576828,1.0,0.181602,-0.004798
XLV,0.357438,-0.090116,0.021961,0.505862,0.204593,-0.428368,0.666901,0.710839,0.181602,1.0,0.194219


In [None]:
# Benchmark correlation matrix as a NumPy array; it is the reference in the
# objective and its upper triangle is the optimizer's starting point.
C_B = hist_corr_matrix.to_numpy()

# Solve for the implied covariance matrix:

We must first solve for the implied correlation matrix

**Implied correlation matrix**

In [None]:
# Latex() emits its argument verbatim, so the math must carry its own $...$ delimiters to be typeset:
display(Latex('$C$'))

<IPython.core.display.Latex object>

Since there are 11 sectors, we need an 11x11 implied correlation matrix

This means we're solving for 55 unique correlation values: the entries above the diagonal (the matrix is symmetric and its diagonal is all ones)

In [None]:
# Raw string: '\d' is not a valid Python escape sequence and warns in a normal string.
# Latex() emits its argument verbatim, so the math must carry its own $...$ delimiters to be typeset.
display(Latex(r'$\dfrac{n(n-1)}{2}$'))

<IPython.core.display.Latex object>

We structure this as a constrained minimization problem

In [None]:
# Target portfolio variance: the square of SPY's implied volatility.
target_variance = implied_vol_SPY**2

In [None]:
# Function to construct the full correlation matrix from its unique off-diagonal values:
def construct_corr_matrix(unique_values):
    """Build a symmetric correlation matrix with a unit diagonal.

    The matrix size is inferred from the number of values supplied, so the
    function does not depend on any notebook-level variable.

    Parameters
    ----------
    unique_values : array-like of length n*(n-1)/2
        Entries strictly above the diagonal, in the order produced by
        ``np.triu_indices(n, k=1)`` (row by row).

    Returns
    -------
    numpy.ndarray of shape (n, n)
        Symmetric matrix with ones on the diagonal.

    Raises
    ------
    ValueError
        If the input is not one-dimensional or its length is not of the
        form n*(n-1)/2.
    """
    unique_values = np.asarray(unique_values, dtype=float)
    if unique_values.ndim != 1:
        raise ValueError('unique_values must be one-dimensional')

    n_pairs = unique_values.size
    # Invert n_pairs = n*(n-1)/2 for n, then verify the length really is triangular.
    n = int(round((1 + np.sqrt(1 + 8 * n_pairs)) / 2))
    if n * (n - 1) // 2 != n_pairs:
        raise ValueError(
            f'{n_pairs} values cannot fill the upper triangle of a square matrix'
        )

    C = np.eye(n)
    rows, cols = np.triu_indices(n, k=1)
    C[rows, cols] = unique_values
    C[cols, rows] = unique_values  # Mirror into the lower triangle
    return C

In [None]:
# Distance between the candidate correlation matrix and the benchmark:
def objective(unique_values):
    """Return the Frobenius norm of ``C - C_B``, the quantity being minimized.

    ``C`` is rebuilt from ``unique_values`` via ``construct_corr_matrix``.
    """
    # NOTE(review): a norm is not differentiable where its argument is zero, and
    # the optimizer is started from C_B's own upper triangle, i.e. exactly there.
    # Consider whether the squared norm would be the better objective.
    deviation = construct_corr_matrix(unique_values) - C_B
    return np.linalg.norm(deviation, ord='fro')

In [None]:
# Equality constraint tying the sector covariances to SPY's implied variance:
def portfolio_variance_constraint(unique_values):
    """Return ``target_variance`` minus the portfolio variance implied by the candidate.

    Used as an equality constraint, so the solver drives this value to zero.
    """
    implied_cov = V @ construct_corr_matrix(unique_values) @ V
    portfolio_variance = w.T @ implied_cov @ w
    return target_variance - portfolio_variance

In [None]:
# Inequality constraint keeping the candidate matrix positive semidefinite:
def positive_semidefinite_constraint(unique_values):
    """Return the eigenvalues of the candidate correlation matrix.

    Used as an inequality constraint, so every returned eigenvalue must be >= 0.
    """
    return np.linalg.eigvalsh(construct_corr_matrix(unique_values))

In [None]:
# Start the optimizer from the benchmark's own above-diagonal entries:
upper_triangle = np.triu_indices(n_sectors, k=1)
initial_guess = C_B[upper_triangle]

In [None]:
# Constraints handed to the solver: one equality ('eq') and one inequality ('ineq').
constraints = [
    dict(type='eq', fun=portfolio_variance_constraint),
    dict(type='ineq', fun=positive_semidefinite_constraint),
]

In [None]:
# Every correlation value is confined to [-1, 1]:
bounds = [(-1, 1) for _ in range(len(initial_guess))]

In [None]:
# Solve the minimization problem:
result = minimize(
    objective,
    initial_guess,
    method='SLSQP',
    constraints=constraints,
    bounds=bounds,
    options={'disp': True, 'maxiter': 1000}
)

# Stop here if the solver did not converge, so that later cells never build
# matrices from a result that fails the constraints.
if not result.success:
    raise RuntimeError(f'SLSQP did not converge: {result.message}')

Optimization terminated successfully    (Exit mode 0)
            Current function value: 2.0188529336067362
            Iterations: 62
            Function evaluations: 3812
            Gradient evaluations: 62


In [None]:
# Construct the full implied correlation matrix from the optimized unique values:
# (result.x holds the optimizer's solution vector)
C = construct_corr_matrix(result.x)

In [None]:
# Label the rows and columns with the sector tickers for display:
implied_corr_matrix = pd.DataFrame(
    data=C,
    index=sector_etfs,
    columns=sector_etfs,
)
implied_corr_matrix

Unnamed: 0,XLB,XLC,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY
XLB,1.0,0.173214,0.532788,0.61589,0.649469,0.407916,0.499192,0.669723,0.517469,0.382986,0.598816
XLC,0.173214,1.0,-0.017986,0.093977,0.13119,0.550232,-0.054108,-0.215969,-0.274497,0.01972,0.726898
XLE,0.532788,-0.017986,1.0,0.20493,0.461554,0.40683,0.162037,0.27215,0.435835,0.076223,0.286188
XLF,0.61589,0.093977,0.20493,1.0,0.583171,0.502848,0.621577,0.711556,0.339861,0.67799,0.683433
XLI,0.649469,0.13119,0.461554,0.583171,1.0,0.763729,0.099056,0.507982,0.702568,0.298052,0.599846
XLK,0.407916,0.550232,0.40683,0.502848,0.763729,1.0,-0.161071,0.066019,0.26855,0.085432,0.850009
XLP,0.499192,-0.054108,0.162037,0.621577,0.099056,-0.161071,1.0,0.67224,0.221576,0.719655,0.209798
XLRE,0.669723,-0.215969,0.27215,0.711556,0.507982,0.066019,0.67224,1.0,0.573872,0.73651,0.254329
XLU,0.517469,-0.274497,0.435835,0.339861,0.702568,0.26855,0.221576,0.573872,1.0,0.213689,0.070015
XLV,0.382986,0.01972,0.076223,0.67799,0.298052,0.085432,0.719655,0.73651,0.213689,1.0,0.351272


In [None]:
# Check if it equals the benchmark correlation matrix:
# (element-wise comparison with np.allclose's default tolerances)
np.allclose(C, C_B)

False

In [None]:
# Smallest eigenvalue of C; it should be >= 0, up to the solver's numerical tolerance:
np.linalg.eigvalsh(C).min()

-9.682217289685396e-07

**Implied covariance matrix**

In [None]:
# Convert to the implied covariance matrix:
# (V is diagonal, so entry (i, j) of C is scaled by vol_i * vol_j)
Σ = V @ C @ V

In [None]:
# Label the rows and columns with the sector tickers for display:
implied_cov_matrix = pd.DataFrame(
    data=Σ,
    index=sector_etfs,
    columns=sector_etfs,
)
implied_cov_matrix

Unnamed: 0,XLB,XLC,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY
XLB,0.0225,0.0033,0.015025,0.012564,0.012372,0.011626,0.008761,0.016174,0.012264,0.007009,0.01536
XLC,0.0033,0.016129,-0.000429,0.001623,0.002116,0.013277,-0.000804,-0.004416,-0.005508,0.000306,0.015786
XLE,0.015025,-0.000429,0.035344,0.00524,0.01102,0.014532,0.003564,0.008237,0.012946,0.001748,0.0092
XLF,0.012564,0.001623,0.00524,0.018496,0.010073,0.012994,0.009891,0.01558,0.007303,0.011249,0.015894
XLI,0.012372,0.002116,0.01102,0.010073,0.016129,0.018429,0.001472,0.010387,0.014098,0.004618,0.013027
XLK,0.011626,0.013277,0.014532,0.012994,0.018429,0.0361,-0.003581,0.00202,0.008062,0.00198,0.027617
XLP,0.008761,-0.000804,0.003564,0.009891,0.001472,-0.003581,0.013689,0.012663,0.004096,0.010272,0.004197
XLRE,0.016174,-0.004416,0.008237,0.01558,0.010387,0.00202,0.012663,0.025921,0.014598,0.014467,0.007002
XLU,0.012264,-0.005508,0.012946,0.007303,0.014098,0.008062,0.004096,0.014598,0.024964,0.004119,0.001892
XLV,0.007009,0.000306,0.001748,0.011249,0.004618,0.00198,0.010272,0.014467,0.004119,0.014884,0.007328


In [None]:
# Residual of the portfolio variance constraint (should be ~0):
target_variance - w @ Σ @ w

-3.9343528435153985e-15

# Final code:

In [None]:
# Gather known data:
weight_series = pd.Series({'XLB': 0.0198, 'XLC': 0.0966, 'XLE': 0.0321, 'XLF': 0.1411, 'XLI': 0.0822, 'XLK': 0.3182, 'XLP': 0.0559, 'XLRE': 0.0208, 'XLU': 0.0235, 'XLV': 0.1014, 'XLY': 0.1084})
w = weight_series.to_numpy()

sector_etfs = weight_series.index.tolist()
n_sectors = len(sector_etfs)

implied_vol_SPY = 0.116

implied_vol_series = pd.Series({'XLB': 0.15, 'XLC': 0.127, 'XLE': 0.188, 'XLF': 0.136, 'XLI': 0.127, 'XLK': 0.19, 'XLP': 0.117, 'XLRE': 0.161, 'XLU': 0.158, 'XLV': 0.122, 'XLY': 0.171})
V = np.diag(implied_vol_series)

# Prices are left unrounded (rounding=True would quantise them to 2 decimals
# before the returns are computed), and the columns are explicitly selected in
# sector_etfs order so that C_B lines up with w and V; a missing ticker raises
# a KeyError instead of silently misaligning the matrices.
close_prices = yf.download(sector_etfs, start='2025-01-16', end='2025-02-19', progress=False)['Close']
return_df = close_prices[sector_etfs].pct_change().dropna()
hist_corr_matrix = return_df.corr()
C_B = hist_corr_matrix.to_numpy()

# Solve for the implied covariance matrix:
# The target portfolio variance is SPY's implied volatility squared.
target_variance = implied_vol_SPY**2

def construct_corr_matrix(unique_values):
    """Build a symmetric correlation matrix with a unit diagonal.

    The matrix size is inferred from the number of values supplied, so the
    function does not depend on any notebook-level variable.

    Parameters
    ----------
    unique_values : array-like of length n*(n-1)/2
        Entries strictly above the diagonal, in the order produced by
        ``np.triu_indices(n, k=1)`` (row by row).

    Returns
    -------
    numpy.ndarray of shape (n, n)
        Symmetric matrix with ones on the diagonal.

    Raises
    ------
    ValueError
        If the input is not one-dimensional or its length is not of the
        form n*(n-1)/2.
    """
    unique_values = np.asarray(unique_values, dtype=float)
    if unique_values.ndim != 1:
        raise ValueError('unique_values must be one-dimensional')

    n_pairs = unique_values.size
    # Invert n_pairs = n*(n-1)/2 for n, then verify the length really is triangular.
    n = int(round((1 + np.sqrt(1 + 8 * n_pairs)) / 2))
    if n * (n - 1) // 2 != n_pairs:
        raise ValueError(
            f'{n_pairs} values cannot fill the upper triangle of a square matrix'
        )

    C = np.eye(n)
    rows, cols = np.triu_indices(n, k=1)
    C[rows, cols] = unique_values
    C[cols, rows] = unique_values  # Mirror into the lower triangle
    return C

def objective(unique_values):
    """Return the Frobenius norm of ``C - C_B``, the quantity being minimized."""
    deviation = construct_corr_matrix(unique_values) - C_B
    return np.linalg.norm(deviation, ord='fro')

def portfolio_variance_constraint(unique_values):
    """Return ``target_variance`` minus the candidate's portfolio variance (zero when satisfied)."""
    implied_cov = V @ construct_corr_matrix(unique_values) @ V
    portfolio_variance = w.T @ implied_cov @ w
    return target_variance - portfolio_variance

def positive_semidefinite_constraint(unique_values):
    """Return the eigenvalues of the candidate correlation matrix (each must be >= 0)."""
    return np.linalg.eigvalsh(construct_corr_matrix(unique_values))

# Start from the benchmark's above-diagonal entries:
initial_guess = C_B[np.triu_indices(n_sectors, k=1)]

constraints = [
    {'type': 'eq', 'fun': portfolio_variance_constraint},
    {'type': 'ineq', 'fun': positive_semidefinite_constraint}
]

# Every correlation value is confined to [-1, 1]:
bounds = [(-1, 1)] * len(initial_guess)

result = minimize(
    objective,
    initial_guess,
    method='SLSQP',
    constraints=constraints,
    bounds=bounds,
    options={'disp': True, 'maxiter': 1000}
)

# Stop here if the solver did not converge, so the matrices below are never
# built from a result that fails the constraints.
if not result.success:
    raise RuntimeError(f'SLSQP did not converge: {result.message}')

C = construct_corr_matrix(result.x)
implied_corr_matrix = pd.DataFrame(C, index=sector_etfs, columns=sector_etfs)

# V is diagonal, so this scales entry (i, j) of C by vol_i * vol_j:
Σ = V @ C @ V
implied_cov_matrix = pd.DataFrame(Σ, index=sector_etfs, columns=sector_etfs)

implied_cov_matrix

Optimization terminated successfully    (Exit mode 0)
            Current function value: 2.0188529336067362
            Iterations: 62
            Function evaluations: 3812
            Gradient evaluations: 62


Unnamed: 0,XLB,XLC,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY
XLB,0.0225,0.0033,0.015025,0.012564,0.012372,0.011626,0.008761,0.016174,0.012264,0.007009,0.01536
XLC,0.0033,0.016129,-0.000429,0.001623,0.002116,0.013277,-0.000804,-0.004416,-0.005508,0.000306,0.015786
XLE,0.015025,-0.000429,0.035344,0.00524,0.01102,0.014532,0.003564,0.008237,0.012946,0.001748,0.0092
XLF,0.012564,0.001623,0.00524,0.018496,0.010073,0.012994,0.009891,0.01558,0.007303,0.011249,0.015894
XLI,0.012372,0.002116,0.01102,0.010073,0.016129,0.018429,0.001472,0.010387,0.014098,0.004618,0.013027
XLK,0.011626,0.013277,0.014532,0.012994,0.018429,0.0361,-0.003581,0.00202,0.008062,0.00198,0.027617
XLP,0.008761,-0.000804,0.003564,0.009891,0.001472,-0.003581,0.013689,0.012663,0.004096,0.010272,0.004197
XLRE,0.016174,-0.004416,0.008237,0.01558,0.010387,0.00202,0.012663,0.025921,0.014598,0.014467,0.007002
XLU,0.012264,-0.005508,0.012946,0.007303,0.014098,0.008062,0.004096,0.014598,0.024964,0.004119,0.001892
XLV,0.007009,0.000306,0.001748,0.011249,0.004618,0.00198,0.010272,0.014467,0.004119,0.014884,0.007328
