In [40]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import date
import plotly.graph_objects as go

**Data**

In [41]:
# ETF tickers whose daily returns feed the PCA below.
etfs = ['XLK', 'XLV', 'XLF', 'XLE', 'XLI', 'XLY', 'XLP', 'XLB', 'XLU', 'XLRE']

# Sample window, passed to yfinance as `start` / `end`.
start_date, end_date = date(2022,1,1), date(2025,1,1)

# `auto_adjust` is pinned explicitly: it decides whether 'Close' holds the raw or
# the split/dividend-adjusted close, and its default has not been the same across
# yfinance releases. Adjusted closes keep pct_change() free of distribution jumps.
prices = yf.download(tickers=etfs, start=start_date, end=end_date, auto_adjust=True)['Close']
# dropna() removes the leading NaN row and any date on which a ticker has no return.
returns = prices.pct_change().dropna()

# SPY is downloaded the same way and used later as the market benchmark.
market_prices = yf.download(tickers=['SPY'], start=start_date, end=end_date, auto_adjust=True)['Close']
market_returns = market_prices.pct_change().dropna()

# Fail fast instead of letting an empty frame flow silently into the PCA.
assert not returns.empty, "No complete return rows for the ETF universe - check the download"
assert not market_returns.empty, "No return rows for SPY - check the download"

[*********************100%***********************]  10 of 10 completed
[*********************100%***********************]  1 of 1 completed


**Apply PCA**

In [42]:
# Standardize returns: zero mean and unit sample standard deviation per column
standardized_returns = (returns - returns.mean()) / returns.std()

# Covariance of the standardized returns, i.e. the correlation matrix of `returns`
# (rowvar=False: columns are the variables, rows the observations)
cov_matrix = np.cov(standardized_returns, rowvar=False)

# Eigen decomposition (eigh is the routine for symmetric matrices)
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

# Sort eigenvalues and eigenvectors in descending order
idx = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

# Loadings DataFrame: one row per asset, one column per principal component.
# Row i of `eigenvectors` belongs to column i of the decomposed data, so the row
# labels are taken from `standardized_returns.columns`. Labelling with `etfs`
# would only be right if the downloaded column order matched the list order,
# which is not guaranteed, and later label-aligned arithmetic would then pair
# loadings with the wrong ticker.
loadings_df = pd.DataFrame(
    eigenvectors,
    index=standardized_returns.columns,
    columns=[f"PC {i+1}" for i in range(eigenvectors.shape[1])],
)

In [43]:
# Share of total variance carried by the leading (largest) eigenvalue.
pc1_variance_share = eigenvalues[0] / eigenvalues.sum()
print(f"Variance Explained by PC 1 {round(pc1_variance_share, 3)}")

Variance Explained by PC 1 0.623


*Plot*

In [44]:
# Plot-only view of the first two components. An explicit copy is used so that
# `loadings_df` keeps every component for later cells, and so the column
# assignment below does not write into a slice (which can raise
# SettingWithCopyWarning).
loadings_plot = loadings_df[['PC 1', 'PC 2']].copy()
# The sign of an eigenvector is arbitrary; PC 1 is shown in absolute value.
loadings_plot['PC 1'] = loadings_plot['PC 1'].abs()

fig = go.Figure()

# One bar trace per asset, grouped by principal component on the x axis.
for stock in loadings_plot.index:
    fig.add_trace(go.Bar(
        x=loadings_plot.columns,
        y=loadings_plot.loc[stock],
        name=stock
    ))

fig.update_layout(
                  showlegend=True,
                  barmode='group',
                  margin=dict(l=10, r=10, t=50, b=10),
                  legend=dict(orientation="h",yanchor="top",y=-0.1,xanchor="center",x=0.5),
                  width = 800,height = 400,
                  xaxis_title = 'Principal Components',
                  yaxis_title = 'Weight',
                  xaxis=dict(title_standoff=3),
                  title = 'PCA Loadings',
                  template = 'plotly_white'
                )

fig.show()

*Eigenportfolio*

In [45]:
# Per-asset volatility: sample standard deviation of each return column.
vol = returns.std(axis="index")

# Absolute PC 1 loadings scaled by inverse volatility (aligned on the ticker
# labels), then normalized so the absolute weights sum to one.
inverse_vol_loadings = loadings_df['PC 1'].abs().div(vol)
gross_exposure = sum(inverse_vol_loadings.abs())
weights = inverse_vol_loadings / gross_exposure

# Daily portfolio return is the weighted sum across assets; compounding it gives
# the growth of one unit invested in the eigenportfolio.
portfolio_returns = returns.mul(weights, axis="columns").sum(axis="columns")
ep1 = portfolio_returns.add(1).cumprod()

In [46]:
# Compounded growth of one unit invested in the market benchmark.
market_cumulative = market_returns['SPY'].add(1).cumprod()

# Both cumulative-return curves are passed to the figure at construction time.
fig = go.Figure(data=[
    go.Scatter(x=ep1.index, y=ep1, name='Eigenportfolio 1'),
    go.Scatter(x=market_returns.index, y=market_cumulative, name='SPY'),
])

# Layout options are kept in the same order as keyword arguments.
layout_options = dict(
    showlegend=True,
    margin=dict(l=10, r=10, t=50, b=10),
    legend=dict(orientation="h", yanchor="top", y=-0.1, xanchor="center", x=0.5),
    width=800,
    height=400,
    xaxis_title='Date',
    yaxis_title='Cumulative Returns',
    xaxis=dict(title_standoff=3),
    title='Eigenportfolio vs Market',
    template='plotly_white',
)
fig.update_layout(**layout_options)
fig.show()