In [27]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import date
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

**Data**

In [35]:
# Asset universe: SPDR sector ETFs, mapped ticker -> sector name.
etfs = {
    "XLK": "Technology",
    "XLV": "Health Care",
    "XLF": "Financial",
    "XLE": "Energy",
    "XLI": "Industrial",
    "XLY": "Consumer Discretionary",
    "XLP": "Consumer Staples",
    "XLB": "Materials",
    "XLU": "Utilities",
    "XLRE": "Real Estate"
}

# Tickers to download, in the insertion order of `etfs`.
sector_tickers = list(etfs)
# `mag7` is a legacy name (the list no longer holds the "Magnificent 7" stocks);
# it is kept as an alias because later cells still reference it.
mag7 = sector_tickers

# Sample window handed to yfinance as start/end.
start_date, end_date = date(2022, 1, 1), date(2025, 1, 1)

# Closing prices for the sector ETFs, one column per ticker.
# NOTE(review): the column order is decided by yfinance and may differ from
# the order of `sector_tickers` -- label downstream results with
# `returns.columns` rather than with the ticker list.
prices = yf.download(tickers=sector_tickers, start=start_date, end=end_date)['Close']
# fill_method=None: do not forward-fill missing prices before differencing
# (the implicit fill is deprecated in pandas and would fabricate 0% returns);
# rows with any missing return are dropped instead.
returns = prices.pct_change(fill_method=None).dropna()

# Benchmark series (SPY), processed exactly like the ETF prices.
market_prices = yf.download(tickers=['SPY'], start=start_date, end=end_date)['Close']
market_returns = market_prices.pct_change(fill_method=None).dropna()

[*********************100%***********************]  10 of 10 completed
[*********************100%***********************]  1 of 1 completed


**Apply PCA**

In [36]:
# Standardize every return series to zero mean and unit variance (z-scores).
standard_returns = (returns - returns.mean()) / returns.std()

# Asset-by-asset correlation matrix: rowvar=False treats columns as variables
# and rows (dates) as observations. Correlation is scale-invariant, so this
# equals the correlation matrix of the raw returns.
corr_matrix = np.corrcoef(standard_returns, rowvar=False)

# Eigen-decomposition of the symmetric correlation matrix.
eigenvalues, eigenvectors = np.linalg.eigh(corr_matrix)

# Reorder so the largest eigenvalue (most explained variance) comes first;
# the eigenvector columns are permuted with the same index.
idx = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

# Column j holds the loadings of principal component j+1.
# The overall sign of each eigenvector is arbitrary.
principal_components = eigenvectors

# Loadings table: one row per asset, one column per principal component.
# Row i of the eigenvector matrix corresponds to column i of `returns`, so the
# rows must be labelled with `returns.columns`. Labelling them with the ticker
# list instead attaches loadings to the wrong tickers whenever the downloaded
# column order differs from the list order.
loadings_df = pd.DataFrame(
    principal_components,
    index=returns.columns,
    columns=[f"PC {i+1}" for i in range(principal_components.shape[1])],
)

*Plot*

In [37]:
# Grouped bar chart of each asset's loading on the first two components.

# Work on an explicit copy so `loadings_df` itself is left untouched: the cell
# stays idempotent and the assignment below cannot raise SettingWithCopyWarning.
plot_loadings = loadings_df[['PC 1', 'PC 2']].copy()
# PC 1 is displayed as magnitudes (eigenvector signs are arbitrary);
# PC 2 keeps its signs.
plot_loadings['PC 1'] = plot_loadings['PC 1'].abs()

fig = go.Figure()

# One trace per asset, so bars are grouped by component and coloured by asset.
for stock, stock_loadings in plot_loadings.iterrows():
    fig.add_trace(go.Bar(
        x=plot_loadings.columns,
        y=stock_loadings,
        name=stock
    ))

fig.update_layout(
                  showlegend=True,
                  barmode='group',
                  margin=dict(l=10, r=10, t=50, b=10),
                  # horizontal legend centred underneath the plot area
                  legend=dict(orientation="h",yanchor="top",y=-0.1,xanchor="center",x=0.5),
                  width = 800,height = 400,
                  xaxis_title = 'Principal Components',
                  yaxis_title = 'Weight',
                  xaxis=dict(title_standoff=3),
                  title = 'PCA Loadings',
                  template = 'plotly_white'
                )

fig.show()

*Eigenportfolio*

In [38]:
# Build the first eigenportfolio: |PC 1| loadings scaled by inverse volatility.

# Standard deviation of each asset's daily returns.
vol = returns.std(axis=0)

# The division aligns on the index labels (tickers), not on position.
weights = loadings_df['PC 1'].abs() / vol
# A label mismatch would produce NaN weights, which the row-wise sum below
# silently skips -- fail loudly instead.
assert weights.notna().all(), "loadings_df index does not match the columns of returns"
# Normalize so the absolute weights sum to 1.
weights = weights / weights.abs().sum()

# Daily portfolio return is the weighted sum across assets; compounding it
# gives the cumulative growth of one unit invested.
ep1 = (1 + (returns * weights).sum(axis=1)).cumprod()

In [39]:
# Compare the cumulative growth of the first eigenportfolio with SPY.

# Legend label -> cumulative-return curve, in drawing order.
curves = {
    'Eigenportfolio 1': ep1,
    'SPY': (1 + market_returns['SPY']).cumprod(),
}

fig = go.Figure(
    data=[
        go.Scatter(x=curve.index, y=curve, name=label)
        for label, curve in curves.items()
    ]
)

# Layout options collected in one place; same keys and values as a direct call.
layout_options = dict(
    showlegend=True,
    margin=dict(l=10, r=10, t=50, b=10),
    # horizontal legend centred underneath the plot area
    legend=dict(orientation="h", yanchor="top", y=-0.1, xanchor="center", x=0.5),
    width=800,
    height=400,
    xaxis_title='Date',
    yaxis_title='Cumulative Returns',
    xaxis=dict(title_standoff=3),
    title='Eigenportfolio vs Market',
    template='plotly_white',
)
fig.update_layout(**layout_options)
fig.show()