In [94]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import date
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

**Data**

In [152]:
etfs = {
    "XLK": "Technology",
    "XLV": "Health Care",
    "XLF": "Financial",
    "XLE": "Energy",
    "XLI": "Industrial",
    "XLY": "Consumer Discretionary",
    "XLP": "Consumer Staples",
    "XLB": "Materials",
    "XLU": "Utilities",
    "XLRE": "Real Estate"
}

mag7 = ["MSFT", "AAPL", "NVDA", "GOOGL", "AMZN", "META", "TSLA"]
# ["AAPL", "MSFT", "NVDA", "AMZN", "TSLA", "HD", "UNH", "JNJ", "PFE", "JPM", "BAC", "GS", "BA", "CAT", "GE", "XOM", "CVX", "KO", "PG", "PEP", "NEE", "D", "GOOGL", "META", "VZ", "LIN", "NEM", "SPG", "PLD"]


# ["MSFT", "AAPL", "NVDA", "GOOGL", "AMZN", "META", "TSLA"]

start_date, end_date = date(2022,1,1), date(2025,1,1)

prices = yf.download(tickers=mag7,start = start_date, end = end_date)['Close']

returns = prices.pct_change().dropna()

market_prices = yf.download(tickers=['SPY'],start = start_date, end = end_date)['Close']
market_returns = market_prices.pct_change().dropna()

[*********************100%***********************]  7 of 7 completed
[*********************100%***********************]  1 of 1 completed


**Apply PCA**

In [153]:
scaler = StandardScaler()
returns_scaled = scaler.fit_transform(returns)

# Perform PCA
pca = PCA()
pca.fit(returns_scaled)

# Eigenvalues (explained variance)
explained_variance = pca.explained_variance_ratio_

# Eigenvectors (principal components)
eigenvectors = pca.components_

loadings_df = pd.DataFrame(eigenvectors,columns=[f"PC {i+1}" for i in range(len(eigenvectors))])
loadings_df.index = mag7

In [154]:
import numpy as np

# Proper standardization
standard_returns = (returns - returns.mean()) / returns.std()

# Compute correlation matrix correctly
corr_matrix = np.corrcoef(standard_returns, rowvar=False)

# Eigen decomposition
eigenvalues, eigenvectors = np.linalg.eigh(corr_matrix)

# Sort eigenvalues and eigenvectors in descending order
idx = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

# Principal components are the eigenvectors
principal_components = eigenvectors

In [159]:
principal_components[0]

array([-0.38854795,  0.15299403, -0.32797908, -0.53501155, -0.43116225,
       -0.32436637, -0.37510165])

In [155]:
loadings_df['PC 1']

MSFT     0.388548
AAPL     0.152994
NVDA     0.327979
GOOGL   -0.535012
AMZN    -0.431162
META    -0.324366
TSLA    -0.375102
Name: PC 1, dtype: float64

*Plot*

In [156]:
fig = go.Figure()

loadings_df = loadings_df[['PC 1','PC 2']]
loadings_df['PC 1'] = loadings_df['PC 1']*-1
for stock in loadings_df.index:
    fig.add_trace(go.Bar(
        x=loadings_df.columns, 
        y=loadings_df.loc[stock], 
        name=stock  
    ))

fig.update_layout(
                  showlegend=True,
                  barmode='group',
                  margin=dict(l=10, r=10, t=50, b=10),
                  legend=dict(orientation="h",yanchor="top",y=-0.1,xanchor="center",x=0.5),
                  width = 800,height = 400,
                  xaxis_title = 'Principal Components',
                  yaxis_title = 'Weight',
                  xaxis=dict(title_standoff=3),
                  title = 'ETF Return PCA Loadings',
                  template = 'plotly_white'
                )

fig.show()

*Eigenportfolio*

In [157]:
vol = returns.std(axis = 0)
weights = loadings_df['PC 1'] / vol
weights = (weights / sum(abs(weights)))

ep1 = (1+ (returns*weights).sum(axis = 1)).cumprod()

In [158]:
fig = go.Figure()


fig.add_trace(
    go.Scatter(
        x = ep1.index,
        y = ep1,
        name = 'Eigenportfolio 1'
    )
)

fig.add_trace(
    go.Scatter(
        x = market_returns.index,
        y = (1+market_returns['SPY']).cumprod(),
        name = 'SPY'
    )
)

fig.update_layout(
                  showlegend=True,
                  margin=dict(l=10, r=10, t=50, b=10),
                  legend=dict(orientation="h",yanchor="top",y=-0.1,xanchor="center",x=0.5),
                  width = 800,height = 400,
                  xaxis_title = 'Date',
                  yaxis_title = 'Cumulative Returns',
                  xaxis=dict(title_standoff=3),
                  title = 'Eigenportfolio vs Market',
                  template = 'plotly_white'
                )
fig.show()