In [9]:
import pandas as pd
import numpy as np
from factor_analyzer import FactorAnalyzer
import yfinance as yf
from datetime import date

start_date = '2019-01-01'
end_date = date.today()

# Define the Fama-French 5 factors
factors = ['Market', 'Size', 'Value', 'Profitability', 'Investment'] #Robust Minus Weak # Conservative Minus Aggressive

# Define a list of potential ETFs for each factor
etfs = {
    'Market': [
        'SPY',   # SPDR S&P 500 ETF Trust
        'IVV',   # iShares Core S&P 500 ETF
        'VOO',   # Vanguard S&P 500 ETF
        'VTI',   # Vanguard Total Stock Market ETF
        'ITOT',  # iShares Core S&P Total U.S. Stock Market ETF
        'SCHB',  # Schwab U.S. Broad Market ETF
        'VV',    # Vanguard Large-Cap ETF
        'SCHX',  # Schwab U.S. Large-Cap ETF
        'QQQ',   # Invesco QQQ Trust
        'DIA'    # SPDR Dow Jones Industrial Average ETF Trust
    ],
    'Size': [
        'IWM',   # iShares Russell 2000 ETF
        'IJR',   # iShares Core S&P Small-Cap ETF
        'VB',    # Vanguard Small-Cap ETF
        'SCHA',  # Schwab U.S. Small-Cap ETF
        'IWC',   # iShares Micro-Cap ETF
        'VBK',   # Vanguard Small-Cap Growth ETF
        'VBR',   # Vanguard Small-Cap Value ETF
        'IJH',   # iShares Core S&P Mid-Cap ETF
        'VO',    # Vanguard Mid-Cap ETF
        'MDYG'   # SPDR S&P 400 Mid Cap Growth ETF
    ],
    'Value': [
        'VTV',   # Vanguard Value ETF
        'IUSV',  # iShares Core S&P U.S. Value ETF
        'IWD',   # iShares Russell 1000 Value ETF
        'VONV',  # Vanguard Russell 1000 Value ETF
        'SCHV',  # Schwab U.S. Large-Cap Value ETF
        'RPV',   # Invesco S&P 500 Pure Value ETF
        'VLUE',  # iShares MSCI USA Value Factor ETF
        'PWV',   # Invesco Dynamic Large Cap Value ETF
        'IWN',   # iShares Russell 2000 Value ETF
        'VBR'    # Vanguard Small-Cap Value ETF
    ],
    'Profitability': [
        'QUAL',  # iShares MSCI USA Quality Factor ETF
        'FQAL',  # Fidelity Quality Factor ETF
        'SPHQ',  # Invesco S&P 500 Quality ETF
        'JQUA',  # JPMorgan U.S. Quality Factor ETF
        'QDF',   # FlexShares Quality Dividend Index Fund
        'DGRW',  # WisdomTree U.S. Quality Dividend Growth Fund
        'VFQY',  # Vanguard U.S. Quality Factor ETF
        'IQLT',  # iShares MSCI Intl Quality Factor ETF
        'QDEF',  # FlexShares Quality Dividend Defensive Index Fund
        'LRGF'   # iShares U.S. Equity Factor ETF
    ],
    'Investment': [
        'IVLU',  # iShares Edge MSCI Intl Value Factor ETF
        'IVAL',  # Alpha Architect International Quantitative Value ETF
        'FNDF',  # Schwab Fundamental International Large Company Index ETF
        'FIVA',  # Fidelity International Value Factor ETF
        'RODM',  # Hartford Multifactor Developed Markets (ex-US) ETF
        'ISVL',  # iShares International Developed Small Cap Value Factor ETF
        'AVDV',  # Avantis International Small Cap Value ETF
        'DFAI',  # Dimensional International Core Equity Market ETF
        'AVDE',  # Avantis International Equity ETF
        'FNDE',   # Schwab Fundamental Emerging Markets Large Company Index ETF
        'MTUM',  # iShares MSCI USA Momentum Factor ETF
        'VFMO',  # Vanguard U.S. Momentum Factor ETF
        'PDP',   # Invesco DWA Momentum ETF
        'IMTM',  # iShares MSCI Intl Momentum Factor ETF
        'DWAS',  # Invesco DWA SmallCap Momentum ETF
        'XMMO',  # Invesco S&P MidCap Momentum ETF
        'QMOM',  # Alpha Architect U.S. Quantitative Momentum ETF
        'FDMO',  # Fidelity Momentum Factor ETF
        'MMTM',  # SPDR S&P 1500 Momentum Tilt ETF
        'JMOM'   # JPMorgan U.S. Momentum Factor ETF
    ]
}

# Function to download ETF data
def get_etf_data(tickers, start_date, end_date):
    data = yf.download(tickers, start=start_date, end=end_date)['Adj Close']
    return data.pct_change().dropna()

# Download ETF data

all_etfs = [etf for factor_etfs in etfs.values() for etf in factor_etfs]
etf_returns = get_etf_data(all_etfs, start_date, end_date)

# Perform factor analysis
fa = FactorAnalyzer(n_factors=5, rotation='varimax')
fa.fit(etf_returns)

# Get factor loadings
loadings = pd.DataFrame(fa.loadings_, index=etf_returns.columns, columns=factors)

# Function to select best ETF for each factor
def select_best_etfs(loadings, etfs):
    best_etfs = {}
    for factor in factors:
        factor_etfs = etfs[factor]
        factor_loadings = loadings.loc[factor_etfs, factor]
        best_etf = factor_loadings.abs().idxmax()
        best_etfs[factor] = best_etf
    return best_etfs

# Select best ETFs
best_etfs = select_best_etfs(loadings, etfs)

# Print results
print("Best ETFs for hedging Fama-French 5 factors:")
for factor, etf in best_etfs.items():
    print(f"{factor}: {etf}")


[*********************100%***********************]  59 of 59 completed


Best ETFs for hedging Fama-French 5 factors:
Market: QQQ
Size: VBR
Value: IWN
Profitability: DGRW
Investment: QMOM



1. Factor Analysis:

a) Correlation Matrix: 
   - First, a correlation matrix is computed from the ETF returns data.
   - This matrix shows how each ETF correlates with every other ETF.

b) Eigenvalue Decomposition:
   - The correlation matrix is then decomposed into eigenvalues and eigenvectors.
   - Eigenvalues represent the amount of variance explained by each factor.
   - Eigenvectors represent the direction of these factors in the data space.

c) Factor Extraction:
   - The specified number of factors (in this case, 5) with the largest eigenvalues are retained.
   - These factors explain the most variance in the data.

d) Initial Factor Loading Matrix:
   - The initial factor loading matrix is computed using these top eigenvectors.
   - Each element in this matrix represents how much a particular ETF loads onto a specific factor.

e) Communalities:
   - Communalities are calculated for each ETF.
   - These represent the proportion of each ETF's variance that can be explained by the common factors.

f) Iterative Process:
   - The factor analysis often uses an iterative process (like MINRES or Maximum Likelihood) to refine the factor loadings.
   - This process aims to minimize the difference between the observed correlation matrix and the matrix implied by the factor model.

2. Factor Loadings:

The factor loadings matrix is a crucial output of the factor analysis:

a) Interpretation:
   - Each row represents an ETF, and each column represents a factor.
   - The values in the matrix (loadings) indicate the strength and direction of the relationship between each ETF and each factor.

b) Varimax Rotation:
   - The varimax rotation is applied to the initial factor loadings.
   - This rotation aims to maximize the sum of the variances of the squared loadings.
   - It tends to produce a simpler structure where each ETF loads strongly on as few factors as possible.

c) Rotated Loadings:
   - After rotation, the loadings are easier to interpret.
   - High positive or negative loadings (e.g., > 0.5 or < -0.5) indicate a strong relationship between the ETF and the factor.
   - Loadings close to zero indicate little relationship.

d) Factor Interpretation:
   - Each factor is then interpreted based on which ETFs load strongly onto it.
   - For example, if several value ETFs load strongly on one factor, it might be interpreted as a "value factor".

e) Variance Explained:
   - The sum of squared loadings for each factor represents the variance explained by that factor.
   - This is often expressed as a percentage of the total variance in the data.

By going through this process, the factor analysis attempts to uncover the underlying structure in the ETF returns data, potentially revealing the Fama-French factors or other significant market factors that drive ETF returns. The resulting factor loadings provide a way to quantify how each ETF relates to these underlying factors, which is crucial for constructing a hedging strategy.



# BEST ETF TO HEDGE BIOTECH ; Variance Drag

In [11]:
# Define the list of biotech ETFs to analyze
biotech_etfs = [
    'XBI',  # SPDR S&P Biotech ETF
    'LABU', # Direxion Daily S&P Biotech Bull 3X Shares
    'IBB',  # iShares Biotechnology ETF
    'BBH',  # VanEck Biotech ETF
    'PBE',  # Invesco Dynamic Biotechnology & Genome ETF
    'ARKG', # ARK Genomic Revolution ETF
    'SBIO', # ALPS Medical Breakthroughs ETF
    'BIB',  # ProShares Ultra Nasdaq Biotechnology
    'FBT'   # First Trust NYSE Arca Biotechnology Index Fund
]

#'BIS',  # ProShares UltraShort Nasdaq Biotechnology ; 'LABD', # Direxion Daily S&P Biotech Bear 3X Shares
start_date = '2022-01-01'
end_date = date.today()

# Function to download ETF data
def get_etf_data(tickers, start_date, end_date):
    try:
        data = yf.download(tickers, start=start_date, end=end_date)['Adj Close']
        return data.pct_change().dropna()
    except Exception as e:
        print(f"Error downloading data: {e}")
        return pd.DataFrame()

# Download ETF data
etf_returns = get_etf_data(biotech_etfs, start_date, end_date)

# Calculate annualized returns and volatility
annualized_returns = (1 + etf_returns.mean()) ** 252 - 1
annualized_volatility = etf_returns.std() * np.sqrt(252)

# Calculate variance drag for each ETF
def calculate_variance_drag(returns):
    mean_return = returns.mean()
    std_dev = returns.std()
    variance_drag = ((1 + mean_return) ** 2) - (1 + mean_return + std_dev ** 2)
    return variance_drag

variance_drags = {etf: calculate_variance_drag(etf_returns[etf]) for etf in etf_returns.columns}

# Compile performance summary
performance_summary = pd.DataFrame({
    'Annualized Returns': annualized_returns,
    'Annualized Volatility': annualized_volatility,
    'Variance Drag': pd.Series(variance_drags)
})

# Sort ETFs by variance drag to find the best short candidate
performance_summary.sort_values(by='Variance Drag', ascending=False, inplace=True)

print("Performance Summary for Biotech ETFs:")  
print(performance_summary)


[*********************100%***********************]  9 of 9 completed

Performance Summary for Biotech ETFs:
      Annualized Returns  Annualized Volatility  Variance Drag
FBT             0.039078               0.219700      -0.000039
BBH             0.004059               0.215666      -0.000168
IBB             0.004853               0.221252      -0.000175
PBE             0.010504               0.236492      -0.000180
SBIO            0.003993               0.353838      -0.000481
XBI            -0.003116               0.352282      -0.000505
BIB            -0.006762               0.436832      -0.000784
ARKG           -0.209563               0.477342      -0.001836
LABU           -0.126366               1.044456      -0.004865





In [None]:
# Periods of timew where LABU outperformed XBI ; what is common amongst these periods