In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import yfinance as yf

# 1. Fetch daily price data for biotech and related indices
tickers = ['XBI', 'IBB', 'NBI', 'IWM', 'XLV']
prices = yf.download(tickers, start='2000-01-01', end='2024-09-01')['Adj Close']

# 2. Calculate daily returns (percentage change)
returns = prices.pct_change()

# 3. Fetch daily treasury yields (10-year and 2-year)
yields = yf.download(['^TNX', '^IRX'], start='2000-01-01', end='2024-09-01')['Adj Close']
yields.columns = ['10y_yield', '2y_yield']

# 4. Compute the 10y-2y yield spread and classify yield curve regimes
yields['10y_2y_spread'] = yields['10y_yield'] - yields['2y_yield']
yields['inverted_10y_2y'] = yields['10y_2y_spread'] < 0  # Inversion when 10y < 2y

# 5. Merge returns and yields data
df = pd.merge(returns, yields, left_index=True, right_index=True)

# 6. Compute rolling correlations between biotech index (XBI) and other indices
rolling_corr_window = 252  # 1-year window for rolling correlation
df['xbi_iwm_corr'] = df['XBI'].rolling(window=rolling_corr_window).corr(df['IWM'])
df['xbi_xlv_corr'] = df['XBI'].rolling(window=rolling_corr_window).corr(df['XLV'])

# 7. Analyze XBI returns in normal vs inverted yield curve regimes
xbi_returns_normal = df[~df['inverted_10y_2y']]['XBI']
xbi_returns_inverted = df[df['inverted_10y_2y']]['XBI']

# Output formatting for better readability
print("===== XBI Returns Summary =====\n")
print("Normal Yield Curve (No Inversion):")
print(xbi_returns_normal.describe(), "\n")
print("Inverted Yield Curve:")
print(xbi_returns_inverted.describe(), "\n")

# 8. Analyze correlations under different yield curve regimes
xbi_iwm_corr_normal = df[~df['inverted_10y_2y']]['xbi_iwm_corr']
xbi_iwm_corr_inverted = df[df['inverted_10y_2y']]['xbi_iwm_corr']

print("===== XBI-IWM Correlation Summary =====\n")
print("Normal Yield Curve:")
print(xbi_iwm_corr_normal.describe(), "\n")
print("Inverted Yield Curve:")
print(xbi_iwm_corr_inverted.describe(), "\n")

# 9. Prepare data for predictive modeling (X: features, y: target XBI returns)
X = df[['IWM', 'XLV', '10y_yield', '2y_yield', '10y_2y_spread', 'inverted_10y_2y', 'xbi_iwm_corr', 'xbi_xlv_corr']]

# Shift features to create lagged predictors
lag_periods = [1, 5, 21, 63]  # 1-day, 1-week, 1-month, 1-quarter lags
for lag in lag_periods:
    X_lag = X.shift(lag)
    X_lag.columns = [col + f'_lag{lag}' for col in X_lag.columns]
    X = pd.concat([X, X_lag], axis=1)

# Shift the target variable (XBI returns) to predict 1 month ahead
y = df['XBI'].shift(-21)  # Shift target variable by -21 days (1 month ahead)

# Drop rows with NaNs created by shifting
X = X.dropna()
y = y.reindex(X.index)

# 10. Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# 11. Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# 12. Predict XBI returns on the test set
y_pred = model.predict(X_test)

# 13. Evaluate model performance using Mean Squared Error (MSE)
mse = np.mean((y_test - y_pred) ** 2)
print(f"===== Model Performance =====\nMean Squared Error: {mse:.4f}\n")

# 14. Feature importance (coefficients from linear regression)
importances = pd.Series(model.coef_, index=X.columns)
top_features = importances.sort_values(ascending=False).head(10)

# Output top features and their corresponding coefficients
print("===== Top 10 Feature Importances =====\n")
print(top_features.to_string())


[*********************100%***********************]  5 of 5 completed
  returns = prices.pct_change()
[*********************100%***********************]  2 of 2 completed


===== XBI Returns Summary =====

Normal Yield Curve (No Inversion):
count    737.000000
mean       0.000502
std        0.016362
min       -0.047046
25%       -0.010039
50%        0.000527
75%        0.010382
max        0.076684
Name: XBI, dtype: float64 

Inverted Yield Curve:
count    3933.000000
mean        0.000601
std         0.019969
min        -0.123472
25%        -0.010506
50%         0.000838
75%         0.011936
max         0.132640
Name: XBI, dtype: float64 

===== XBI-IWM Correlation Summary =====

Normal Yield Curve:
count    612.000000
mean       0.753253
std        0.048137
min        0.674662
25%        0.712617
50%        0.740736
75%        0.789306
max        0.855241
Name: xbi_iwm_corr, dtype: float64 

Inverted Yield Curve:
count    3807.000000
mean        0.743696
std         0.073803
min         0.499280
25%         0.704210
50%         0.738080
75%         0.806061
max         0.890762
Name: xbi_iwm_corr, dtype: float64 

===== Model Performance =====
Mean Square

### XBI Returns Summary

- **Returns During Inverted Yield Curves**:
  - The average daily return is slightly higher than during normal yield curves.
  - However, returns are more varied, meaning there are more extreme highs and lows.

- **Returns During Normal Yield Curves**:
  - The returns are fairly stable, with the highest and lowest returns being moderate.


### XBI-IWM Correlation Summary

- **During Normal Yield Curves**:
  - XBI and IWM usually move together quite well, with strong correlations.

- **During Inverted Yield Curves**:
  - The correlation between XBI and IWM is slightly weaker and varies more during these times.

### Top 10 Feature Importances

- **Most Important Predictors**:
  - **Lagged XBI-XLV Correlation (`xbi_xlv_corr_lag1`)**: This is the most significant feature for predicting XBI returns.
  - **Lagged XBI-IWM Correlation (`xbi_iwm_corr_lag1_lag5_lag21`)**: Combines correlations from different time periods (1 day, 1 week, 1 month) and is also very important.
  - **Current XBI-IWM Correlation (`xbi_iwm_corr`)**: The immediate correlation between XBI and IWM.
  - **Lagged XBI-XLV Correlation (`xbi_xlv_corr_lag21`)**: This feature looks at the XBI-XLV correlation with a 21-day delay.

- **Importance**:
  - Lagged correlations (how past relationships between indices affect predictions) are crucial for predicting future XBI returns.

The returns of XBI are slightly higher but more unpredictable during inverted yield curves. The correlations with IWM are a bit lower and more variable during these times. The most important factors for predicting XBI returns involve past correlations with other indices.