### Question 1

In [87]:
import pandas as pd
import numpy as np
import yfinance as yf

from datetime import date
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

**Data**

In [82]:
# Tickers
MAG7_TICKERS = ['AAPL','MSFT','TSLA','NVDA','META','AMZN','GOOGL']
MAG7_WEIGHTS = [0.0656,0.0643,0.0225,0.0699,0.0264,0.0426,0.0225]

INDEX_TICKER = ['QQQ']
EXTRA_TICKERS = ['AES', 'LNT', 'AEE', 'AEP', 'AWK', 'ATO', 'CNP', 'CMS', 'ED']

# Start/End Dates
start_date = date(2021,1,1)
end_date = date(2025,1,1)


# Yfinance Download
df_index = yf.download(tickers = INDEX_TICKER,start = start_date, end = end_date, auto_adjust = True)
df_mag7 = yf.download(tickers = MAG7_TICKERS,start = start_date, end = end_date, auto_adjust = True)
df_extra = yf.download(tickers = EXTRA_TICKERS,start = start_date, end = end_date, auto_adjust = True)

# Log Returns
df_index_ret = (np.log(df_index['Close'])
             .diff()
             .dropna()
        )

df_mag7_ret = (np.log(df_mag7['Close'])
            .diff()
            .dropna()
        )

df_extra_ret = (np.log(df_extra['Close'])
            .diff()
            .dropna()
        )

# Weighted MAG7 Returns (Estimate)
df_mag7_ret['Seven_Weighted'] = np.sum(df_mag7_ret*MAG7_WEIGHTS, axis = 1)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  7 of 7 completed
[*********************100%***********************]  9 of 9 completed


#### **a) How much of the beta is driven by the seven versus the 493 other stocks (i.e. they probably have different betas – think three)?**

The beta of a portfolio is the weighted sum of its component betas. That is 

$$
\beta_{Portfolio} = \sum_{i}w_i * \beta_{i}
$$

We know the beta of our portfolio, the market portfolio, is 1. This is given as the beta of a portfolio to itself is 1. We can thus analyze the contribution of the MAG7 stocks versus the rest.

In [5]:
df_mag7_ret['Seven_Weighted'].cov(df_index_ret['SPY']) / df_index_ret['SPY'].var() / 0.4

1.2362558151906078

In [6]:
df_mag7_ret['Seven_Weighted'].cov(df_index_ret['SPY']) / df_index_ret['SPY'].var()

0.49450232607624317

Nearly 50% of the index beta is drive by these 7 stocks.

We can see above that the beta of the MAG7 stocks is roughly 1.24. This means we can calculate a beta for the rest of the stocks in the SP500 must be an average of 0.75. In this sense, in our interpretation of beta, the MAG7 tickers drive a majority of the systematic risk that is associated with the index.

**Alternative View**

We can further analyze this contribution by looking at the sector breakdown contribution.

#### **b) How does this affect the use of beta for evaluating/forecasting returns and the use of beta for investment decisions?**

There are a few key ways in which beta is used throughout the investment process. Particurlarly, during return evaluation, it is common for an investor to look at their portfolio relative to an index, most commonly the SP500. You might for example calculate the beta of your strategy to the SP500, or the correlation of your strategy to the SP500. Further, you might utilize this in investment decisions to evaluate your current risk profile. 

**Forecasting**

The simplest model of asset returns is the CAPM. It is often the basis of introductory asset allocation and mean variance optimization. The CAPM formula says that

$$
E[r_i] = E[r_f] + \beta_{i}(E[r_m] - r_f)
$$


One of the key assumptions of The CAPM assumptions of the model is that the market is well diversified. 

In [55]:
rolling_results = []

window_size = 120

for start_idx in range(len(df_index_ret) - window_size + 1):
    end_idx = start_idx + window_size
    window_data = df_mag7_ret.iloc[start_idx:end_idx]
    date_range = window_data.index

    for ticker in MAG7_TICKERS:
        y = window_data[ticker].dropna()
        x = df_index_ret['SPY'].loc[y.index].values.reshape(-1, 1)
        
        if len(y) < 30: 
            continue
        
        
        model = LinearRegression().fit(x, y)
        y_pred = model.predict(x)
        
       
        r2 = model.score(x, y)
        mae = mean_absolute_error(y, y_pred)
        
        
        rolling_results.append({
            "Start_Date": date_range[0],
            "End_Date": date_range[-1],
            "Asset": ticker,
            "R2": r2,
            "MAE": mae,
        })

# Convert rolling results to DataFrame
rolling_df = pd.DataFrame(rolling_results)

In [49]:
aapl_r2 = (rolling_df.loc[rolling_df['Asset']=='AAPL'][['End_Date','Asset','R2']]
 .pivot(index='End_Date',columns='Asset',values='R2')
 )

In [56]:
rolling_df = rolling_df.set_index('End_Date')

In [57]:
fig = go.Figure()

for ticker in rolling_df['Asset'].unique():
    
    _df = rolling_df.loc[rolling_df['Asset'] == ticker,:]
    _df = _df.sort_index()
    fig.add_trace(
        go.Scatter(
            x = _df.index,
            y = _df['R2'],
            name = ticker
        )
    )

fig.update_layout(title = 'R2 of CAPM Regression by Ticker')
fig.update_yaxes(title = 'R2')
fig.show()

**Performance Evaluation**

**Risk Analysis + Hedging**

In [63]:
import statsmodels.api as sm

In [83]:
ff = pd.read_csv('FF_5Factor.csv')
ff = ff.set_index('date')
ff.index = pd.to_datetime(ff.index).date

In [84]:
df_index_ret.index = pd.to_datetime(df_index_ret.index).date

In [85]:
# Filter df_index_ret to only include dates in ff.index
aligned_data = df_index_ret.loc[df_index_ret.index.isin(ff.index)]

# Align ff data to df_index_ret's index
aligned_ff = ff.loc[ff.index.isin(aligned_data.index)]

# Ensure no missing data (drop NaN rows if any)
aligned_data = aligned_data.dropna()
aligned_ff = aligned_ff.dropna()

# Check if they have the same length and align perfectly
print(f"Aligned data length: {len(aligned_data)}")
print(f"Aligned FF data length: {len(aligned_ff)}")

Aligned data length: 752
Aligned FF data length: 752


In [86]:
# Define dependent and independent variables
y = aligned_data['QQQ']
X = aligned_ff[['mktrf', 'smb', 'hml', 'rmw', 'cma','umd']]  # Include MOM if available
X = sm.add_constant(X)  # Add constant for intercept

# Fit the regression
model = sm.OLS(y, X).fit()

# Display results
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                    QQQ   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.961
Method:                 Least Squares   F-statistic:                     3071.
Date:                Mon, 27 Jan 2025   Prob (F-statistic):               0.00
Time:                        16:36:12   Log-Likelihood:                 3313.8
No. Observations:                 752   AIC:                            -6614.
Df Residuals:                     745   BIC:                            -6581.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0002      0.000      1.942      0.0

In [81]:
"""
OLS Regression Results                            
==============================================================================
Dep. Variable:                    SPY   R-squared:                       0.996
Model:                            OLS   Adj. R-squared:                  0.996
Method:                 Least Squares   F-statistic:                 2.811e+04
Date:                Mon, 27 Jan 2025   Prob (F-statistic):               0.00
Time:                        16:31:45   Log-Likelihood:                 4359.1
No. Observations:                 752   AIC:                            -8704.
Df Residuals:                     745   BIC:                            -8672.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       3.824e-05    2.7e-05      1.414      0.158   -1.48e-05    9.13e-05
mktrf          0.9919      0.003    363.726      0.000       0.987       0.997
smb           -0.1136      0.004    -25.333      0.000      -0.122      -0.105
hml            0.0170      0.004      4.058      0.000       0.009       0.025
rmw            0.0813      0.005     16.622      0.000       0.072       0.091
cma            0.0511      0.007      7.141      0.000       0.037       0.065
umd           -0.0175      0.003     -6.673      0.000      -0.023      -0.012
==============================================================================
Omnibus:                        6.309   Durbin-Watson:                   2.180
Prob(Omnibus):                  0.043   Jarque-Bera (JB):                8.217
Skew:                          -0.058   Prob(JB):                       0.0164
Kurtosis:                       3.499   Cond. No.                         291.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

OLS Regression Results                            
==============================================================================
Dep. Variable:                    QQQ   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.961
Method:                 Least Squares   F-statistic:                     3071.
Date:                Mon, 27 Jan 2025   Prob (F-statistic):               0.00
Time:                        16:36:12   Log-Likelihood:                 3313.8
No. Observations:                 752   AIC:                            -6614.
Df Residuals:                     745   BIC:                            -6581.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0002      0.000      1.942      0.053    -2.3e-06       0.000
mktrf          1.1322      0.011    103.398      0.000       1.111       1.154
smb           -0.1506      0.018     -8.365      0.000      -0.186      -0.115
hml           -0.3229      0.017    -19.204      0.000      -0.356      -0.290
rmw            0.0616      0.020      3.137      0.002       0.023       0.100
cma           -0.0820      0.029     -2.852      0.004      -0.138      -0.026
umd            0.0286      0.011      2.719      0.007       0.008       0.049
==============================================================================
Omnibus:                       21.068   Durbin-Watson:                   1.950
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               44.072
Skew:                          -0.088   Prob(JB):                     2.69e-10
Kurtosis:                       4.173   Cond. No.                         291.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
"""




#### **c) What happens if there is a significant change in the price of one or more of those securities (especially since there may be significant interdependence due to the growth of AI investment by many of those firms)?**

**Hedging Continued**

In [108]:
# Tickers
PORTFOLIO_1 = ['AAPL','MSFT','NVDA','META','AMZN','GOOGL']
PORTFOLIO_2 = ['AES', 'LNT', 'AEE', 'AEP', 'AWK', 'ATO']
MARKET = ['SPY']
# Start, End Date
start = date(2021,1,1)
end = date(2025,1,1)

# Data
df_p1 = yf.download(tickers = PORTFOLIO_1, start= start, end = end, auto_adjust=True)['Close']
df_p2 = yf.download(tickers = PORTFOLIO_2, start= start, end = end, auto_adjust=True)['Close']
df_mk = yf.download(tickers = MARKET, start= start, end = end, auto_adjust=True)['Close']
# Returns
df_p1_ret = df_p1.pct_change().dropna()
df_p2_ret = df_p2.pct_change().dropna()
df_mk_ret = df_mk.pct_change().dropna()


[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  1 of 1 completed


In [109]:
df_p1_ret['EQ'] = df_p1_ret.mean(axis = 1)

In [113]:
beta_estimates = df_p1_ret.iloc[0:120,:]['EQ'].cov(df_mk_ret.iloc[0:120,:]['SPY']) / df_mk_ret.iloc[0:120,:]['SPY'].var()

1.3735649426357297