### Question 1

In [47]:
import pandas as pd
import numpy as np
import yfinance as yf

from datetime import date
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

**Data**

In [48]:
# Tickers
MAG7_TICKERS = ['AAPL','MSFT','TSLA','NVDA','META','AMZN','GOOGL']
MAG7_WEIGHTS = [0.0656,0.0643,0.0225,0.0699,0.0264,0.0426,0.0225]

INDEX_TICKER = ['SPY']
EXTRA_TICKERS = ['AES', 'LNT', 'AEE', 'AEP', 'AWK', 'ATO', 'CNP', 'CMS', 'ED']

# Start/End Dates
start_date = date(2014,1,1)
end_date = date(2025,1,1)


# Yfinance Download
df_index = yf.download(tickers = INDEX_TICKER,start = start_date, end = end_date, auto_adjust = True)
df_mag7 = yf.download(tickers = MAG7_TICKERS,start = start_date, end = end_date, auto_adjust = True)
df_extra = yf.download(tickers = EXTRA_TICKERS,start = start_date, end = end_date, auto_adjust = True)

# Log Returns
df_index_ret = (np.log(df_index['Close'])
             .diff()
             .dropna()
        )

df_mag7_ret = (np.log(df_mag7['Close'])
            .diff()
            .dropna()
        )

df_extra_ret = (np.log(df_extra['Close'])
            .diff()
            .dropna()
        )

# Weighted MAG7 Returns (Estimate)
df_mag7_ret['Seven_Weighted'] = np.sum(df_mag7_ret*MAG7_WEIGHTS, axis = 1)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  7 of 7 completed
[*********************100%***********************]  9 of 9 completed


#### **a) How much of the beta is driven by the seven versus the 493 other stocks (i.e. they probably have different betas – think three)?**

The beta of a portfolio is the weighted sum of its component betas. That is 

$$
\beta_{Portfolio} = \sum_{i}w_i * \beta_{i}
$$

We know the beta of our portfolio, the market portfolio, is 1. This is given as the beta of a portfolio to itself is 1. We can thus analyze the contribution of the MAG7 stocks versus the rest.

*Mag7 Contribution by Time by Window Size*

In [49]:
def rolling_beta(portfolio_returns, market_returns, window):
    betas = []
    for i in range(window, len(portfolio_returns)):
        X = market_returns[i - window:i].values.reshape(-1,1)
        y = portfolio_returns[i - window:i]
        model = LinearRegression().fit(X, y)
        betas.append(model.coef_[0])
    return pd.Series(betas, index=portfolio_returns.index[window:])

In [50]:
windows = [30,60,90,120,180,250]

beta_series = {}
for window in windows:
    beta_series[window] = rolling_beta(df_mag7_ret['Seven_Weighted'],df_index_ret['SPY'],window)

contribution_ts = pd.DataFrame(beta_series).dropna()

*Plot*

In [51]:
fig = go.Figure()

for window in contribution_ts.columns:

    fig.add_trace(
        go.Scatter(
            x = contribution_ts.index,
            y = contribution_ts[window],
            name = f'Window {window}'
        )
    )

fig.update_layout(title = 'Mag7 Beta Contribution by Window Estimtation Length')
fig.update_yaxes(title = "Contribution %")
fig.update_xaxes(title = 'Date')
fig.show()

In [52]:
fig = go.Figure()

for window in contribution_ts.columns:

    fig.add_trace(
        go.Scatter(
            x = contribution_ts.index,
            y = contribution_ts[window] / sum(MAG7_WEIGHTS),
            name = f'Window {window}'
        )
    )

fig.update_layout(title = 'Mag7 Beta by Window Estimtation Length')
fig.update_yaxes(title = "Beta")
fig.update_xaxes(title = 'Date')
fig.show()

In [53]:
(contribution_ts / sum(MAG7_WEIGHTS)).var()

30     0.121182
60     0.077091
90     0.062344
120    0.056067
180    0.049972
250    0.045587
dtype: float64

#### **b) How does this affect the use of beta for evaluating/forecasting returns and the use of beta for investment decisions?**

There are a few key ways in which beta is used throughout the investment process. Particurlarly, during return evaluation, it is common for an investor to look at their portfolio relative to an index, most commonly the SP500. You might for example calculate the beta of your strategy to the SP500, or the correlation of your strategy to the SP500. Further, you might utilize this in investment decisions to evaluate your current risk profile. 

**Forecasting**

The simplest model of asset returns is the CAPM. It is often the basis of introductory asset allocation and mean variance optimization. The CAPM formula says that

$$
E[r_i] = E[r_f] + \beta_{i}(E[r_m] - r_f)
$$


One of the key assumptions of The CAPM assumptions of the model is that the market is well diversified. 

In [27]:
# Initialize results storage
rolling_results = []

for window_size in windows:
    # Perform rolling calculations
    for start_idx in range(len(df_index_ret) - window_size + 1):
        end_idx = start_idx + window_size
        window_data = df_mag7_ret.iloc[start_idx:end_idx]
        date_range = window_data.index

        # Perform CAPM regressions for each asset in the window
        for ticker in MAG7_TICKERS:
            y = window_data[ticker].dropna()
            x = df_index_ret['SPY'].loc[y.index].values.reshape(-1, 1)
            
            # Fit linear regression
            model = LinearRegression().fit(x, y)
            y_pred = model.predict(x)
            
            # Calculate R2 and MAE
            r2 = model.score(x, y)
            mae = mean_absolute_error(y, y_pred)
            
            # Store results
            rolling_results.append({
                "Start_Date": date_range[0],
                "End_Date": date_range[-1],
                "Asset": ticker,
                'Window':window_size,
                "R2": r2,
                "MAE": mae,
            })

# Convert rolling results to DataFrame
rolling_df = pd.DataFrame(rolling_results)

In [30]:
rolling_df = rolling_df.set_index('End_Date')

*Plot*

In [37]:
fig = go.Figure()

for ticker in rolling_df['Asset'].unique():
    for window_size in [250]:
        _df = rolling_df.loc[(rolling_df['Asset'] == ticker) & (rolling_df['Window'] == window_size),:]
        _df = _df.sort_index()

        fig.add_trace(
            go.Scatter(
                x = _df.index,
                y = _df['R2'],
                name = f"{ticker}-{window_size}"
            )
        )
fig.show()