# Final Project

## Load Data

In [1]:
import yfinance as yf
from pypfopt import plotting, EfficientFrontier, objective_functions, expected_returns
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from pypfopt import BlackLittermanModel, black_litterman, risk_models


tickers = ["AAPL", "AMZN", "BRK-B", "GOOGL", "META", "MSFT", "NVDA", "TSLA", "UNH", "XOM"]
portfolio_data = yf.download(tickers, period="5y")
portfolio_data = portfolio_data["Adj Close"]
portfolio_data.tail()

[*********************100%***********************]  10 of 10 completed


Unnamed: 0_level_0,AAPL,AMZN,BRK-B,GOOGL,META,MSFT,NVDA,TSLA,UNH,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-06-27,188.059998,129.179993,335.339996,118.330002,287.049988,334.570007,418.76001,250.210007,482.559998,104.550003
2023-06-28,189.25,129.039993,334.149994,120.18,285.290009,335.850006,411.170013,256.23999,474.450012,105.400002
2023-06-29,189.589996,127.900002,336.910004,119.099998,281.529999,335.049988,408.220001,257.5,476.440002,106.699997
2023-06-30,193.970001,130.360001,341.0,119.699997,286.980011,340.540009,423.019989,261.769989,480.640015,107.25
2023-07-03,192.460007,130.220001,342.0,119.900002,286.019989,337.98999,424.130005,279.820007,477.880005,107.459999


In [2]:
market_index_ticker = "^GSPC"  # S&P 500 index
market_index_data = yf.download(market_index_ticker, period="5y")
market_index_data = market_index_data["Adj Close"]
market_index_data.tail()

[*********************100%***********************]  1 of 1 completed


Date
2023-06-27    4378.410156
2023-06-28    4376.859863
2023-06-29    4396.439941
2023-06-30    4450.379883
2023-07-03    4455.589844
Name: Adj Close, dtype: float64

In [3]:
# Utility function to predict stock price for 10 future days using a non-linear regression model
def forecast_price(data, reg_model):
    dev_x = [data[i:i+10] for i in range(len(data)-20)]
    dev_y = [data[i+10] for i in range(10,len(data)-10)]
    test = [data[i:i+10] for i in range(len(data)-20,len(data)-10)]
    reg_model.fit(dev_x,dev_y)
    pred = reg_model.predict(test)
    return pred

## Mean Variance Optimization

In [6]:
reg_models = [RandomForestRegressor(), GradientBoostingRegressor(), MLPRegressor(hidden_layer_sizes=(100, 100)), SVR()]

for reg_model in reg_models:
    print("----------------------------------------------------")
    print(f"Non-linear ML model: {reg_model}\n")
    
    future_prices = {}
    for ticker in tickers:
        future_prices[ticker] = forecast_price(portfolio_data[ticker], reg_model)

    future_prices = pd.DataFrame(future_prices)
    S = risk_models.CovarianceShrinkage(future_prices).ledoit_wolf()

    mu = expected_returns.capm_return(future_prices)

    ef = EfficientFrontier(mu, S)
    ef.min_volatility()
    weights = ef.clean_weights()
    print(f"Weights: {weights}")

    ret, volatility, sharpe_ratio = ef.portfolio_performance()
    print("Expected annual return:", ret)
    print("Annual volatility:", volatility)
    print("Sharpe ratio:", sharpe_ratio)

----------------------------------------------------
Non-linear ML model: RandomForestRegressor()

Weights: OrderedDict([('AAPL', 0.14121), ('AMZN', 0.01276), ('BRK-B', 0.13102), ('GOOGL', 0.05297), ('META', 0.09742), ('MSFT', 0.13166), ('NVDA', 0.14511), ('TSLA', 0.00114), ('UNH', 0.14663), ('XOM', 0.14009)])
Expected annual return: 0.16252859711640055
Annual volatility: 0.11086445476460734
Sharpe ratio: 1.2856113117502272
----------------------------------------------------
Non-linear ML model: GradientBoostingRegressor()

Weights: OrderedDict([('AAPL', 0.16362), ('AMZN', 0.0), ('BRK-B', 0.15377), ('GOOGL', 0.06711), ('META', 0.11886), ('MSFT', 0.06206), ('NVDA', 0.14458), ('TSLA', 0.0), ('UNH', 0.14815), ('XOM', 0.14185)])
Expected annual return: 0.08220777594095553
Annual volatility: 0.07760013999205151
Sharpe ratio: 0.8016451509923487
----------------------------------------------------
Non-linear ML model: MLPRegressor(hidden_layer_sizes=(100, 100))

Weights: OrderedDict([('AAPL'

## Index model

In [7]:
reg_models = [RandomForestRegressor(), GradientBoostingRegressor(), MLPRegressor(hidden_layer_sizes=(100, 100)), SVR()]

for reg_model in reg_models:
    print("----------------------------------------------------")
    print(f"Non-linear ML model: {reg_model}\n")

    returns = {"MARKET": ((market_index_data / market_index_data.shift(1)) - 1).dropna().tolist()}
    for ticker in tickers:
        returns[ticker] = ((portfolio_data[ticker] / portfolio_data[ticker].shift(1))-1).dropna().tolist()
    market_future = forecast_price(returns["MARKET"], reg_model)


    beta = {}
    alpha = {}
    residual_variance = {}
    for ticker in tickers:
        stock_future = forecast_price(returns[ticker], reg_model)

        single_index_reg = LinearRegression()
        single_index_reg.fit(np.array(market_future).reshape(-1, 1), y=stock_future)

        beta[ticker] = single_index_reg.coef_[0]
        alpha[ticker] = single_index_reg.intercept_

        y_pred = single_index_reg.predict(np.array(returns["MARKET"]).reshape(-1, 1))
        residuals = returns[ticker] - y_pred
        residual_variance[ticker] = np.var(residuals)


    # Compute the initial position of each security
    weights = {ticker: alpha[ticker] / residual_variance[ticker] for ticker in tickers}
    total_weight = sum(weights.values())
    weights = {ticker: weight / total_weight for ticker, weight in weights.items()}

    # Compute the alpha of the active portfolio
    alpha_portfolio = sum(weights[ticker] * alpha[ticker] for ticker in tickers)

    # Compute the residual variance of the active portfolio
    residual_variance_portfolio = sum((weights[ticker] ** 2) * residual_variance[ticker] for ticker in tickers)

    # Compute the initial position in the active portfolio
    residual_variance_market = 0.0114  # Variance of S&P 500
    risk_premium_market = 0.056
    initial_position_portfolio = (alpha_portfolio * residual_variance_market) / (residual_variance_portfolio * risk_premium_market)

    # Compute the beta of the active portfolio
    beta_portfolio = sum(weights[ticker] * beta[ticker] for ticker in tickers)

    # Adjust the initial position in the active portfolio
    adjusted_position_portfolio = initial_position_portfolio / (1 + (1 - beta_portfolio) * initial_position_portfolio)

    # Optimal risky portfolio now has weights
    final_weight_market = 1 - adjusted_position_portfolio
    weights = {ticker: weight * adjusted_position_portfolio for ticker, weight in weights.items()}

    # Calculate the risk premium of the portfolio
    risk_premium_portfolio = (final_weight_market + adjusted_position_portfolio * beta_portfolio) * risk_premium_market + adjusted_position_portfolio * alpha_portfolio

    # Compute the variance of the portfolio
    portfolio_variance = (final_weight_market + adjusted_position_portfolio * beta_portfolio) ** 2 * residual_variance_market + adjusted_position_portfolio ** 2 * residual_variance_portfolio

    # Calculate the Sharpe ratio
    sharpe_ratio = risk_premium_portfolio / (portfolio_variance ** 0.5)

    print(f"Weights: {weights}")
    print(f"Market weight: {final_weight_market}")
    print(f"Sharpe ratio: {sharpe_ratio}")

----------------------------------------------------
Non-linear ML model: RandomForestRegressor()

Weights: {'AAPL': 0.14438425270067715, 'AMZN': -0.32752200277687143, 'BRK-B': 0.1038629876234869, 'GOOGL': -0.035243504152177384, 'META': 0.007627804936769037, 'MSFT': 0.0993561711047116, 'NVDA': 0.15466681767646137, 'TSLA': 0.05600156224606549, 'UNH': -0.043642640485203596, 'XOM': -0.0771980912473254}
Market weight: 0.9177066423734063
Sharpe ratio: 0.5787630338610331
----------------------------------------------------
Non-linear ML model: GradientBoostingRegressor()

Weights: {'AAPL': -4.083877714918434, 'AMZN': 2.114837452700824, 'BRK-B': 1.6109341751008734, 'GOOGL': -0.5168286691424531, 'META': 0.42347918802756224, 'MSFT': -2.8200350589900247, 'NVDA': -1.769116695113761, 'TSLA': -0.8082359212335463, 'UNH': 0.019305953392820204, 'XOM': -0.39648351290419226}
Market weight: 7.226020803080331
Sharpe ratio: -0.5865232880697921
----------------------------------------------------
Non-linear

## Capital asset pricing model (CAPM)

In [9]:
reg_models = [RandomForestRegressor(), GradientBoostingRegressor(), MLPRegressor(hidden_layer_sizes=(100, 100)), SVR()]

for reg_model in reg_models:
    print("----------------------------------------------------")
    print(f"Non-linear ML model: {reg_model}")
    
    returns = {"MARKET": ((market_index_data / market_index_data.shift(1)) - 1).dropna().tolist()}
    for ticker in tickers:
        returns[ticker] = ((portfolio_data[ticker] / portfolio_data[ticker].shift(1))-1).dropna().tolist()
    market_future = forecast_price(returns["MARKET"], reg_model)

    future_returns = {}
    for ticker in tickers:
        single_index_reg = Ridge()
        single_index_reg.fit(np.array(returns["MARKET"]).reshape(-1, 1), y=returns[ticker])
        future_returns[ticker] = single_index_reg.predict(np.array(market_future).reshape(-1, 1))

    future_returns = pd.DataFrame(future_returns)

    S = risk_models.CovarianceShrinkage(future_returns).ledoit_wolf()
    ef = EfficientFrontier(None, S)
    ef.min_volatility()
    weights = ef.clean_weights()
    print(weights)

    ret, volatility, sharpe_ratio = ef.portfolio_performance()
    print("Expected annual return:", ret)
    print("Annual volatility:", volatility)
    print("Sharpe ratio:", sharpe_ratio)

----------------------------------------------------
Non-linear ML model: RandomForestRegressor()
OrderedDict([('AAPL', 0.18002), ('AMZN', 0.0), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.08681), ('NVDA', 0.23845), ('TSLA', 0.49472), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: None
Annual volatility: 4.107900387646437
Sharpe ratio: None
----------------------------------------------------
Non-linear ML model: GradientBoostingRegressor()
OrderedDict([('AAPL', 0.17803), ('AMZN', 0.0), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.07145), ('NVDA', 0.2427), ('TSLA', 0.50781), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: None
Annual volatility: 4.730631393553236
Sharpe ratio: None
----------------------------------------------------
Non-linear ML model: MLPRegressor(hidden_layer_sizes=(100, 100))
OrderedDict([('AAPL', 0.14355), ('AMZN', 0.0), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.02915), ('NVDA', 0.22035), ('TSLA', 0.60695), ('

## Arbitrage pricing theory and multifactor model

In [31]:
reg_models = [RandomForestRegressor(), GradientBoostingRegressor(), MLPRegressor(hidden_layer_sizes=(100, 100)), SVR()]

for reg in reg_models:
    print("----------------------------------------------------")
    print(f"Non-linear ML model: {reg}")
    
    # Fetch historical data for the macroeconomic factors
    factor_tickers = ['SPY', 'TLT', 'BND']
    factor_data = yf.download(factor_tickers, period="5y")
    factor_data = factor_data['Adj Close']

    # Calculate the returns for the S&P and Treasury bond factors
    factor_returns = factor_data.pct_change().dropna()
    spy_returns = factor_returns['SPY']
    tlt_returns = factor_returns['TLT']
    bnd_returns = factor_returns['BND']

    # FACTOR 1
    bnd_future = forecast_price(bnd_returns, reg_model)

    # FACTOR 2
    spy_future = forecast_price(spy_returns, reg_model)

    # FACTOR 3
    tlt_future = forecast_price(tlt_returns, reg_model)

    # FACTOR 4
    # GDP Price of USA for past 5 years

    # US GDP per capita Prices of last 5 years
    GDP_prices = {
        2018: 59607,
        2019: 60698,
        2020: 58453,
        2021: 61855,
        2022: 62551,
        2023: 63451 # Forecast data also available online
    }

    # Normalize the GDP values as it will make the other factors irrelevant as it is very large
    values = list(GDP_prices.values())
    values_array = [[value] for value in values]
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(values_array)
    scaled_values = scaled_values.flatten()
    scaled_GDP_prices = {year: scaled_value for year, scaled_value in zip(GDP_prices.keys(), scaled_values)}

    # print(scaled_GDP_prices)

    for index, row in factor_returns.iterrows():
        # Extract the year from the date
        year = index.year

        # Fill the 'GDP' column with the corresponding GDP price based on the year
        factor_returns.at[index, 'GDP'] = scaled_GDP_prices.get(year)

    future_factors = {'BND': bnd_future, 'SPY': spy_future, 'TLT': tlt_future, 'GDP': [scaled_GDP_prices.get(2023)]*10}
    future_factors = pd.DataFrame(future_factors)

    returns = {}
    future_returns = {}
    for ticker in tickers:
        stock_returns = ((portfolio_data[ticker] / portfolio_data[ticker].shift(1))-1).dropna().tolist()
        returns[ticker] = stock_returns

        single_index_reg = Ridge()
        single_index_reg.fit(factor_returns, stock_returns)
        stock_future = single_index_reg.predict(future_factors)
        future_returns[ticker] = stock_future

    future_returns = pd.DataFrame(future_returns)
    S = risk_models.sample_cov(future_returns)

    ef = EfficientFrontier(future_returns.mean(), S)
    weights = ef.max_sharpe(risk_free_rate=0.0)
    cleaned_weights = ef.clean_weights()
    print(cleaned_weights)

    ret, volatility, sharpe_ratio = ef.portfolio_performance()
    print("Expected annual return:", ret)
    print("Annual volatility:", volatility)
    print("Sharpe ratio:", sharpe_ratio)



----------------------------------------------------
Non-linear ML model: RandomForestRegressor()
[*********************100%***********************]  3 of 3 completed




OrderedDict([('AAPL', 0.0), ('AMZN', 0.00232), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.0), ('NVDA', 0.78935), ('TSLA', 0.20833), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: 0.0018754959302084834
Annual volatility: 1.649533452626864
Sharpe ratio: 0.0011369856896335001
----------------------------------------------------
Non-linear ML model: GradientBoostingRegressor()
[*********************100%***********************]  3 of 3 completed




OrderedDict([('AAPL', 0.0), ('AMZN', 0.00609), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.0), ('NVDA', 0.5438), ('TSLA', 0.28782), ('UNH', 0.00093), ('XOM', 0.16136)])
Expected annual return: 0.001515657627297046
Annual volatility: 0.20376655845670819
Sharpe ratio: 0.007438205948887631
----------------------------------------------------
Non-linear ML model: MLPRegressor(hidden_layer_sizes=(100, 100))
[*********************100%***********************]  3 of 3 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.0), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.0), ('NVDA', 0.3113), ('TSLA', 0.32039), ('UNH', 0.00291), ('XOM', 0.3654)])
Expected annual return: 0.0012702323830164623
Annual volatility: 0.27361679632411007
Sharpe ratio: 0.004642377222748494
----------------------------------------------------
Non-linear ML model: SVR()




[*********************100%***********************]  3 of 3 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.0), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.0), ('NVDA', 0.69851), ('TSLA', 0.27492), ('UNH', 0.0), ('XOM', 0.02658)])
Expected annual return: 0.0014966892173548416
Annual volatility: 0.5992097011586065
Sharpe ratio: 0.0024977720061289174




## Black Litterman model 

In [33]:
reg_models = [RandomForestRegressor(), GradientBoostingRegressor(), MLPRegressor(hidden_layer_sizes=(100, 100)), SVR()]

for reg_model in reg_models:
    print("----------------------------------------------------")
    print(f"Non-linear regression model: {reg_model}")
    
    # Using a non-linear ML model to predict stock prices in future
    returns = {}
    future_returns = {}
    for ticker in tickers:
        stock_returns = ((portfolio_data[ticker] / portfolio_data[ticker].shift(1))-1).dropna().tolist()
        returns[ticker] = stock_returns
        future_returns[ticker] = forecast_price(stock_returns, reg_model) 

    # Using the predicted future return of stocks as "views" for black litterman model
    viewdict = {key: value[-1] for key, value in future_returns.items()}
    print(f"Views: {viewdict}")

    # Obtain market caps for all stocks in portfolio
    mcaps = {}
    for t in tickers:
        stock = yf.Ticker(t)
        mcaps[t] = stock.info["marketCap"]

    # Determine prior estimate of returns implied by the market weights
    S = risk_models.CovarianceShrinkage(portfolio_data).ledoit_wolf()
    market_prices = yf.download("SPY", period="max")["Adj Close"]
    delta = black_litterman.market_implied_risk_aversion(market_prices)
    market_prior = black_litterman.market_implied_prior_returns(mcaps, delta, S)

    # Assign confidence measure for stock returns based on some heuristics
    confidences = [
        0.8,
        0.6,
        0.7,
        0.5,
        0.5,
        0.7,
        0.3,
        0.3,
        0.4,
        0.7
    ]

    # Fit the Black litterman model and calculate corresponding cov matrix and returns
    bl = BlackLittermanModel(S, pi=market_prior, absolute_views=viewdict, omega="idzorek", view_confidences=confidences)
    ret_bl = bl.bl_returns()
    S_bl = bl.bl_cov()

    # Find the optimal weights using bl cov matrix and returns
    ef = EfficientFrontier(ret_bl, S_bl)
    ef.add_objective(objective_functions.L2_reg)
    ef.max_sharpe()
    weights = ef.clean_weights()
    print(weights)

    ret, volatility, sharpe_ratio = ef.portfolio_performance()
    print("Expected annual return:", ret)
    print("Annual volatility:", volatility)
    print("Sharpe ratio:", sharpe_ratio)

----------------------------------------------------
Non-linear regression model: RandomForestRegressor()
Views: {'AAPL': 0.00633224662269967, 'AMZN': -0.0020286676439700935, 'BRK-B': -0.0011465237435448062, 'GOOGL': 0.000256539851857428, 'META': 0.007198624117980157, 'MSFT': -0.0014579840243541197, 'NVDA': 0.008122177306659671, 'TSLA': -0.001968889107699775, 'UNH': 0.0038141989391047093, 'XOM': 0.0005860386162657871}
[*********************100%***********************]  1 of 1 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.0205), ('BRK-B', 0.0), ('GOOGL', 0.01428), ('META', 0.07971), ('MSFT', 0.0), ('NVDA', 0.34011), ('TSLA', 0.54541), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: 0.05849282854082451
Annual volatility: 0.5048045221698616
Sharpe ratio: 0.07625293920776735
----------------------------------------------------
Non-linear regression model: GradientBoostingRegressor()




Views: {'AAPL': 0.0033177345948418903, 'AMZN': 0.001877977546475723, 'BRK-B': -0.0012471009025219752, 'GOOGL': -0.003588571422636571, 'META': 0.004890396967907461, 'MSFT': 8.770881370671771e-05, 'NVDA': 0.00784314760799787, 'TSLA': -0.0018200927555136908, 'UNH': 0.0036170402296405017, 'XOM': -0.00018106866807791075}
[*********************100%***********************]  1 of 1 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.04166), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.06498), ('MSFT', 0.0), ('NVDA', 0.34187), ('TSLA', 0.55149), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: 0.05831677329287283
Annual volatility: 0.5078426463537362
Sharpe ratio: 0.07545008984177239
----------------------------------------------------
Non-linear regression model: MLPRegressor(hidden_layer_sizes=(100, 100))




Views: {'AAPL': 0.004764763753117185, 'AMZN': -0.0021840117994252825, 'BRK-B': 0.0019713125809131815, 'GOOGL': -0.004418225733805237, 'META': 0.003923753288772314, 'MSFT': 0.003178079507192476, 'NVDA': 0.007059021559350037, 'TSLA': -0.005193922620450431, 'UNH': 0.0007379974605477187, 'XOM': 0.00039021809794351214}
[*********************100%***********************]  1 of 1 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.02223), ('BRK-B', 0.0), ('GOOGL', 0.00397), ('META', 0.06404), ('MSFT', 0.00199), ('NVDA', 0.35557), ('TSLA', 0.5522), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: 0.058559773935090396
Annual volatility: 0.510623567980717
Sharpe ratio: 0.07551506893341546
----------------------------------------------------
Non-linear regression model: SVR()
Views: {'AAPL': -0.0001216460004583881, 'AMZN': 0.015986107034804065, 'BRK-B': 0.007365359484975839, 'GOOGL': -0.009469696458593852, 'META': 0.04811327248725136, 'MSFT': -0.005199401065915109, 'NVDA': 0.03650822726875128, 'T



[*********************100%***********************]  1 of 1 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.08848), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.20313), ('MSFT', 0.0), ('NVDA', 0.28268), ('TSLA', 0.42571), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: 0.06204397647048717
Annual volatility: 0.45226726676500467
Sharpe ratio: 0.09296267839859604


