# Final Project

## Notes

In [None]:
'''
Using the following machine learning models for making predictions for the following portfolio optimization models:
- Mean-variance optimization (chapter 7 of Bodie, Investment book)
- Index model (chapter 8 of Bodie, Investment book)
- Capital asset pricing model (chapter 9 of Bodie, Investment book)
- Arbitrage pricing theory and multifactor model (chapter 10 of Bodie,
Investment book)
- Equity valuation model (chapter 18 of Bodie, Investment book)
- Black Litterman model (chapter 24 of Bodie, Investment book)
- Algorithmic trading (this could be restricted to the last month)
'''

## Load Data

In [1]:
import yfinance as yf
from pypfopt import plotting, EfficientFrontier, objective_functions, expected_returns
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from pypfopt import BlackLittermanModel, black_litterman, risk_models


tickers = ["AAPL", "AMZN", "BRK-B", "GOOGL", "META", "MSFT", "NVDA", "TSLA", "UNH", "XOM"]
portfolio_data = yf.download(tickers, period="5y")
portfolio_data = portfolio_data["Adj Close"]
portfolio_data.tail()

[*********************100%***********************]  10 of 10 completed


Unnamed: 0_level_0,AAPL,AMZN,BRK-B,GOOGL,META,MSFT,NVDA,TSLA,UNH,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-06-27,188.059998,129.179993,335.339996,118.330002,287.049988,334.570007,418.76001,250.210007,482.559998,104.550003
2023-06-28,189.25,129.039993,334.149994,120.18,285.290009,335.850006,411.170013,256.23999,474.450012,105.400002
2023-06-29,189.589996,127.900002,336.910004,119.099998,281.529999,335.049988,408.220001,257.5,476.440002,106.699997
2023-06-30,193.970001,130.360001,341.0,119.699997,286.980011,340.540009,423.019989,261.769989,480.640015,107.25
2023-07-03,192.460007,130.220001,342.0,119.900002,286.019989,337.98999,424.130005,279.820007,477.880005,107.459999


In [2]:
market_index_ticker = "^GSPC"  # S&P 500 index
market_index_data = yf.download(market_index_ticker, period="5y")
market_index_data = market_index_data["Adj Close"]
market_index_data.tail()

[*********************100%***********************]  1 of 1 completed


Date
2023-06-27    4378.410156
2023-06-28    4376.859863
2023-06-29    4396.439941
2023-06-30    4450.379883
2023-07-03    4455.589844
Name: Adj Close, dtype: float64

## Mean Variance Optimization

### Random Forest 

In [3]:
future_prices = {}
for ticker in tickers:
    data = portfolio_data[ticker].tolist()
    # Dependent variable - 10 consecutive days of stock prices
    dev_x = [data[i:i+10] for i in range(len(data)-20)]
    
    # Independent variable - stock price 10th day into the future
    dev_y = [data[i+10] for i in range(10,len(data)-10)]
    
    test = [data[i:i+10] for i in range(len(data)-20,len(data)-10)]
    
    reg = RandomForestRegressor()
    reg.fit(dev_x,dev_y)
    
    # Predict stock price for 10 future days
    pred = reg.predict(test)
    future_prices[ticker] = pred

future_prices = pd.DataFrame(future_prices)
# Construct covariance matrix of future stock prices
S = risk_models.CovarianceShrinkage(future_prices).ledoit_wolf()

# Use capm to find expected returns on future prices
mu = expected_returns.capm_return(future_prices)
print(mu)

# Do mean variance optimization using efficient frontier
ef = EfficientFrontier(mu, S)
ef.min_volatility()
weights = ef.clean_weights()
print(weights)
# weights = ef.max_sharpe(risk_free_rate=0.02)
# cleaned_weights = ef.clean_weights()
# print(cleaned_weights)

ret, volatility, sharpe_ratio = ef.portfolio_performance()
print("Expected annual return:", ret)
print("Annual volatility:", volatility)
print("Sharpe ratio:", sharpe_ratio)


AAPL     0.025210
AMZN     2.252283
BRK-B    0.187033
GOOGL    1.554961
META     0.417067
MSFT     0.038136
NVDA     0.013240
TSLA     3.328888
UNH     -0.230505
XOM      0.144229
Name: mkt, dtype: float64
OrderedDict([('AAPL', 0.14446), ('AMZN', 0.00734), ('BRK-B', 0.13015), ('GOOGL', 0.04641), ('META', 0.09304), ('MSFT', 0.14107), ('NVDA', 0.14968), ('TSLA', 0.00304), ('UNH', 0.14817), ('XOM', 0.13665)])
Expected annual return: 0.1585062746466839
Annual volatility: 0.11130273901640701
Sharpe ratio: 1.2444102981712504


### SVM

In [4]:
future_prices = {}
for ticker in tickers:
    data = portfolio_data[ticker].tolist()
    dev_x = [data[i:i+10] for i in range(len(data)-20)]
    dev_y = [data[i+10] for i in range(10, len(data)-10)]
    test = [data[i:i+10] for i in range(len(data)-20, len(data)-10)]
    
    reg = SVR()
    reg.fit(dev_x, dev_y)
    
    pred = reg.predict(test)
    future_prices[ticker] = pred

future_prices = pd.DataFrame(future_prices)

S = risk_models.CovarianceShrinkage(future_prices).ledoit_wolf()
mu = expected_returns.capm_return(future_prices)
print(mu)

ef = EfficientFrontier(mu, S)
ef.min_volatility()
weights = ef.clean_weights()
print(weights)


ret, volatility, sharpe_ratio = ef.portfolio_performance()
print("Expected annual return:", ret)
print("Annual volatility:", volatility)
print("Sharpe ratio:", sharpe_ratio)


AAPL     0.036081
AMZN    -0.105575
BRK-B   -0.004571
GOOGL   -0.130701
META     0.003582
MSFT    -0.023083
NVDA    -0.061680
TSLA    -0.156760
UNH     -0.037295
XOM      0.000436
Name: mkt, dtype: float64
OrderedDict([('AAPL', 0.21053), ('AMZN', 0.01913), ('BRK-B', 0.16), ('GOOGL', 0.027), ('META', 0.17512), ('MSFT', 0.10298), ('NVDA', 0.01529), ('TSLA', 0.0442), ('UNH', 0.10212), ('XOM', 0.14363)])
Expected annual return: -0.012051418502632178
Annual volatility: 0.012313234762764323
Sharpe ratio: -2.603005556229371


### Neural Network

In [5]:
future_prices = {}
for ticker in tickers:
    data = portfolio_data[ticker].tolist()
    dev_x = [data[i:i+10] for i in range(len(data)-20)]
    dev_y = [data[i+10] for i in range(10, len(data)-10)]
    test = [data[i:i+10] for i in range(len(data)-20, len(data)-10)]
    
    reg = MLPRegressor(hidden_layer_sizes=(100,100), random_state=42)  # You can adjust the hidden_layer_sizes and other parameters as needed
    reg.fit(dev_x, dev_y)
    
    pred = reg.predict(test)
    future_prices[ticker] = pred

future_prices = pd.DataFrame(future_prices)

S = risk_models.CovarianceShrinkage(future_prices).ledoit_wolf()
mu = expected_returns.capm_return(future_prices)
print(mu)

ef = EfficientFrontier(mu, S)
ef.min_volatility()
weights = ef.clean_weights()
print(weights)

ret, volatility, sharpe_ratio = ef.portfolio_performance()
print("Expected annual return:", ret)
print("Annual volatility:", volatility)
print("Sharpe ratio:", sharpe_ratio)



AAPL    -0.051072
AMZN     0.005034
BRK-B   -0.013537
GOOGL   -0.046004
META    -0.056244
MSFT    -0.066116
NVDA    -0.273637
TSLA    -0.415881
UNH     -0.012283
XOM      0.012491
Name: mkt, dtype: float64
OrderedDict([('AAPL', 0.08875), ('AMZN', 0.1699), ('BRK-B', 0.26304), ('GOOGL', 0.06651), ('META', 0.09893), ('MSFT', 0.06343), ('NVDA', 0.0), ('TSLA', 0.0), ('UNH', 0.11485), ('XOM', 0.13459)])
Expected annual return: -0.019785228834557224
Annual volatility: 0.05025352960990399
Sharpe ratio: -0.7916902383452948


## Index model

In [6]:
class SingleIndexModel:
    def __init__(self, tickers, market_index_data, portfolio_data, reg_model):
        self.tickers = tickers
        self.market_index_data = market_index_data
        self.portfolio_data = portfolio_data
        self.reg_model = reg_model
        self.returns = {}
        self.beta = {}
        self.alpha = {}
        self.residual_variance = {}

    def calculate_returns(self):
        # Returns of stocks and market
        self.returns = {"MARKET": ((self.market_index_data / self.market_index_data.shift(1)) - 1).dropna().tolist()}
        for ticker in self.tickers:
            self.returns[ticker] = ((self.portfolio_data[ticker] / self.portfolio_data[ticker].shift(1))-1).dropna().tolist()

    def fit_regression(self):
        # Forecast market
        market_x = [self.returns["MARKET"][i:i+10] for i in range(len(self.returns["MARKET"])-20)]
        market_y = [self.returns["MARKET"][i+10] for i in range(10, len(self.returns["MARKET"])-10)]
        market_test = [self.returns["MARKET"][i:i+10] 
                       for i in range(len(self.returns["MARKET"])-20, len(self.returns["MARKET"])-10)]

        reg = self.reg_model
        reg.fit(market_x, market_y)
        market_future = reg.predict(market_test)

        # Forecast each stock
        for ticker in self.tickers:
            stock_x = [self.returns[ticker][i:i+10] for i in range(len(self.returns[ticker])-20)]
            stock_y = [self.returns[ticker][i+10] for i in range(10, len(self.returns[ticker])-10)]
            stock_test = [self.returns[ticker][i:i+10]
                          for i in range(len(self.returns[ticker])-20, len(self.returns[ticker])-10)]

            reg = self.reg_model
            reg.fit(stock_x, stock_y)
            stock_future = reg.predict(stock_test)

            single_index_reg = LinearRegression()
            single_index_reg.fit(np.array(market_future).reshape(-1, 1), y=stock_future)

            self.beta[ticker] = single_index_reg.coef_[0]
            self.alpha[ticker] = single_index_reg.intercept_

            y_pred = single_index_reg.predict(np.array(self.returns["MARKET"]).reshape(-1, 1))
            residuals = self.returns[ticker] - y_pred
            self.residual_variance[ticker] = np.var(residuals)

    def optimize_portfolio(self):
        # Compute the initial position of each security
        weights = {ticker: self.alpha[ticker] / self.residual_variance[ticker] for ticker in self.tickers}
        total_weight = sum(weights.values())
        weights = {ticker: weight / total_weight for ticker, weight in weights.items()}

        # Compute the alpha of the active portfolio
        alpha_portfolio = sum(weights[ticker] * self.alpha[ticker] for ticker in self.tickers)

        # Compute the residual variance of the active portfolio
        residual_variance_portfolio = sum((weights[ticker] ** 2) * self.residual_variance[ticker] for ticker in self.tickers)

        # Compute the initial position in the active portfolio
        residual_variance_market = 0.0114  # Variance of S&P 500
        risk_premium_market = 0.056
        initial_position_portfolio = (alpha_portfolio * residual_variance_market) / (residual_variance_portfolio * risk_premium_market)

        # Compute the beta of the active portfolio
        beta_portfolio = sum(weights[ticker] * self.beta[ticker] for ticker in self.tickers)

        # Adjust the initial position in the active portfolio
        adjusted_position_portfolio = initial_position_portfolio / (1 + (1 - beta_portfolio) * initial_position_portfolio)

        # Optimal risky portfolio now has weights
        final_weight_market = 1 - adjusted_position_portfolio
        weights = {ticker: weight * adjusted_position_portfolio for ticker, weight in weights.items()}

        # Calculate the risk premium of the portfolio
        risk_premium_portfolio = (final_weight_market + adjusted_position_portfolio * beta_portfolio) * risk_premium_market + adjusted_position_portfolio * alpha_portfolio

        # Compute the variance of the portfolio
        portfolio_variance = (final_weight_market + adjusted_position_portfolio * beta_portfolio) ** 2 * residual_variance_market + adjusted_position_portfolio ** 2 * residual_variance_portfolio

        # Calculate the Sharpe ratio
        sharpe_ratio = risk_premium_portfolio / (portfolio_variance ** 0.5)

        return weights, final_weight_market, sharpe_ratio


### Random Forest

In [7]:
reg_model = RandomForestRegressor()
portfolio = SingleIndexModel(tickers, market_index_data, portfolio_data, reg_model)
portfolio.calculate_returns()
portfolio.fit_regression()
weights, final_weight_market, sharpe_ratio = portfolio.optimize_portfolio()
print("------------------------------")
print("Final Weights:")
print("Weight Market S&P:", final_weight_market)
print(f"Weights: {weights}")
print("------------------------------")
print("Sharpe ratio:", sharpe_ratio)

------------------------------
Final Weights:
Weight Market S&P: 0.5863947154779436
Weights: {'AAPL': 0.26245855216495373, 'AMZN': -0.24351280116629392, 'BRK-B': 0.05099943749882177, 'GOOGL': 0.17223330565613174, 'META': 0.015777155097337677, 'MSFT': 0.20589381728954562, 'NVDA': 0.13104113814112353, 'TSLA': 0.07097496736933147, 'UNH': -0.011992229889740521, 'XOM': -0.24026805763915465}
------------------------------
Sharpe ratio: 0.5750544045291437


### SVM

In [8]:
reg_model = SVR()
portfolio = SingleIndexModel(tickers, market_index_data, portfolio_data, reg_model)
portfolio.calculate_returns()
portfolio.fit_regression()
weights, final_weight_market, sharpe_ratio = portfolio.optimize_portfolio()
print("------------------------------")
print("Final Weights:")
print("Weight Market S&P:", final_weight_market)
print(f"Weights: {weights}")
print("------------------------------")
print("Sharpe ratio:", sharpe_ratio)

------------------------------
Final Weights:
Weight Market S&P: 1.6320028230417156
Weights: {'AAPL': -0.12279014064991728, 'AMZN': -0.0461725280570754, 'BRK-B': -0.13287934469957183, 'GOOGL': -0.03694513352444664, 'META': -0.20714365403244403, 'MSFT': -0.09824409442752727, 'NVDA': 0.007111067632432751, 'TSLA': -0.061080731833562635, 'UNH': 0.04178252368762089, 'XOM': 0.02435921286277579}
------------------------------
Sharpe ratio: -2.6875784968770726


### Neural Network

In [9]:
reg_model = MLPRegressor(hidden_layer_sizes=(100,100))
portfolio = SingleIndexModel(tickers, market_index_data, portfolio_data, reg_model)
portfolio.calculate_returns()
portfolio.fit_regression()
weights, final_weight_market, sharpe_ratio = portfolio.optimize_portfolio()
print("------------------------------")
print("Final Weights:")
print("Weight Market S&P:", final_weight_market)
print(f"Weights: {weights}")
print("------------------------------")
print("Sharpe ratio:", sharpe_ratio)

------------------------------
Final Weights:
Weight Market S&P: -2.344827058474747
Weights: {'AAPL': 1.361267585370607, 'AMZN': -0.20477682489179236, 'BRK-B': 0.5025486018685142, 'GOOGL': 0.8856925565602729, 'META': 0.12421059997387304, 'MSFT': 0.30407503866548125, 'NVDA': 0.3163740951493393, 'TSLA': -0.09759375316058574, 'UNH': 0.06859656568202628, 'XOM': 0.08443259325701072}
------------------------------
Sharpe ratio: 0.6987993476646619


### Capital asset pricing model (CAPM)

In [10]:
reg_models = [RandomForestRegressor(), GradientBoostingRegressor(), MLPRegressor(hidden_layer_sizes=(100, 100))]

for reg_model in reg_models:
    print("----------------------------------------------------")
    print(f"Non-linear ML model: {reg_model}")
    
    returns = {"MARKET": ((market_index_data / market_index_data.shift(1)) - 1).dropna().tolist()}
    for ticker in tickers:
        returns[ticker] = ((portfolio_data[ticker] / portfolio_data[ticker].shift(1))-1).dropna().tolist()


    market_x = [returns["MARKET"][i:i+10] for i in range(len(returns["MARKET"])-20)]
    market_y = [returns["MARKET"][i+10] for i in range(10, len(returns["MARKET"])-10)]
    market_test = [returns["MARKET"][i:i+10]
                   for i in range(len(returns["MARKET"])-20, len(returns["MARKET"])-10)]

    reg = MLPRegressor(hidden_layer_sizes=(100,100))
    reg.fit(market_x, market_y)
    market_future = reg.predict(market_test)
#     print(market_future)

    future_returns = {}
    for ticker in tickers:
        single_index_reg = Ridge()
        single_index_reg.fit(np.array(returns["MARKET"]).reshape(-1, 1), y=returns[ticker])
        future_returns[ticker] = single_index_reg.predict(np.array(market_future).reshape(-1, 1))

    future_returns = pd.DataFrame(future_returns)
#     print(future_returns)

    S = risk_models.CovarianceShrinkage(future_returns).ledoit_wolf()
    ef = EfficientFrontier(None, S)
    ef.min_volatility()
    weights = ef.clean_weights()
    print(weights)


    ret, volatility, sharpe_ratio = ef.portfolio_performance()
    print("Expected annual return:", ret)
    print("Annual volatility:", volatility)
    print("Sharpe ratio:", sharpe_ratio)

----------------------------------------------------
Non-linear ML model: RandomForestRegressor()
OrderedDict([('AAPL', 0.01074), ('AMZN', 0.0), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.0), ('NVDA', 0.15156), ('TSLA', 0.8377), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: None
Annual volatility: 2.5132886377335244
Sharpe ratio: None
----------------------------------------------------
Non-linear ML model: GradientBoostingRegressor()
OrderedDict([('AAPL', 0.18022), ('AMZN', 0.0), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.07494), ('NVDA', 0.24377), ('TSLA', 0.50106), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: None
Annual volatility: 3.7654381293505743
Sharpe ratio: None
----------------------------------------------------
Non-linear ML model: MLPRegressor(hidden_layer_sizes=(100, 100))
OrderedDict([('AAPL', 0.11639), ('AMZN', 0.08814), ('BRK-B', 0.07595), ('GOOGL', 0.07992), ('META', 0.09114), ('MSFT', 0.11192), ('NVDA', 0.119), ('TSLA', 

### Arbitrage pricing theory and multifactor model

In [11]:
reg_models = [RandomForestRegressor(), GradientBoostingRegressor(), MLPRegressor(hidden_layer_sizes=(100, 100))]

In [14]:
for reg in reg_models:
    print("----------------------------------------------------")
    print(f"Non-linear ML model: {reg}")
    
    # Fetch historical data for the macroeconomic factors
    factor_tickers = ['SPY', 'TLT', 'BND']
    factor_data = yf.download(factor_tickers, period="5y")
    factor_data = factor_data['Adj Close']

    # Calculate the returns for the S&P and Treasury bond factors
    factor_returns = factor_data.pct_change().dropna()
    spy_returns = factor_returns['SPY']
    tlt_returns = factor_returns['TLT']
    bnd_returns = factor_returns['BND']

    # FACTOR 1
    # Train ML model to predict future price of S&P market factors
    bnd_x = [bnd_returns[i:i+10] for i in range(len(bnd_returns)-20)]
    bnd_y = [bnd_returns[i+10] for i in range(10, len(bnd_returns)-10)]

    bnd_test = [bnd_returns[i:i+10]
                   for i in range(len(bnd_returns)-20, len(bnd_returns)-10)]
    reg.fit(bnd_x, bnd_y)
    bnd_future = reg.predict(bnd_test)

    # FACTOR 2
    # Train ML model to predict future price of S&P market factors
    spy_x = [spy_returns[i:i+10] for i in range(len(spy_returns)-20)]
    spy_y = [spy_returns[i+10] for i in range(10, len(spy_returns)-10)]

    spy_test = [spy_returns[i:i+10]
                   for i in range(len(spy_returns)-20, len(spy_returns)-10)]
    reg.fit(spy_x, spy_y)
    spy_future = reg.predict(spy_test)

    # FACTOR 3
    # Train ML model to predict future price of Treasury Bond ETF factors
    tlt_x = [tlt_returns[i:i+10] for i in range(len(tlt_returns)-20)]
    tlt_y = [tlt_returns[i+10] for i in range(10, len(tlt_returns)-10)]

    tlt_test = [tlt_returns[i:i+10]
                   for i in range(len(tlt_returns)-20, len(tlt_returns)-10)]

    reg.fit(tlt_x, tlt_y)
    tlt_future = reg.predict(tlt_test)

    # FACTOR 4
    # GDP Price of USA for past 5 years

    # US GDP per capita Prices of last 5 years
    GDP_prices = {
            2018: 59607,
            2019: 60698,
            2020: 58453,
            2021: 61855,
            2022: 62551,
            2023: 63451 # Forecast data also available online
        }

    # Normalize the GDP values as it will make the other factors irrelevant as it is very large
    values = list(GDP_prices.values())
    values_array = [[value] for value in values]
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(values_array)
    scaled_values = scaled_values.flatten()
    scaled_GDP_prices = {year: scaled_value for year, scaled_value in zip(GDP_prices.keys(), scaled_values)}

    # print(scaled_GDP_prices)

    for index, row in factor_returns.iterrows():
        # Extract the year from the date
        year = index.year

        # Fill the 'GDP' column with the corresponding GDP price based on the year
        factor_returns.at[index, 'GDP'] = scaled_GDP_prices.get(year)

    future_factors = {'BND': bnd_future, 'SPY': spy_future, 'TLT': tlt_future, 'GDP': [scaled_GDP_prices.get(2023)]*10}
    future_factors = pd.DataFrame(future_factors)

    returns = {}
    future_returns = {}
    for ticker in tickers:
        stock_returns = ((portfolio_data[ticker] / portfolio_data[ticker].shift(1))-1).dropna().tolist()
        returns[ticker] = stock_returns

        single_index_reg = Ridge()
        single_index_reg.fit(factor_returns, stock_returns)
        stock_future = single_index_reg.predict(future_factors)
        future_returns[ticker] = stock_future
    #     print("Coefficients: ", single_index_reg.coef_)

    future_returns = pd.DataFrame(future_returns)
    # print(future_returns)

    S = risk_models.sample_cov(future_returns)
    # print(S)


#     print(future_returns.mean())
    ef = EfficientFrontier(future_returns.mean(), S)
    weights = ef.max_sharpe(risk_free_rate=0.0)
    cleaned_weights = ef.clean_weights()
    print(cleaned_weights)

    ret, volatility, sharpe_ratio = ef.portfolio_performance()
    print("Expected annual return:", ret)
    print("Annual volatility:", volatility)
    print("Sharpe ratio:", sharpe_ratio)



----------------------------------------------------
Non-linear ML model: RandomForestRegressor()
[*********************100%***********************]  3 of 3 completed




OrderedDict([('AAPL', 0.0), ('AMZN', 0.0), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.0), ('NVDA', 0.79289), ('TSLA', 0.20711), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: 0.0019685787880436576
Annual volatility: 0.5284216589090845
Sharpe ratio: 0.0037253938305779276
----------------------------------------------------
Non-linear ML model: GradientBoostingRegressor()
[*********************100%***********************]  3 of 3 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.01476), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.08275), ('MSFT', 0.0), ('NVDA', 0.01637), ('TSLA', 0.14981), ('UNH', 0.00107), ('XOM', 0.73524)])
Expected annual return: 0.0015471497362253724
Annual volatility: 1.3198975029657125
Sharpe ratio: 0.0011721741519694073
----------------------------------------------------
Non-linear ML model: MLPRegressor(hidden_layer_sizes=(100, 100))
[                       0%                       ]



[*********************100%***********************]  3 of 3 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.01272), ('BRK-B', 0.0), ('GOOGL', 0.0), ('META', 0.0), ('MSFT', 0.0), ('NVDA', 0.54068), ('TSLA', 0.09575), ('UNH', 0.0), ('XOM', 0.35085)])
Expected annual return: 0.0025876506348077102
Annual volatility: 0.41439956613930745
Sharpe ratio: 0.006244337220029394




### Equity valuation model

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from pypfopt import BlackLittermanModel, plotting
from pypfopt import black_litterman, risk_models
import datetime

end_date = datetime.datetime.now()
start_date = end_date - datetime.timedelta(days=5*365)
apple_ticker = "AAPL"
apple_data = yf.download(apple_ticker, start=start_date, end=end_date)
microsoft_data = yf.download('MSFT', start=start_date, end=end_date)

# # get dividends of the Apple stock
# apple_stock = yf.Ticker(apple_ticker)
# apple_dividends = apple_stock.dividends.loc[start_date:end_date]
# apple_dividends = apple_dividends.reset_index(drop=True)

# # get dividends of the Microsoft stock
# microsoft_stock = yf.Ticker('MSFT')
# microsoft_dividends = microsoft_stock.dividends.loc[start_date:end_date]
# microsoft_dividends = microsoft_dividends.reset_index(drop=True)

# Determine dividend for each stock
dividends = {}
for ticker in tickers:
    stock = yf.Ticker(ticker)
    dividends[ticker] = stock.dividends.loc[start_date:end_date]
    dividends[ticker] = dividends[ticker].reset_index(drop=True)
    dividends[ticker] = dividends[ticker] * 4
print(dividends)

# # Combine the dividend data into a single DataFrame (We are multiplying by 4 as these are quarterly dividends)
# dividends = pd.DataFrame({'AAPL':apple_dividends*4, 'MSFT':microsoft_dividends*4})
# #print(dividends)

# Calculate the dividend growth rates
dividends = pd.DataFrame(dividends)
dividend_growth_rates = dividends.pct_change().dropna()

# Since we only have dividend changes every 4 months, we remove the 0 values and get the average dividend growth rate which we consider as perpetual growth rate
avg_dividends_growth_rate = dividend_growth_rates.mask(dividend_growth_rates == 0).sum()/5

print("Dividend Growth rate: \n", avg_dividends_growth_rate)


next_period_dividend = {}
for ticker in tickers:
    # Define the features and target variable for the machine learning model
    X = np.unique(dividends[ticker].values)[:-1]  # Independent variables (dividend value for the past period)
    y = np.unique(dividends[ticker].values)[1:]  # Dependent variable (dividend value for the next period)

    # Train a machine learning model using regression to predict the next dividend value
    model = RandomForestRegressor()
    model.fit(X.reshape(-1,1), y)

    # Use the trained model to predict the dividend value for the next period
    next_period_dividend[ticker] = model.predict(dividends[ticker].values[-1].reshape(1, -1))[0]
    print("Predicted next period dividend", next_period_dividend[ticker])


# # PREDICTING NEXT DIVIDEND VALUE FOR APPLE STOCK

# # Define the features and target variable for the machine learning model
# X = np.unique(dividends['AAPL'].values)[:-1]  # Independent variables (dividend value for the past period)
# y = np.unique(dividends['AAPL'].values)[1:]  # Dependent variable (dividend value for the next period)

# # Train a machine learning model using regression to predict the next dividend value
# model = RandomForestRegressor()
# model.fit(X.reshape(-1,1), y)

# # Use the trained model to predict the dividend value for the next period
# next_period_dividend_apple = model.predict(dividends['AAPL'].values[-1].reshape(1, -1))[0]
# print("Predicted next period dividend for APPLE", next_period_dividend_apple)

# # PREDICTING NEXT DIVIDEND VALUE FOR MICROSOFT STOCK

# # Define the features and target variable for the machine learning model
# X = np.unique(dividends['MSFT'].values)[:-1]  # Independent variables (dividend value for the past period)
# y = np.unique(dividends['MSFT'].values)[1:]  # Dependent variable (dividend value for the next period)

# # Train a machine learning model using regression to predict the next dividend value
# model = RandomForestRegressor()
# model.fit(X.reshape(-1,1), y)

# # Use the trained model to predict the dividend value for the next period
# next_period_dividend_microsoft = model.predict(dividends['MSFT'].values[-1].reshape(1, -1))[0]
# print("Predicted next period dividend for Microsoft", next_period_dividend_microsoft)

# tickers = ["AAPL", "AMZN", "BRK-B", "GOOGL", "META", "MSFT", "NVDA", "TSLA", "UNH", "XOM"]


discount_rate = {
    "AAPL": 0.0871,
    "AMZN": 0.0794,
    "BRK-B": 0.0785,
    "GOOGL": 0.0828,
    "META": 0.0828,
    "MSFT": 0.0871,
    "NVDA": 0.0966,
    "TSLA": 0.0942,
    "UNH": 0.0832,
    "XOM": 0.0875
}

intrinsic_prices = {}
for ticker in tickers:
    intrinsic_prices[ticker] = next_period_dividend[ticker] / \
                                    (discount_rate[ticker] - avg_dividends_growth_rate[ticker])
print(intrinsic_prices)

# # Apply the DDM formula to calculate the intrinsic value of the stocks
# discount_rate = 0.078  # Available online
# apple_intrinsic_price = next_period_dividend_apple / \
#     (discount_rate - avg_dividends_growth_rate['AAPL'])

# discount_rate = 0.11
# microsoft_intrinsic_price = next_period_dividend_microsoft / \
#     (discount_rate - avg_dividends_growth_rate['MSFT'])

# print("Intrinsic Value of Apple Stock:", apple_intrinsic_price)
# print("Intrinsic Value of Microsoft Stock:", microsoft_intrinsic_price)





# We can notice that the intrinsic stock values for both Apple and Microsoft are over-valued which makes sense as they are at their all-time high.

# Use the intrinsic value of stock as views for BL model
# Here We are filling our subjective views based on the fair price of the stocks that we calculated using Equity evaluation model
viewdict = {}
for key, value in portfolio_data.items():
    viewdict[key] = intrinsic_prices[key]/value[-1]
print(f"Views: {viewdict}")


# viewdict = {'AAPL': apple_intrinsic_price/apple_data['Close'][-1], 'MSFT': microsoft_intrinsic_price/microsoft_data['Close'][-1]}
# tickers = ['AAPL','MSFT']
mcaps = {}
for t in tickers:
    stock = yf.Ticker(t)
    mcaps[t] = stock.info["marketCap"]
# print(mcaps)




prices = portfolio_data
# prices = pd.DataFrame({'AAPL':apple_data['Close'].values,'MSFT':microsoft_data['Close'].values})
S = risk_models.CovarianceShrinkage(prices).ledoit_wolf()
market_prices = yf.download("SPY", period="max")["Adj Close"]


delta = black_litterman.market_implied_risk_aversion(market_prices)
# print(delta)
market_prior = black_litterman.market_implied_prior_returns(mcaps, delta, S)
# print(market_prior)

# Assign confidence measure for stock returns based on some heuristics
confidences = [
    0.8,
    0.6,
    0.7,
    0.5,
    0.5,
    0.7,
    0.3,
    0.3,
    0.4,
    0.7
]

# Fit BL model
bl = BlackLittermanModel(S, pi=market_prior, absolute_views=viewdict, omega="idzorek", view_confidences=confidences)

# Get expected returns
ret_bl = bl.bl_returns()
# print(ret_bl)

# Get cov matrix
S_bl = bl.bl_cov()


from pypfopt import EfficientFrontier, objective_functions

ef = EfficientFrontier(ret_bl, S_bl)
ef.add_objective(objective_functions.L2_reg)
ef.max_sharpe()
weights = ef.clean_weights()
print("---------------------------------------------")
print("Final weights in the portfolio allocation: ")
print(weights)

In [None]:
ret, volatility, sharpe_ratio = ef.portfolio_performance()
print("Expected annual return:", ret)
print("Annual volatility:", volatility)
print("Sharpe ratio:", sharpe_ratio)

### Black Litterman model 

In [15]:
reg_models = [RandomForestRegressor(), GradientBoostingRegressor(), MLPRegressor(hidden_layer_sizes=(100, 100))]

for reg_model in reg_models:
    print("----------------------------------------------------")
    print(f"Non-linear ML model: {reg_model}")
    
    # Using a non-linear ML model to predict stock prices in future
    returns = {}
    future_returns = {}
    for ticker in tickers:
        stock_returns = ((portfolio_data[ticker] / portfolio_data[ticker].shift(1))-1).dropna().tolist()
        returns[ticker] = stock_returns

        stock_x = [stock_returns[i:i+10] for i in range(len(stock_returns)-20)]
        stock_y = [stock_returns[i+10] for i in range(10,len(stock_returns)-10)]
        stock_test = [stock_returns[i:i+10] for i in range(len(stock_returns)-20,len(stock_returns)-10)]
        reg = RandomForestRegressor()
        reg.fit(stock_x,stock_y)
        future_returns[ticker] = reg.predict(stock_test)

    # Using the predicted future return of stocks as "views" for black litterman model
    viewdict = {key: value[-1] for key, value in future_returns.items()}
    print(f"Views: {viewdict}")

    # Obtain market caps for all stocks in portfolio
    mcaps = {}
    for t in tickers:
        stock = yf.Ticker(t)
        mcaps[t] = stock.info["marketCap"]

    # Determine prior estimate of returns implied by the market weights
    S = risk_models.CovarianceShrinkage(portfolio_data).ledoit_wolf()
    market_prices = yf.download("SPY", period="max")["Adj Close"]
    delta = black_litterman.market_implied_risk_aversion(market_prices)
    market_prior = black_litterman.market_implied_prior_returns(mcaps, delta, S)

    # Assign confidence measure for stock returns based on some heuristics
    confidences = [
        0.8,
        0.6,
        0.7,
        0.5,
        0.5,
        0.7,
        0.3,
        0.3,
        0.4,
        0.7
    ]

    # Fit the Black litterman model and calculate corresponding cov matrix and returns
    bl = BlackLittermanModel(S, pi=market_prior, absolute_views=viewdict, omega="idzorek", view_confidences=confidences)
    ret_bl = bl.bl_returns()
    S_bl = bl.bl_cov()

    # Find the optimal weights using bl cov matrix and returns
    ef = EfficientFrontier(ret_bl, S_bl)
    ef.add_objective(objective_functions.L2_reg)
    ef.max_sharpe()
    weights = ef.clean_weights()
    print(weights)

    ret, volatility, sharpe_ratio = ef.portfolio_performance()
    print("Expected annual return:", ret)
    print("Annual volatility:", volatility)
    print("Sharpe ratio:", sharpe_ratio)

----------------------------------------------------
Non-linear ML model: RandomForestRegressor()
Views: {'AAPL': 0.006209112706961326, 'AMZN': 0.00020208095672298753, 'BRK-B': -0.0020081740361084575, 'GOOGL': -0.0005828193338722599, 'META': 0.005138835875855064, 'MSFT': -0.0021289639883251468, 'NVDA': 0.007453707958223923, 'TSLA': -0.002047794054805434, 'UNH': 0.0017264527839735567, 'XOM': 0.0025365188668838336}
[*********************100%***********************]  1 of 1 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.03228), ('BRK-B', 0.0), ('GOOGL', 0.00875), ('META', 0.06895), ('MSFT', 0.0), ('NVDA', 0.33891), ('TSLA', 0.55111), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: 0.05854902331028182
Annual volatility: 0.5069027015794578
Sharpe ratio: 0.07604817096094958
----------------------------------------------------
Non-linear ML model: GradientBoostingRegressor()




Views: {'AAPL': 0.006196333025294369, 'AMZN': -0.002198904307982675, 'BRK-B': 3.147106780534537e-05, 'GOOGL': 0.0017650839623249348, 'META': 0.006881351467200067, 'MSFT': -0.001199553848865692, 'NVDA': 0.005910369690964236, 'TSLA': 0.007813659522368505, 'UNH': 0.0031356609977219196, 'XOM': 0.001818884188573151}
[*********************100%***********************]  1 of 1 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.01865), ('BRK-B', 0.0), ('GOOGL', 0.02079), ('META', 0.07633), ('MSFT', 0.0), ('NVDA', 0.3276), ('TSLA', 0.55663), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: 0.060073095579378843
Annual volatility: 0.506791715237835
Sharpe ratio: 0.07907212050728321
----------------------------------------------------
Non-linear ML model: MLPRegressor(hidden_layer_sizes=(100, 100))




Views: {'AAPL': 0.00629254997744275, 'AMZN': -0.002507485707500907, 'BRK-B': -0.0012916000309219466, 'GOOGL': -0.001520742433792084, 'META': 0.008123913109443713, 'MSFT': -0.0006810750047136926, 'NVDA': 0.004440850635622371, 'TSLA': -0.0011307364644292971, 'UNH': 0.002140110500490017, 'XOM': -9.640036431973309e-05}
[*********************100%***********************]  1 of 1 completed
OrderedDict([('AAPL', 0.0), ('AMZN', 0.01833), ('BRK-B', 0.0), ('GOOGL', 0.00741), ('META', 0.08413), ('MSFT', 0.0), ('NVDA', 0.33585), ('TSLA', 0.55428), ('UNH', 0.0), ('XOM', 0.0)])
Expected annual return: 0.05856272031556069
Annual volatility: 0.5079242328024777
Sharpe ratio: 0.07592219040779062


