# Note

The Machine Learning methods presented here are only some of the ways of approaching the portfolio optimization problem. There are multiple ways of incoporating ML in the financial models and it is up to you to come up with more interesting and appropriate approaches. We have tried to use multiple ML models in different sections as an example.

The general rule of thumb in the approaches is as follows:
- Use non-linear regression for predicting future values of stocks
- Use appropriate linear regression for financial models (Single Index, CAPM) according to their standard formulas

# Load your data

We present here a basic way of importing stock data which will be used in the subsequent sections.

In [None]:
import yfinance as yf
import datetime
# Define the ticker symbols for the market index and individual assets
market_index_ticker = "^GSPC"  # S&P 500 index
google_ticker = "GOOGL"  # Google
apple_ticker = "AAPL"  # Apple

# Define the start and end dates for the historical data
end_date = datetime.datetime.now()
start_date = end_date - datetime.timedelta(days=5*365)

# Fetch the historical data using yfinance
market_index_data = yf.download(
    market_index_ticker, start=start_date, end=end_date)
google_data = yf.download(google_ticker, start=start_date, end=end_date)
apple_data = yf.download(apple_ticker, start=start_date, end=end_date)

# Print the fetched data
print("Market Index Data:")
print(market_index_data.head())
print("\nGoogle Data:")
print(google_data.head())
print("\nApple Data:")
print(apple_data.head())


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Market Index Data:
                   Open         High          Low        Close    Adj Close  \
Date                                                                          
2018-06-29  2727.129883  2743.260010  2718.030029  2718.370117  2718.370117   
2018-07-02  2704.949951  2727.260010  2698.949951  2726.709961  2726.709961   
2018-07-03  2733.270020  2736.580078  2711.159912  2713.219971  2713.219971   
2018-07-05  2724.189941  2737.830078  2716.020020  2736.610107  2736.610107   
2018-07-06  2737.679932  2764.409912  2733.520020  2759.820068  2759.820068   

                Volume  
Date                    
2018-06-29  3586800000  
2018-07-02  3095040000  
2018-07-03  1911460000  
2018-07-05  2980160000  
2018-07-06  2590250000  

Google Data:
                 Open       Hig

# Mean Variance Optimization

- Load the stock prices for your selected stocks
- Divide data into independent variable X (10 consecutive days of stock values), and dependent variable y (stock value at 10th day into future)
- Train non linear regression model to predict stock value 10 day into the future
- Use the 10 future stock values and apply mean-variance optimization using pypfopt module

In [None]:
!pip install PyPortfolioOpt

Collecting PyPortfolioOpt
  Downloading pyportfolioopt-1.5.5-py3-none-any.whl (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: PyPortfolioOpt
Successfully installed PyPortfolioOpt-1.5.5


In [None]:
from pypfopt import plotting
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from pypfopt import expected_returns, risk_models, EfficientFrontier


apple_prices = apple_data['Close'].tolist()
google_prices = google_data['Close'].tolist()

# Dependent variable - 10 consecutive days of stock prices
apple_x = [apple_prices[i:i+10] for i in range(len(apple_prices)-20)]
# Independent variable - stock price 10th day into the future
apple_y = [apple_prices[i+10] for i in range(10,len(apple_prices)-10)]

apple_test = [apple_prices[i:i+10] for i in range(len(apple_prices)-20,len(apple_prices)-10)]

reg = RandomForestRegressor()
reg.fit(apple_x,apple_y)
# Predict stock price for 10 future days
apple = reg.predict(apple_test)

google_x = [google_prices[i:i+10] for i in range(len(google_prices)-20)]
google_y = [google_prices[i+10] for i in range(10,len(google_prices)-10)]

google_test = [google_prices[i:i+10] for i in range(len(google_prices)-20,len(google_prices)-10)]

reg = RandomForestRegressor()
reg.fit(google_x,google_y)
google = reg.predict(google_test)

future_prices = {'apple':apple,'google':google}
future_prices = pd.DataFrame(future_prices)


# Construct covariance matrix of future stock prices
# cov_matrix = risk_models.sample_cov(future_prices)
S = risk_models.CovarianceShrinkage(future_prices).ledoit_wolf()
# plotting.plot_covariance(S, plot_correlation=True)

# Use capm to find expected returns on future prices
mu = expected_returns.capm_return(future_prices)
print(mu)
# Do mean variance optimization using efficient frontier
ef = EfficientFrontier(mu, S)
ef.min_volatility()
weights = ef.clean_weights()
print(weights)
# weights = ef.max_sharpe(risk_free_rate=0.02)
# cleaned_weights = ef.clean_weights()
# print(cleaned_weights)

apple    -0.067769
google   -1.134717
Name: mkt, dtype: float64
OrderedDict([('apple', 0.5), ('google', 0.5)])


# Single Index Model

- Load the stock prices for your selected stocks
- Divide data into independent variable X (10 consecutive days of market index values), and dependent variable y (market index value at 10th day into future)
- Train non linear regression model to predict market index value 10 day into the future
- Fit separate linear models each for a stock according to formula of single index model
- The index model can
be written as the following regression equation:

 $$R_{i}(t) = \alpha_{i} + \beta_{i}R_{M}(t) + e_{i}(t)$$
 Since, $E(e_{i}) = 0$
 $$E(R_{i}) = \alpha_{i} + \beta_{i}E(R_{M})$$
- Use the return values of the stock(y) and the market(x) and fit a linear regression model
- Compare the coefficients of the fitted line to find $\alpha_{i}$ and $\beta_{i}$
- Use 10 predicted market returns to predict returns of each stock
- Use the Treynor Black model for portfolio optimization
- Step 1: $$w_{i}^{0} = \frac{\alpha_{i}}{\sigma^{2}(e_{i})}$$
- Step 2: $$w_{i} = \frac{w_{i}^{0}}{\sum w_{i}^{0}}$$
- Step 3: $$\alpha_{A} = \sum w_{i}\alpha_{i}$$
- Step 4: $$\sigma^{2}(e_{A}) = \sum w_{i}^{2}\sigma^{2}(e_{i})$$
- Step 5: $$w_{A}^{0} = \frac{\alpha_{A}/\sigma^{2}(e_{A})}{E(R_{m})/\sigma^{2}_{M}}$$
- Step 6: $$\beta_{A} = \sum w_{i}\beta_{i}$$
- Step 7: $$w_{A}^{*} = \frac{w_{A}^{0}}{1+(1-\beta_{A})w_{A}^{0}}$$
- Step 8: $$w_{M}^{*} = 1-w_{A}^{*}$$
$$w_{i}^{*} = w_{A}^{*}w_{i}$$
- Step 9: $$E(R_{P}) = (w_{M}^{*}+w_{A}^{*}\beta_{A})E(R_{M}) +w_{A}^{*}\alpha_{A}$$
- Step 10: $$\sigma_{P}^{2} = (w_{M}^{*}+w_{A}^{*}\beta_{A})^{2}\sigma_{M}^{2} + (w_{A}^{*}\sigma(e_{A}))^{2}$$







In [None]:
from pypfopt import plotting
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from pypfopt import expected_returns, risk_models, EfficientFrontier

# Returns of stocks and market
apple_returns = ((apple_data['Close'] / apple_data['Close'].shift(1))-1).dropna().tolist()
google_returns = ((google_data['Close'] / google_data['Close'].shift(1))-1).dropna().tolist()
market_returns = ((market_index_data['Close'] / market_index_data['Close'].shift(1))-1).dropna().tolist()

# Future marker returns prediction
market_x = [market_returns[i:i+10] for i in range(len(market_returns)-20)]
market_y = [market_returns[i+10] for i in range(10, len(market_returns)-10)]

market_test = [market_returns[i:i+10]
              for i in range(len(market_returns)-20, len(market_returns)-10)]

reg = RandomForestRegressor()
reg.fit(market_x, market_y)
market_future = reg.predict(market_test)

# Fitting single index model regression on market return and stock return
single_index_reg_apple = LinearRegression()
single_index_reg_apple.fit(np.array(market_returns).reshape(-1, 1),y=apple_returns)
apple_future = single_index_reg_apple.predict(np.array(market_future).reshape(-1, 1))


# Beta for apple stock
beta_apple = single_index_reg_apple.coef_[0]
# alpha for apple stock
alpha_apple = single_index_reg_apple.intercept_
print("Beta of Apple: ", beta_apple)
print("Alpha of Apple: ", alpha_apple)

y_pred = single_index_reg_apple.predict(np.array(market_returns).reshape(-1, 1))
residuals = apple_returns - y_pred
residual_variance_apple = np.var(residuals)

# variance of the residuals for apple
print("Residual variance of Apple: ", residual_variance_apple)
print("-------------------------")

single_index_reg_google = LinearRegression()
single_index_reg_google.fit(np.array(market_returns).reshape(-1, 1),google_returns)
google_future = single_index_reg_google.predict(
    np.array(market_future).reshape(-1, 1))

# Beta for google stock
beta_google = single_index_reg_google.coef_[0]
# alpha for google stock
alpha_google = single_index_reg_google.intercept_
print("Beta of Google: ", beta_google)
print("Alpha of Google: ", alpha_google)

y_pred = single_index_reg_google.predict(np.array(market_returns).reshape(-1, 1))
residuals = google_returns - y_pred
residual_variance_google = np.var(residuals)

# variance of the residuals for apple
print("Residual variance of Google: ", residual_variance_google)
print("-------------------------")

# Using Treynor Black model for portfolio optimization

#STEP 1:
# Compute the initial position of each security:
w_apple = alpha_apple/residual_variance_apple
w_google = alpha_google/residual_variance_google
print("Apple Weight:", w_apple)
print("Google Weight:", w_google)

#STEP 2:
# Scale the initial positions:
w_apple_scaled = w_apple/(w_apple+w_google)
w_google_scaled = w_google/(w_apple+w_google)
print("Apple Weight Scaled:", w_apple_scaled)
print("Google Weight Scaled:", w_google_scaled)

#STEP 3:
# Compute the alpha of the active portfolio:
alpha_portfolio = w_apple_scaled*alpha_apple+w_google_scaled*alpha_google
print("Alpha of the active portfolio: ", alpha_portfolio)

#STEP 4:
# Compute the residual variance of active portfolio:
residual_variance_portfolio = w_apple_scaled*w_apple_scaled*residual_variance_apple + w_google_scaled*w_google_scaled*residual_variance_google
print("Residual variance of the active portfolio: ", residual_variance_portfolio)

#STEP 5:
# Compute the initial position in active portfolio:
# Note: The S&P 500’s long-term standard deviation (volatility) is around 12%. Hence, variance of S&P is 0.0114
residual_variance_market = 0.0114
risk_premium_market = 0.056
initial_position_portfolio = (alpha_portfolio*residual_variance_market)/(residual_variance_portfolio*risk_premium_market)
print("Initial position of active portfolio: ", initial_position_portfolio)

#STEP 6:
# Compute the beta of active portfolio:
beta_portfolio = w_apple_scaled*beta_apple+w_google_scaled*beta_google
print("Beta of the active portfolio: ", beta_portfolio)

#STEP 7:
# Adjust the initial position in active portfolio
adjusted_position_portfolio = initial_position_portfolio/1+(1-beta_portfolio)*initial_position_portfolio
print("Adjusted position of the active portfolio: ", adjusted_position_portfolio)

#STEP 8:
# Optimal risky portfolio now has weights:
final_weight_market = 1-adjusted_position_portfolio
final_weight_apple = adjusted_position_portfolio*w_apple_scaled
final_weight_google = adjusted_position_portfolio*w_google_scaled

print("------------------------------")
print("Final Weights: ")
print("Weight Market S&P: ", final_weight_market)
print("Weight Apple: ", final_weight_apple)
print("Weight Google: ", final_weight_google)
print("------------------------------")


#STEP 9:
# Calculate the risk premium of P (Optimal risky portfolio):
risk_premium_porfolio = (final_weight_market+adjusted_position_portfolio*beta_portfolio)*risk_premium_market + adjusted_position_portfolio*alpha_portfolio
print("Risk premium of portfolio: ", risk_premium_porfolio)

#STEP 10:
# Compute the variance of Portfolio:
portfolio_variance = (final_weight_market+adjusted_position_portfolio*beta_portfolio)*(final_weight_market+adjusted_position_portfolio*beta_portfolio)*residual_variance_market + adjusted_position_portfolio*adjusted_position_portfolio*residual_variance_portfolio
print("Variance of portfolio: ", portfolio_variance)

Beta of Apple:  1.2353960938060866
Alpha of Apple:  0.0007528131251585613
Residual variance of Apple:  0.0001548384113993885
-------------------------
Beta of Google:  1.142061640295758
Alpha of Google:  0.0002510766332060436
Residual variance of Google:  0.00016078843613160276
-------------------------
Apple Weight: 4.861927465897098
Google Weight: 1.5615341454067095
Apple Weight Scaled: 0.7569014590732869
Google Weight Scaled: 0.24309854092671315
Alpha of the active portfolio:  0.0006308417160352167
Residual variance of the active portfolio:  9.820899605363582e-05
Initial position of active portfolio:  1.3076332565868463
Beta of the active portfolio:  1.2127066243395337
Adjusted position of the active portfolio:  1.029491000704147
------------------------------
Final Weights: 
Weight Market S&P:  -0.029491000704146897
Weight Apple:  0.779223240535787
Weight Google:  0.25026776016835994
------------------------------
Risk premium of portfolio:  0.06891230098019865
Variance of portfoli

# CAPM

Similar to Single Index Model.

Here the Regression equation is:

$$R_{i} = R_{f} + β_{i} (R_{m} - R_{f})$$

We will fit a linear regression for this equation to find $\beta_{i}$
$$R_{i} - R_{f} = β_{i} (R_{m} - R_{f}) + \text{error}$$

Here we are assuming the risk free rate of return($R_{f}$) as 0.01 - T-bill rate

In [None]:
from pypfopt import expected_returns
from pypfopt import plotting
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import Ridge
from pypfopt import expected_returns, risk_models, EfficientFrontier

# T-bill rate
risk_free_rate = 0.01

apple_returns = (
    (apple_data['Close'] / apple_data['Close'].shift(1))-1-risk_free_rate).dropna().tolist()
google_returns = (
    (google_data['Close'] / google_data['Close'].shift(1))-1-risk_free_rate).dropna().tolist()
market_returns = (
    (market_index_data['Close'] / market_index_data['Close'].shift(1))-1-risk_free_rate).dropna().tolist()

# apple_prices = apple_data['Close'].tolist()
# google_prices = google_data['Close'].tolist()
market_x = [market_returns[i:i+10] for i in range(len(market_returns)-20)]
market_y = [market_returns[i+10] for i in range(10, len(market_returns)-10)]

market_test = [market_returns[i:i+10]
               for i in range(len(market_returns)-20, len(market_returns)-10)]

reg = MLPRegressor(hidden_layer_sizes=(100,100))
reg.fit(market_x, market_y)
market_future = reg.predict(market_test)

# print(market_future)

single_index_reg = Ridge()
single_index_reg.fit(np.array(market_returns).reshape(-1, 1), y=apple_returns)
apple_future = single_index_reg.predict(np.array(market_future).reshape(-1, 1))

single_index_reg = Ridge()
single_index_reg.fit(np.array(market_returns).reshape(-1, 1), google_returns)
google_future = single_index_reg.predict(
    np.array(market_future).reshape(-1, 1))

future_returns = {'apple': apple, 'google': google}
future_returns = pd.DataFrame(future_returns)


S = risk_models.sample_cov(future_returns)
# S = risk_models.CovarianceShrinkage(future_returns).ledoit_wolf()
# plotting.plot_covariance(S, plot_correlation=True)
# You don't have to provide expected returns in this case


ef = EfficientFrontier(None, S)
ef.min_volatility()
weights = ef.clean_weights()
print(weights)
# weights = ef.max_sharpe(risk_free_rate=-0.2)
# cleaned_weights = ef.clean_weights()
# print(cleaned_weights)
# print(mu)


OrderedDict([('apple', 0.9464), ('google', 0.0536)])


# Multifactor model

- Load the market factors affecting your stocks : S&P 500(F1), GDP(F2), and 20+ Year Treasury Bond ETFs(F3)

We have chosen these 3 factors randomly. S&P is the market factor, GDP is a macroeconomic factor and we have picked Treasury bond as the third factor.
- Divide data into independent variable X (10 consecutive days of factor values), and dependent variable y (factor value at 10th day into future)
- Train non linear regression model to predict market index value 10 day into the future
- Fit separate linear models each for a stock according to formula of multi factor model
- Here the linear equation is:
$$r = E(r) + \beta_{1}F_{1} + \beta_{2}F_{2} + \beta_{3}F_{3} + e$$
E(r) = expected return on the security \\
$F_{i}$ = the i-th factor \\
$\beta_{i}$ = the security’s sensitivity to movements in the i-th factor \\
e = the idiosyncratic component of the security’s return
- Find $\beta_{i}$ from the fitted line to make the future predictions
- Use 10 predicted factor returns to predict returns of each stock
- Use the future returns of stocks to get stock weights using Mean Variance model

In [None]:
from pypfopt import expected_returns
from pypfopt import plotting
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import Ridge
from pypfopt import expected_returns, risk_models, EfficientFrontier
from sklearn.preprocessing import StandardScaler

# Example tickers for S&P 500, and 20+ Year Treasury Bond ETFs
factor_tickers = ['SPY', 'TLT']

# Fetch historical data for the macroeconomic factors S&P and Treasury Bond
factor_data = yf.download(
    factor_tickers, start=start_date, end=end_date, progress=False)
factor_data = factor_data['Adj Close']

# Calculate the returns for the S&P and Treasury bond factors
factor_returns = factor_data.pct_change().dropna()
spy_returns = factor_returns['SPY']
tlt_returns = factor_returns['TLT']

# FACTOR 1
# Train ML model to predict future price of S&P market factors
spy_x = [spy_returns[i:i+10] for i in range(len(spy_returns)-20)]
spy_y = [spy_returns[i+10] for i in range(10, len(spy_returns)-10)]

spy_test = [spy_returns[i:i+10]
               for i in range(len(spy_returns)-20, len(spy_returns)-10)]
reg = MLPRegressor(hidden_layer_sizes=(100, 100))
reg.fit(spy_x, spy_y)
spy_future = reg.predict(spy_test)

# FACTOR 2
# Train ML model to predict future price of Treasury Bond ETF factors
tlt_x = [tlt_returns[i:i+10] for i in range(len(tlt_returns)-20)]
tlt_y = [tlt_returns[i+10] for i in range(10, len(tlt_returns)-10)]

tlt_test = [tlt_returns[i:i+10]
               for i in range(len(tlt_returns)-20, len(tlt_returns)-10)]

reg = MLPRegressor(hidden_layer_sizes=(100, 100))
reg.fit(tlt_x, tlt_y)
tlt_future = reg.predict(tlt_test)

# FACTOR 3
# GDP Price of USA for past 5 years

# US GDP per capita Prices of last 5 years
GDP_prices = {
        2018: 59607,
        2019: 60698,
        2020: 58453,
        2021: 61855,
        2022: 62551,
        2023: 63451 # Forecast data also available online
    }

# Normalize the GDP values as it will make the other factors irrelevant as it is very large
values = list(GDP_prices.values())
values_array = [[value] for value in values]
scaler = StandardScaler()
scaled_values = scaler.fit_transform(values_array)
scaled_values = scaled_values.flatten()
scaled_GDP_prices = {year: scaled_value for year, scaled_value in zip(GDP_prices.keys(), scaled_values)}

print(scaled_GDP_prices)

for index, row in factor_returns.iterrows():
    # Extract the year from the date
    year = index.year

    # Fill the 'GDP' column with the corresponding GDP price based on the year
    factor_returns.at[index, 'GDP'] = scaled_GDP_prices.get(year)

future_factors = {'SPY': spy_future, 'TLT': tlt_future, 'GDP': [scaled_GDP_prices.get(2023)]*10}
future_factors = pd.DataFrame(future_factors)
#print(future_factors)

# 𝑟=𝐸(𝑟)+𝛽1𝐹1+𝛽2𝐹2+𝛽3𝐹3+𝑒 : Fit linear model to find the betas for the 3 factors chosen
# Fit Multi Factor Linear Regression model using Macroeconomic factors as X and stock returns as y
apple_returns = (
    (apple_data['Close'] / apple_data['Close'].shift(1))-1).dropna().tolist()
google_returns = (
    (google_data['Close'] / google_data['Close'].shift(1))-1).dropna().tolist()

single_index_reg = Ridge()
single_index_reg.fit(factor_returns, apple_returns)
apple_future = single_index_reg.predict(future_factors)
print("Coefficients for Apple stock: ", single_index_reg.coef_)

single_index_reg = Ridge()
single_index_reg.fit(factor_returns, google_returns)
google_future = single_index_reg.predict(
    future_factors)
print("Coefficients for Google stock: ", single_index_reg.coef_)

future_returns = {'apple': apple_future, 'google': google_future}
# print(list(zip(apple,google)))
future_returns = pd.DataFrame(future_returns)
print(future_returns)

S = risk_models.sample_cov(future_returns)
print(S)
# S = risk_models.CovarianceShrinkage(future_returns).ledoit_wolf()
# plotting.plot_covariance(S, plot_correlation=True)
# You don't have to provide expected returns in this case

print(future_returns.mean())
ef = EfficientFrontier(future_returns.mean(), S)
# ef.min_volatility()
# weights = ef.clean_weights()
# print(weights)
weights = ef.max_sharpe(risk_free_rate=0.0)
cleaned_weights = ef.clean_weights()
print(cleaned_weights)


{2018: -0.8731875620129659, 2019: -0.23617811356352036, 2020: -1.5469812407578423, 2021: 0.43936719519542417, 2022: 0.8457453584592317, 2023: 1.3712343626796726}
Coefficients for Apple stock:  [ 0.23225802 -0.03096973 -0.00047622]
Coefficients for Google stock:  [ 2.14860963e-01 -2.50223907e-02 -1.32401069e-04]
      apple    google
0  0.000517  0.000480
1  0.000908  0.000835
2  0.000292  0.000272
3  0.000660  0.000604
4  0.000526  0.000487
5  0.000580  0.000533
6  0.000694  0.000639
7  0.000419  0.000387
8  0.000796  0.000737
9  0.000488  0.000451
             apple      google
apple   112.212401  110.421241
google  110.421241  108.688960
apple     0.000588
google    0.000542
dtype: float64
OrderedDict([('apple', 1.0), ('google', 0.0)])


# Equity valuation model - Dividend Model

Equity valuation models are used to estimate the intrinsic value of a company's stock

We are using the Dividend Discount Model (DDM):
The Dividend Discount Model values a stock by calculating the present value of its future expected dividends

The formula for the DDM is:

$V = \frac{D_{1}}{(r - g)}$

Where:

V is the intrinsic value of the stock \\
D_{i} is the expected dividend per share in the next period \\
r is the required rate of return or the discount rate \\
g is the expected growth rate of dividends

- Load dividend values of your stock
- Identify the period of change and only keep unique values in a period
- Train non linear regression model to predict next dividend value
- Calculate average dividend growth rate of your stock
- Assume a discount rate for the stock
- Use ML model to predict next dividend value
- Use the predicted dividend value, discount rate and average dividend growth rate to find stock price

**Note:**

We will use the intrinsic stock value returned by the Equity valuation model as input to the Subjective views dictionary of black litterman model for portfolio allocation.

Based on the results of the instrinsic value of stock from the equity valuation model and by comparing it with the current stock price, we can decide to whether buy or sell the stocks. Using these intrinsic values, weights or constraints can be added in the black litterman model.

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from pypfopt import BlackLittermanModel, plotting
from pypfopt import black_litterman, risk_models

microsoft_data = yf.download('MSFT', start=start_date, end=end_date)

# get dividends of the Apple stock
apple_stock = yf.Ticker(apple_ticker)
apple_dividends = apple_stock.dividends.loc[start_date:end_date]
apple_dividends = apple_dividends.reset_index(drop=True)

# get dividends of the Microsoft stock
microsoft_stock = yf.Ticker('MSFT')
microsoft_dividends = microsoft_stock.dividends.loc[start_date:end_date]
microsoft_dividends = microsoft_dividends.reset_index(drop=True)

# Combine the dividend data into a single DataFrame (We are multiplying by 4 as these are quarterly dividends)
dividends = pd.DataFrame({'AAPL':apple_dividends*4, 'MSFT':microsoft_dividends*4})
#print(dividends)

# Calculate the dividend growth rates
dividend_growth_rates = dividends.pct_change().dropna()

# Since we only have dividend changes every 4 months, we remove the 0 values and get the average dividend growth rate which we consider as perpetual growth rate
avg_dividends_growth_rate = dividend_growth_rates.mask(dividend_growth_rates == 0).sum()/5

print("Dividend Growth rate: \n", avg_dividends_growth_rate)

# PREDICTING NEXT DIVIDEND VALUE FOR APPLE STOCK

# Define the features and target variable for the machine learning model
X = np.unique(dividends['AAPL'].values)[:-1]  # Independent variables (dividend value for the past period)
y = np.unique(dividends['AAPL'].values)[1:]  # Dependent variable (dividend value for the next period)

# Train a machine learning model using regression to predict the next dividend value
model = RandomForestRegressor()
model.fit(X.reshape(-1,1), y)

# Use the trained model to predict the dividend value for the next period
next_period_dividend_apple = model.predict(dividends['AAPL'].values[-1].reshape(1, -1))[0]
print("Predicted next period dividend for APPLE", next_period_dividend_apple)

# PREDICTING NEXT DIVIDEND VALUE FOR MICROSOFT STOCK

# Define the features and target variable for the machine learning model
X = np.unique(dividends['MSFT'].values)[:-1]  # Independent variables (dividend value for the past period)
y = np.unique(dividends['MSFT'].values)[1:]  # Dependent variable (dividend value for the next period)

# Train a machine learning model using regression to predict the next dividend value
model = RandomForestRegressor()
model.fit(X.reshape(-1,1), y)

# Use the trained model to predict the dividend value for the next period
next_period_dividend_microsoft = model.predict(dividends['MSFT'].values[-1].reshape(1, -1))[0]
print("Predicted next period dividend for Microsoft", next_period_dividend_microsoft)

# Apply the DDM formula to calculate the intrinsic value of the stocks
discount_rate = 0.078  # Available online
apple_intrinsic_price = next_period_dividend_apple / \
    (discount_rate - avg_dividends_growth_rate['AAPL'])

discount_rate = 0.11
microsoft_intrinsic_price = next_period_dividend_microsoft / \
    (discount_rate - avg_dividends_growth_rate['MSFT'])

print("Intrinsic Value of Apple Stock:", apple_intrinsic_price)
print("Intrinsic Value of Microsoft Stock:", microsoft_intrinsic_price)

# We can notice that the intrinsic stock values for both Apple and Microsoft are over-valued which makes sense as they are at their all-time high.

# Use the intrinsic value of stock as views for BL model
# Here We are filling our subjective views based on the fair price of the stocks that we calculated using Equity evaluation model
viewdict = {'AAPL': apple_intrinsic_price/apple_data['Close'][-1], 'MSFT': microsoft_intrinsic_price/microsoft_data['Close'][-1]}
tickers = ['AAPL','MSFT']
mcaps = {}
for t in tickers:
    stock = yf.Ticker(t)
    mcaps[t] = stock.info["marketCap"]
# print(mcaps)

prices = pd.DataFrame({'AAPL':apple_data['Close'].values,'MSFT':microsoft_data['Close'].values})
S = risk_models.CovarianceShrinkage(prices).ledoit_wolf()
market_prices = yf.download("SPY", period="max")["Adj Close"]


delta = black_litterman.market_implied_risk_aversion(market_prices)
# print(delta)
market_prior = black_litterman.market_implied_prior_returns(mcaps, delta, S)
# print(market_prior)

# Assign confidence measure for stock returns based on some heuristics
confidences = [
    0.6,
    0.4
]

# Fit BL model
bl = BlackLittermanModel(S, pi=market_prior, absolute_views=viewdict, omega="idzorek", view_confidences=confidences)

# Get expected returns
ret_bl = bl.bl_returns()
# print(ret_bl)

# Get cov matrix
S_bl = bl.bl_cov()


from pypfopt import EfficientFrontier, objective_functions

ef = EfficientFrontier(ret_bl, S_bl)
ef.add_objective(objective_functions.L2_reg)
ef.max_sharpe()
weights = ef.clean_weights()
print("---------------------------------------------")
print("Final weights in the portfolio allocation: ")
print(weights)

[*********************100%***********************]  1 of 1 completed


  apple_dividends = apple_stock.dividends.loc[start_date:end_date]
  microsoft_dividends = microsoft_stock.dividends.loc[start_date:end_date]


Dividend Growth rate: 
 AAPL    0.056367
MSFT    0.101178
dtype: float64
Predicted next period dividend for APPLE 0.949999999999999
Predicted next period dividend for Microsoft 2.6151999999999997
Intrinsic Value of Apple Stock: 43.91362822272131
Intrinsic Value of Microsoft Stock: 296.4408090632195
[*********************100%***********************]  1 of 1 completed
---------------------------------------------
Final weights in the portfolio allocation: 
OrderedDict([('AAPL', 0.42874), ('MSFT', 0.57126)])




# Black litterman model

In this model, we are using Machine learning to forecast the returns and using that to fill the subjective views(confidence)

- Use non linear ML model to predict stock prices in future (same as we did in mean-variance section)
- Use the predicted future return of stocks as you "views" for black litterman model
- Assign confidence using accuracy of your ML model or arbitrarily based on your market understanding
- Fit the Black litterman model and calculate corresponding cov matrix and returns
- Find the optimal weights using bl cov matrix and returns

In [None]:
from pypfopt import BlackLittermanModel, plotting
from pypfopt import black_litterman, risk_models

# Fit non linear ML models to predict stock returns into future
apple_returns = (
    (apple_data['Close'] / apple_data['Close'].shift(1))-1).dropna().tolist()
google_returns = (
    (google_data['Close'] / google_data['Close'].shift(1))-1).dropna().tolist()

apple_x = [apple_returns[i:i+10] for i in range(len(apple_returns)-20)]
apple_y = [apple_returns[i+10] for i in range(10,len(apple_returns)-10)]

apple_test = [apple_returns[i:i+10] for i in range(len(apple_returns)-20,len(apple_returns)-10)]

reg = RandomForestRegressor()
reg.fit(apple_x,apple_y)
apple = reg.predict(apple_test)

google_x = [google_returns[i:i+10] for i in range(len(google_returns)-20)]
google_y = [google_returns[i+10] for i in range(10,len(google_returns)-10)]

google_test = [google_returns[i:i+10] for i in range(len(google_returns)-20,len(google_returns)-10)]

reg = RandomForestRegressor()
reg.fit(google_x,google_y)
google = reg.predict(google_test)

future_returns = {'apple':apple,'google':google}
print(apple[-1],google[-1])

# Use predicted future returns as views for BL model
viewdict = {'AAPL': apple[-1], 'GOOGL': google[-1]}
tickers = ['AAPL','GOOGL']
mcaps = {}
for t in tickers:
    stock = yf.Ticker(t)
    mcaps[t] = stock.info["marketCap"]
print(mcaps)

prices = pd.DataFrame({'AAPL':apple_data['Close'].values,'GOOGL':google_data['Close'].values})
S = risk_models.CovarianceShrinkage(prices).ledoit_wolf()
market_prices = yf.download("SPY", period="max")["Adj Close"]


delta = black_litterman.market_implied_risk_aversion(market_prices)
print(delta)
market_prior = black_litterman.market_implied_prior_returns(mcaps, delta, S)
print(market_prior)

# Assign confidence measure for stock returns based on some heuristics
confidences = [
    0.6,
    0.4
]

# Fit BL model
bl = BlackLittermanModel(S, pi=market_prior, absolute_views=viewdict, omega="idzorek", view_confidences=confidences)

# Get expected returns
ret_bl = bl.bl_returns()
print(ret_bl)

# Get cov matrix
S_bl = bl.bl_cov()


from pypfopt import EfficientFrontier, objective_functions

ef = EfficientFrontier(ret_bl, S_bl)
ef.add_objective(objective_functions.L2_reg)
ef.max_sharpe()
weights = ef.clean_weights()
print(weights)


0.0005149357850170555 -0.007898421716305592
{'AAPL': 2957939310592, 'GOOGL': 1506293579776}
[*********************100%***********************]  1 of 1 completed
2.581431738882907
AAPL     0.273215
GOOGL    0.233136
dtype: float64
AAPL     0.092459
GOOGL    0.082701
dtype: float64
OrderedDict([('AAPL', 0.53921), ('GOOGL', 0.46079)])




# Algorithmic trading - Using Bollinger Bands (You can use any alternative approach like EMA etc)

- Install talib to calculate Bollinger Bands
- Calculate Bollinger bands for your stock using historical data
- Divide data into independent variable X (stock prices) and dependent variable y (sell -1 if stock price > upper limit of bollinger band, buy 1 otherwise)
- Train a classifier to predict the buy or sell based on stock price
- Use the trained classifier to trade the stock in real time
- The algorithmic trading is more useful in intra day or high frequency trading scenario and is not generally done for long term portfolio holdings
- However, you can buy or sell the complete stock based on this everyday to maximize your capital

Install this package on your system: https://pypi.org/project/TA-Lib/

In [None]:
import numpy as np
import pandas as pd
import talib
from sklearn.ensemble import RandomForestClassifier


apple_prices = apple_data['Close']

# Calculate Bollinger Bands for Apple
apple_bb_upper, apple_bb_middle, apple_bb_lower = talib.BBANDS(
    apple_prices, timeperiod=20)


# Calculate the Bollinger Bands percentages for Apple
apple_bb_percentage = (apple_prices - apple_bb_lower) / \
    (apple_bb_upper - apple_bb_lower)

# # Combine the Bollinger Bands percentages into a single DataFrame
# bb_percentages = pd.concat(
#     [apple_bb_percentage, google_bb_percentage], axis=1).dropna()

# Define the features and target variable for the machine learning model
# Independent variables (Bollinger Bands percentages for the past period)
X = apple_bb_percentage.values[:-1]
# Target variable (-1 for sell, 1 for buy)
y = np.where(apple_prices.values[1:] > apple_bb_upper[:-1], -1, 1)

# Train a machine learning model using random forest classifier
model = RandomForestClassifier(n_estimators=100)
model.fit(X, y)

# Use the trained model to predict the trading signals for the testing set
# Ideally this will be your real time stock prices
y_pred = model.predict(X_test)

# Perform algorithmic trading based on the predicted signals (example logic)
capital = 100000  # Initial capital in USD
position = 0  # Current position (0 for neutral, 1 for long, -1 for short)

for i in range(len(y_pred)):
    if y_pred[i] == 1 and position != 1:  # Buy signal
        position = 1
        # Place a buy order based on your trading platform's API or logic

    elif y_pred[i] == -1 and position != -1:  # Sell signal
        position = -1
        # Place a sell order based on your trading platform's API or logic

    elif y_pred[i] == 0 and position != 0:  # Exit position
        position = 0
        # Close the existing position based on your trading platform's API or logic

# Calculate the final capital after the trading period
final_capital = capital  # Assume no transaction costs or slippage
# Calculate the final capital based on your trading platform's API or logic

print("Final Capital:", final_capital)
