In [1]:
import yfinance as yf
import pandas
import numpy
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Ticker symbols that the cells below download, model and optimise over.
tickers = [
    'AAPL',
    'GOOG',
    'MSFT',
    'AMZN',
    'INTC',
    'AMD',
    'NVDA',
    'F',
    'TSLA',
    'JPM',
    'MS',
    'VOO',
]

def get_monthly_data_from_yf(ticker, start_date, end_date):
    """Download monthly price data for ``ticker`` and add the previous month's close.

    Parameters
    ----------
    ticker : str
        Symbol forwarded to ``yf.download``.
    start_date, end_date : str
        Date range forwarded to ``yf.download`` as ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        The downloaded rows with missing values removed, plus a
        ``'Close-Previous-Month'`` column holding ``Close`` shifted down by
        one row. The first row is dropped by the final ``dropna()`` because
        it has no previous close.
    """
    raw = yf.download(ticker, start=start_date, end=end_date, interval='1mo', progress=False)
    # ``assign`` returns a new frame rather than writing a column into the
    # object returned by ``dropna()``; the in-place write is what triggered
    # pandas' SettingWithCopyWarning.
    data = raw.dropna().assign(
        **{'Close-Previous-Month': lambda frame: frame['Close'].shift(1)}
    )
    return data.dropna()

# Monthly history from 2012 through 2021 for every ticker, keyed by symbol.
data_ten_years = {
    symbol: get_monthly_data_from_yf(symbol, '2012-01-01', '2022-01-01')
    for symbol in tickers
}

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [2]:
def mlr_predict_close_price(data, ticker):
    """Regress close on the previous month's close and forecast the next close.

    Parameters
    ----------
    data : dict
        Maps ticker symbol to a DataFrame that has ``'Close'`` and
        ``'Close-Previous-Month'`` columns.
    ticker : str
        Key of the frame in ``data`` to model.

    Returns
    -------
    list
        ``[next_month_pred, rmse]`` where ``next_month_pred`` is the array
        returned by ``predict`` for the most recent close, and ``rmse`` is
        the root mean squared error on the held-out test rows.
    """
    frame = data[ticker]
    features = frame[['Close-Previous-Month']].values
    target = frame['Close'].values

    # shuffle=False keeps row order, so the test set is the trailing part
    # of the series (test_size=0.33) rather than a random sample.
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.33, random_state=0, shuffle=False
    )

    model = LinearRegression().fit(features_train, target_train)
    rmse = sqrt(mean_squared_error(target_test, model.predict(features_test)))

    # The latest observed close is the input for the one-step-ahead forecast.
    next_month_pred = model.predict([[target[-1]]])
    return [next_month_pred, rmse]

In [3]:
# Forecast next month's close for each ticker and report the test-set RMSE.
predictions_next_month_close_price = {}
for ticker in tickers:
    # The helper returns [array holding the single forecast, rmse].
    forecast, rmse = mlr_predict_close_price(data_ten_years, ticker)
    prediction = forecast[0]
    predictions_next_month_close_price[ticker] = prediction
    print(ticker, "\t RMSE =", rmse, "\t Next Close Price prediction =", prediction)

AAPL 	 RMSE = 8.213356570078494 	 Next Close Price prediction = 182.86104245650768
GOOG 	 RMSE = 6.188487510992034 	 Next Close Price prediction = 146.18793306019006
MSFT 	 RMSE = 11.845219781435198 	 Next Close Price prediction = 348.18188994312834
AMZN 	 RMSE = 11.291925837218503 	 Next Close Price prediction = 175.9450267369508
INTC 	 RMSE = 4.168178885372773 	 Next Close Price prediction = 51.614873596547504
AMD 	 RMSE = 9.974305061390526 	 Next Close Price prediction = 154.71172816232007
NVDA 	 RMSE = 17.377888484898726 	 Next Close Price prediction = 306.1588819490616
F 	 RMSE = 1.1897093288610803 	 Next Close Price prediction = 20.37149645111885
TSLA 	 RMSE = 30.074918521243497 	 Next Close Price prediction = 337.2011023520436
JPM 	 RMSE = 8.953948964415655 	 Next Close Price prediction = 159.54669143130644
MS 	 RMSE = 5.350830305127152 	 Next Close Price prediction = 97.2808310066976
VOO 	 RMSE = 15.178946083128954 	 Next Close Price prediction = 438.97776456396764


In [4]:
import datetime

# Monthly closes for all tickers over 2020-2021. dropna() removes any month
# in which at least one ticker has no close.
data_for_portfolio = yf.download(tickers, start='2020-01-01', end='2022-01-01', interval='1mo')['Close'].dropna()

# Index label of the forecast row: 1 January 2022.
d = datetime.datetime.strptime("01/01/2022", "%d/%m/%Y")

# DataFrame.append was removed in pandas 2.0. Build the forecast as its own
# one-row frame and concatenate it; concat aligns the columns by ticker name,
# so the order of `tickers` does not need to match the download's columns.
prediction_row = pandas.DataFrame(
    {ticker: predictions_next_month_close_price[ticker] for ticker in tickers},
    index=[d],
)
data_for_portfolio_with_prediction = pandas.concat([data_for_portfolio, prediction_row])

data_for_portfolio_with_prediction.tail()

[*********************100%***********************]  12 of 12 completed


Unnamed: 0,AAPL,AMD,AMZN,F,GOOG,INTC,JPM,MS,MSFT,NVDA,TSLA,VOO
2021-09-01,141.5,102.900002,164.251999,14.16,133.265503,53.279999,163.690002,97.309998,281.920013,207.160004,258.493347,394.399994
2021-10-01,149.800003,120.230003,168.621506,17.08,148.270493,49.0,169.889999,102.779999,331.619995,255.669998,371.333344,422.160004
2021-11-01,165.300003,158.369995,175.3535,19.190001,142.451996,49.200001,158.830002,94.82,330.589996,326.76001,381.58667,419.059998
2021-12-01,177.570007,143.899994,166.716995,20.77,144.679504,51.5,158.350006,98.160004,336.320007,294.109985,352.26001,436.570007
2022-01-01,182.861042,154.711728,175.945027,20.371496,146.187933,51.614874,159.546691,97.280831,348.18189,306.158882,337.201102,438.977765


In [5]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices

### Portfolio based on real data + next month prediction
# Expected returns from the monthly price table; frequency=12 because the
# rows are monthly (12 periods per year).
mu = mean_historical_return(data_for_portfolio_with_prediction, frequency=12)
# Covariance estimate from the same price table, using the Ledoit-Wolf
# shrinkage method of CovarianceShrinkage.
S = CovarianceShrinkage(data_for_portfolio_with_prediction, frequency=12).ledoit_wolf()
# Each asset weight is bounded to the range [0, 1].
ef = EfficientFrontier(mu, S, weight_bounds=(0,1))
# Alternative objective, currently disabled in favour of min_volatility():
#weights = ef.max_sharpe()
weights = ef.min_volatility()
# The cleaned weights are what gets printed below.
cleaned_weights = ef.clean_weights()
print(cleaned_weights)
print('- - -')
# verbose=True prints expected annual return, annual volatility and Sharpe
# ratio; as the cell's last expression, the returned tuple is displayed too.
ef.portfolio_performance(verbose=True)

OrderedDict([('AAPL', 0.0), ('AMD', 0.00925), ('AMZN', 0.14413), ('F', 0.03003), ('GOOG', 0.05887), ('INTC', 0.23252), ('JPM', 0.10767), ('MS', 0.0), ('MSFT', 0.1716), ('NVDA', 0.09668), ('TSLA', 0.0), ('VOO', 0.14926)])
- - -
Expected annual return: 31.2%
Annual volatility: 16.7%
Sharpe Ratio: 1.75


(0.3116805524162408, 0.16706303991595814, 1.7459310722645303)