In [1]:
import yfinance as yf
import pandas
import numpy
import matplotlib.pyplot as plt
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import time
import datetime

#tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'INTC', 'AMD', 'NVDA', 'F', 'TSLA', 'JPM', 'MS', 'VOO', 
#           'GC=F', 'SI=F', 'CL=F', 
#           'BTC-USD', 'ETH-USD', 'DASH-USD']
tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'INTC', 'AMD', 'NVDA', 'F', 'TSLA', 'JPM', 'MS', 'VOO']

def get_monthly_data_from_yf(ticker, start_date, end_date):
    return yf.download(ticker, start_date, end_date, interval='1mo', progress=False).dropna()

# Get data for all tickers 2016 - 2021
data_ten_years = {}
for ticker in tickers:
    data_ten_years[ticker] = get_monthly_data_from_yf(ticker, '2012-01-01', '2022-01-01')
print('All data loaded')

Using TensorFlow backend.


All data loaded


In [2]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)

def predict_next_month_close_price(dataset, ticker):
    # fix random seed for reproducibility
    numpy.random.seed(7)
    dataframe = dataset[ticker]
    dataframe = dataframe.loc[:, dataframe.columns.isin(['Close'])]
    dataset = dataframe.values
    dataset = dataset.astype('float32')
    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)
    # split into train and test sets
    train_size = int(len(dataset) * 0.67)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
    #print("Train data size =", len(train), "Test data size =", len(test))
    # reshape into X=t and Y=t+1
    look_back = 1
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    # print test arrays X and Y 
    #for i in range(len(testX)):
    #    print(testX[i], testY[i])
    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    # create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    #print("LSTM network started...", "Train data size =", len(train), "Test data size =", len(test))
    start_time_ms = time.time() * 1000
    model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)        # verbose=2
    end_time_ms = time.time() * 1000
    #print("LSTM network finished. Time spent = %.2f seconds." % ((end_time_ms - start_time_ms) / 1000))
    # make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)
    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])
    # calculate root mean squared error
    #trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
    #print('Train Score: %.2f RMSE' % (trainScore))
    #testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
    #print('Test Score: %.2f RMSE' % (testScore))
    # MAKE PREDICTION FOR NEXT MONTH
    last_real_data = scaler.inverse_transform(test)
    last_real_data = last_real_data[len(last_real_data)-1:]
    #print('last real data =', last_real_data)
    last_real_data_scaled = scaler.transform(last_real_data)
    #print('last real data scaled =', last_real_data_scaled)
    featureX = last_real_data_scaled[(len(last_real_data_scaled)-1):(len(last_real_data_scaled)),:]          # last value as array
    #print('featureX =', featureX.tolist())
    featureX = numpy.reshape(featureX, (featureX.shape[0], 1, featureX.shape[1]))
    prediction_next_month = model.predict(featureX)
    last_real_data_scaled = numpy.concatenate((last_real_data_scaled, prediction_next_month), axis=0)
    #print('last real data + one prediction scaled =', last_real_data_scaled.tolist())
    #print('prediction for next month scaled =', prediction_next_month)
    #print('prdiction for next month = ', scaler.inverse_transform(prediction_next_month))
    next_month_inversed = scaler.inverse_transform(prediction_next_month)[0][0]
    #print(ticker+":", 'Last known Close price =', last_real_data[0][0], '| Next month Close prise =', next_month_inversed)
    return next_month_inversed

def weighted_next_month_close_price(data, ticker):
    prediction_1 = predict_next_month_close_price(data, ticker)
    prediction_2 = predict_next_month_close_price(data, ticker)
    prediction_3 = predict_next_month_close_price(data, ticker)
    return (prediction_1 + prediction_2 + prediction_3) / 3

In [3]:
predictions_next_month_close_price = {}
for ticker in tickers:
    next_month_close_price = weighted_next_month_close_price(data_ten_years, ticker)
    predictions_next_month_close_price[ticker] = next_month_close_price
    print(ticker, "WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION =", next_month_close_price)
print(predictions_next_month_close_price)

AAPL WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 164.60066731770834
GOOG WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 142.93310546875
MSFT WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 311.9393310546875
AMZN WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 167.2774658203125
INTC WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 51.61514790852865
AMD WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 149.06685384114584
NVDA WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 268.7689208984375
F WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 20.109826405843098
TSLA WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 275.62351481119794
JPM WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 153.412109375
MS WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 94.70902506510417
VOO WEIGHTED NEXT MONTH CLOSE PRICE PREDICTION = 421.8831787109375
{'AAPL': 164.60066731770834, 'GOOG': 142.93310546875, 'MSFT': 311.9393310546875, 'AMZN': 167.2774658203125, 'INTC': 51.61514790852865, 'AMD': 149.06685384114584, 'NVDA': 268.7689208984375, 'F': 

In [4]:
data_for_portfolio = yf.download(tickers, start='2021-01-01', end='2022-01-01', interval='1mo')['Close'].dropna()

d = datetime.datetime.strptime("01/01/2022","%d/%m/%Y")
data_for_portfolio_with_prediction = data_for_portfolio.append(pandas.DataFrame(index=[d]))
for ticker in tickers:
    data_for_portfolio_with_prediction.loc[d, ticker] = predictions_next_month_close_price[ticker]

data_for_portfolio

[*********************100%***********************]  12 of 12 completed


Unnamed: 0_level_0,AAPL,AMD,AMZN,F,GOOG,INTC,JPM,MS,MSFT,NVDA,TSLA,VOO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-01-01,131.960007,85.639999,160.309998,10.53,91.787003,55.509998,128.669998,67.050003,231.960007,129.897507,264.51001,340.179993
2021-02-01,121.260002,84.510002,154.6465,11.7,101.843002,60.779999,147.169998,76.870003,232.380005,137.145004,225.166672,349.589996
2021-03-01,122.150002,78.5,154.703995,12.25,103.431503,64.0,152.229996,77.660004,235.770004,133.482498,222.643326,364.299988
2021-04-01,131.460007,81.620003,173.371002,11.54,120.505997,57.529999,153.809998,82.550003,252.179993,150.095001,236.479996,383.570007
2021-05-01,124.610001,80.080002,161.153503,14.53,120.578003,57.119999,164.240005,90.949997,249.679993,162.445007,208.406662,386.130005
2021-06-01,136.960007,93.93,172.007996,14.86,125.316002,56.139999,155.539993,91.690002,270.899994,200.024994,226.566666,393.519989
2021-07-01,145.860001,106.190002,166.379501,13.95,135.220993,53.720001,151.779999,95.980003,284.910004,194.990005,229.066666,403.149994
2021-08-01,151.830002,110.720001,173.539505,13.03,145.462006,54.060001,159.949997,104.43,301.880005,223.850006,245.240005,415.049988
2021-09-01,141.5,102.900002,164.251999,14.16,133.265503,53.279999,163.690002,97.309998,281.920013,207.160004,258.493347,394.399994
2021-10-01,149.800003,120.230003,168.621506,17.08,148.270493,49.0,169.889999,102.779999,331.619995,255.669998,371.333344,422.160004


In [5]:
data_for_portfolio_with_prediction

Unnamed: 0,AAPL,AMD,AMZN,F,GOOG,INTC,JPM,MS,MSFT,NVDA,TSLA,VOO
2021-01-01,131.960007,85.639999,160.309998,10.53,91.787003,55.509998,128.669998,67.050003,231.960007,129.897507,264.51001,340.179993
2021-02-01,121.260002,84.510002,154.6465,11.7,101.843002,60.779999,147.169998,76.870003,232.380005,137.145004,225.166672,349.589996
2021-03-01,122.150002,78.5,154.703995,12.25,103.431503,64.0,152.229996,77.660004,235.770004,133.482498,222.643326,364.299988
2021-04-01,131.460007,81.620003,173.371002,11.54,120.505997,57.529999,153.809998,82.550003,252.179993,150.095001,236.479996,383.570007
2021-05-01,124.610001,80.080002,161.153503,14.53,120.578003,57.119999,164.240005,90.949997,249.679993,162.445007,208.406662,386.130005
2021-06-01,136.960007,93.93,172.007996,14.86,125.316002,56.139999,155.539993,91.690002,270.899994,200.024994,226.566666,393.519989
2021-07-01,145.860001,106.190002,166.379501,13.95,135.220993,53.720001,151.779999,95.980003,284.910004,194.990005,229.066666,403.149994
2021-08-01,151.830002,110.720001,173.539505,13.03,145.462006,54.060001,159.949997,104.43,301.880005,223.850006,245.240005,415.049988
2021-09-01,141.5,102.900002,164.251999,14.16,133.265503,53.279999,163.690002,97.309998,281.920013,207.160004,258.493347,394.399994
2021-10-01,149.800003,120.230003,168.621506,17.08,148.270493,49.0,169.889999,102.779999,331.619995,255.669998,371.333344,422.160004


In [6]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices

### Portfolio based only on real data
mu = mean_historical_return(data_for_portfolio, frequency=12)
S = CovarianceShrinkage(data_for_portfolio, frequency=12).ledoit_wolf()
ef = EfficientFrontier(mu, S, weight_bounds=(0,1))
#weights = ef.max_sharpe()
weights = ef.min_volatility()
cleaned_weights = ef.clean_weights()
print(cleaned_weights)
print('- - -')
ef.portfolio_performance(verbose=True)

OrderedDict([('AAPL', 0.13354), ('AMD', 0.04401), ('AMZN', 0.11977), ('F', 0.09698), ('GOOG', 0.04209), ('INTC', 0.18178), ('JPM', 0.13698), ('MS', 0.07181), ('MSFT', 0.04444), ('NVDA', 0.0), ('TSLA', 0.01945), ('VOO', 0.10915)])
- - -
Expected annual return: 34.4%
Annual volatility: 8.7%
Sharpe Ratio: 3.71


(0.34440827894230863, 0.08734457829691554, 3.7141203869521076)

In [7]:
### Portfolio based on real data + next month prediction
mu = mean_historical_return(data_for_portfolio_with_prediction, frequency=12)
S = CovarianceShrinkage(data_for_portfolio_with_prediction, frequency=12).ledoit_wolf()
ef = EfficientFrontier(mu, S, weight_bounds=(0,1))
#weights = ef.max_sharpe()
weights = ef.min_volatility()
cleaned_weights = ef.clean_weights()
print(cleaned_weights)
print('- - -')
ef.portfolio_performance(verbose=True)

OrderedDict([('AAPL', 0.10956), ('AMD', 0.06037), ('AMZN', 0.15145), ('F', 0.08708), ('GOOG', 0.04417), ('INTC', 0.20385), ('JPM', 0.1387), ('MS', 0.05938), ('MSFT', 0.03053), ('NVDA', 0.0), ('TSLA', 0.0), ('VOO', 0.11491)])
- - -
Expected annual return: 25.7%
Annual volatility: 9.2%
Sharpe Ratio: 2.57


(0.25721938818236206, 0.09232181459449118, 2.569483596312641)