In [1]:
import yfinance as yf
import pandas
import numpy
import matplotlib.pyplot as plt
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import time
import datetime

tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'INTC', 'AMD', 'NVDA', 'F', 'TSLA', 'JPM', 'MS', 'VOO']

def get_monthly_data_from_yf(ticker, start_date, end_date):
    return yf.download(ticker, start_date, end_date, interval='1mo', progress=False).dropna()

# Get data for all tickers 2016 - 2021
data_ten_years = {}
for ticker in tickers:
    data_ten_years[ticker] = get_monthly_data_from_yf(ticker, '2012-01-01', '2022-01-01')
print('All data loaded')

Using TensorFlow backend.


All data loaded


In [2]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)

def next_month_close_price_rmse(dataset, ticker):
    # fix random seed for reproducibility
    numpy.random.seed(7)
    dataframe = dataset[ticker]
    dataframe = dataframe.loc[:, dataframe.columns.isin(['Close'])]
    dataset = dataframe.values
    dataset = dataset.astype('float32')
    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)
    # split into train and test sets
    train_size = int(len(dataset) * 0.67)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
    #print("Train data size =", len(train), "Test data size =", len(test))
    # reshape into X=t and Y=t+1
    look_back = 1
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    # print test arrays X and Y 
    #for i in range(len(testX)):
    #    print(testX[i], testY[i])
    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    # create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    #print("LSTM network started...", "Train data size =", len(train), "Test data size =", len(test))
    start_time_ms = time.time() * 1000
    model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)        # verbose=2
    end_time_ms = time.time() * 1000
    #print("LSTM network finished. Time spent = %.2f seconds." % ((end_time_ms - start_time_ms) / 1000))
    # make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)
    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])
    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
    print('Test Score: %.2f RMSE' % (testScore))
    return testScore

def weighted_next_month_close_price_rmse(data, ticker):
    rmse_1 = next_month_close_price_rmse(data, ticker)
    rmse_2 = next_month_close_price_rmse(data, ticker)
    rmse_3 = next_month_close_price_rmse(data, ticker)
    return (rmse_1 + rmse_2 + rmse_3) / 3

In [3]:
rmse_next_month_close_price = {}
for ticker in tickers:
    rmse = weighted_next_month_close_price_rmse(data_ten_years, ticker)
    rmse_next_month_close_price[ticker] = rmse
    print(ticker, "WEIGHTED RMSE =", rmse)
print(rmse_next_month_close_price)

Train Score: 1.83 RMSE
Test Score: 8.26 RMSE
Train Score: 1.83 RMSE
Test Score: 14.00 RMSE
Train Score: 1.83 RMSE
Test Score: 8.65 RMSE
AAPL WEIGHTED RMSE = 10.301902775556298
Train Score: 1.92 RMSE
Test Score: 6.22 RMSE
Train Score: 1.92 RMSE
Test Score: 5.99 RMSE
Train Score: 1.91 RMSE
Test Score: 6.12 RMSE
GOOG WEIGHTED RMSE = 6.108269211145167
Train Score: 2.84 RMSE
Test Score: 14.65 RMSE
Train Score: 2.86 RMSE
Test Score: 15.58 RMSE
Train Score: 2.85 RMSE
Test Score: 12.99 RMSE
MSFT WEIGHTED RMSE = 14.40422009468084
Train Score: 2.41 RMSE
Test Score: 11.23 RMSE
Train Score: 2.44 RMSE
Test Score: 10.79 RMSE
Train Score: 2.43 RMSE
Test Score: 9.81 RMSE
AMZN WEIGHTED RMSE = 10.61349060967413
Train Score: 1.98 RMSE
Test Score: 4.01 RMSE
Train Score: 1.99 RMSE
Test Score: 4.07 RMSE
Train Score: 1.98 RMSE
Test Score: 4.00 RMSE
INTC WEIGHTED RMSE = 4.026726013033572
Train Score: 1.12 RMSE
Test Score: 9.61 RMSE
Train Score: 1.12 RMSE
Test Score: 9.44 RMSE
Train Score: 1.12 RMSE
Test Score