In [1]:
import yfinance as yf
import pandas
import numpy
import matplotlib.pyplot as plt
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import time
import datetime

tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'INTC', 'AMD', 'NVDA', 'F', 'TSLA', 'JPM', 'MS', 'VOO']

def get_monthly_data_from_yf(ticker, start_date, end_date):
    return yf.download(ticker, start_date, end_date, interval='1mo', progress=False).dropna()

# Get data for all tickers 2016 - 2021
data_ten_years = {}
for ticker in tickers:
    data_ten_years[ticker] = get_monthly_data_from_yf(ticker, '2012-01-01', '2022-01-01')
print('All data loaded')

All data loaded


In [2]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)

def next_month_close_price_rmse(dataset, ticker):
    # fix random seed for reproducibility
    numpy.random.seed(7)
    dataframe = dataset[ticker]
    dataframe = dataframe.loc[:, dataframe.columns.isin(['Close'])]
    dataset = dataframe.values
    dataset = dataset.astype('float32')
    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)
    # split into train and test sets
    train_size = int(len(dataset) * 0.67)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
    #print("Train data size =", len(train), "Test data size =", len(test))
    # reshape into X=t and Y=t+1
    look_back = 1
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    # print test arrays X and Y 
    #for i in range(len(testX)):
    #    print(testX[i], testY[i])
    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    # create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    #print("LSTM network started...", "Train data size =", len(train), "Test data size =", len(test))
    start_time_ms = time.time() * 1000
    model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)        # verbose=2
    end_time_ms = time.time() * 1000
    #print("LSTM network finished. Time spent = %.2f seconds." % ((end_time_ms - start_time_ms) / 1000))
    # make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)
    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])
    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
    #print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
    #print('Test Score: %.2f RMSE' % (testScore))
    return [trainScore, testScore]

def weighted_next_month_close_price_rmse(data, ticker):
    rmse_1 = next_month_close_price_rmse(data, ticker)
    rmse_2 = next_month_close_price_rmse(data, ticker)
    rmse_3 = next_month_close_price_rmse(data, ticker)
    trainScoreW = (rmse_1[0] + rmse_2[0] + rmse_3[0]) / 3
    testScoreW = (rmse_1[1] + rmse_2[1] + rmse_3[1]) / 3
    return [trainScoreW, testScoreW]

In [3]:
for ticker in tickers:
    rmse = weighted_next_month_close_price_rmse(data_ten_years, ticker)
    print(ticker, "WEIGHTED RMSE = \t", rmse)

AAPL WEIGHTED RMSE = 	 [1.8426456892511203, 9.813851229918344]
GOOG WEIGHTED RMSE = 	 [1.925383191272326, 8.430572896316013]
MSFT WEIGHTED RMSE = 	 [3.070312114127374, 13.63814816107645]
AMZN WEIGHTED RMSE = 	 [2.480472467476766, 9.937645141997326]
INTC WEIGHTED RMSE = 	 [1.9887155284263922, 4.067101703361555]
AMD WEIGHTED RMSE = 	 [1.1420905668931653, 10.37433128155842]
NVDA WEIGHTED RMSE = 	 [2.5597783004831403, 26.02817493968979]
F WEIGHTED RMSE = 	 [0.7908509284525818, 1.2536612339434676]
TSLA WEIGHTED RMSE = 	 [1.8285419288570106, 30.989142478518726]
JPM WEIGHTED RMSE = 	 [4.019359303649133, 9.272489654343863]
MS WEIGHTED RMSE = 	 [2.286505882020368, 6.478380515207824]
VOO WEIGHTED RMSE = 	 [5.156684866327676, 15.445170390599733]
