In [2]:
from yahoo_fin.stock_info import *
from pytickersymbols import PyTickerSymbols
import joblib

In [3]:
import pandas as pd
import numpy as np
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import datetime as dt

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

2023-05-04 19:27:25.054043: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
# The final rows cannot start a sample: no later observation exists yet to serve as their target.
def split(data, window, future=1):
  """Cut a 2-D array into sliding-window inputs and their look-ahead targets.

  Every sample uses `window-1` consecutive rows of `data` as input and the
  row lying `future` steps after the last input row as target.

  Args:
    data: 2-D array indexed as `data[row, column]`.
    window: window size; each input holds `window-1` rows.
    future: how many rows past the last input row the target sits (default 1).

  Returns:
    `(X, y)` as arrays built with `np.array`; both are empty when `data` has
    too few rows to form a single sample.
  """
  steps=window-1
  n_samples=len(data)-steps-future+1
  inputs=[data[start:start+steps, :] for start in range(n_samples)]
  targets=[data[start+steps+future-1, :] for start in range(n_samples)]
  return np.array(inputs), np.array(targets)

In [7]:
def build_model(X_train):
  """Assemble and compile the two-layer LSTM regressor.

  Args:
    X_train: training inputs; only `X_train.shape[1]` and `X_train.shape[2]`
      are read, to declare the input shape of the first layer.

  Returns:
    A compiled `Sequential` model: two 50-unit ReLU LSTM layers followed by a
    4-unit `Dense` output, trained with mean-squared-error loss and Adam.
  """
  n_steps, n_features=X_train.shape[1], X_train.shape[2]
  network=Sequential([
    # The first LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(50, input_shape=(n_steps, n_features), activation='relu', return_sequences=True),
    LSTM(50, activation='relu'),
    Dense(4),
  ])
  network.compile(loss='mean_squared_error', optimizer='adam')
  return network

In [8]:
# Ticker symbols iterated by the training loop. Judging by the method name these
# are the NASDAQ-100 symbols in Yahoo format -- confirm against pytickersymbols.
stock_data = PyTickerSymbols()
nasdaq_100_stocks = stock_data.get_nasdaq_100_nyc_yahoo_tickers()

In [9]:
# Download settings; the training loop hands them positionally to get_data().
# start_date is 1000 calendar days before the day this cell is executed.
start_date=dt.date.today()-dt.timedelta(days=1000)
# NOTE(review): presumably None means "up to the latest available data" -- confirm in yahoo_fin.
end_date=None
index_as_date=True
interval="1d"

In [11]:
# Windowing and training configuration.
# split() turns `window` into inputs of window-1 rows, with the target taken
# `future` rows after the last input row.
window=5
future=1
# Fraction of the generated samples (in order) that goes to the training set.
split_ratio=0.8
# sc is re-fitted on each ticker's rows inside the training loop.
sc=StandardScaler()
# new_sc is never fitted; the training loop copies the last column's statistics
# from sc into it.
new_sc=StandardScaler()
# NOTE(review): this callback is not passed to model.fit() in the training loop
# visible in this notebook section, so it currently has no effect there.
callback=EarlyStopping(monitor='loss', patience=5, verbose=1, baseline=0.005, restore_best_weights=True)

In [17]:
# Accumulates one (R^2 score, next-step prediction) tuple per processed ticker.
acc_pred=[]

In [18]:
# Per-ticker pipeline: download prices, scale them, train an LSTM on sliding
# windows, score it on the held-out tail, predict the next step, and persist
# the fitted scaler and model.
for ticker in nasdaq_100_stocks:
  # Keep only the first four columns (presumably open/high/low/close -- confirm
  # against yahoo_fin); they serve as both features and targets.
  Xy_data=get_data(ticker, start_date, end_date, index_as_date, interval).dropna().iloc[:, :4]
  # NOTE(review): the scaler is fitted on every row, including rows that end up
  # in the test split below, so the R^2 score is not strictly out-of-sample.
  Xy_nm=sc.fit_transform(Xy_data.values)
  Xy_nm=pd.DataFrame(data=Xy_nm, index=Xy_data.index, columns=Xy_data.columns)

  X_split, y_split=split(Xy_nm.values, window, future)
  split_idx=int(len(X_split)*split_ratio)
  # One date per sample (the date of the first row of each input window).
  # Slicing by sample count avoids the negative-stop slice, which collapses to
  # an empty index when window+future == 2.
  date_index=Xy_nm.index[:len(X_split)]

  # Chronological split: no shuffling, the most recent samples form the test set.
  X_train, X_test=X_split[:split_idx], X_split[split_idx:]
  y_train, y_test=y_split[:split_idx], y_split[split_idx:]
  X_train_date, X_test_date=date_index[:split_idx], date_index[split_idx:]

  model=build_model(X_train)
  # NOTE(review): the EarlyStopping `callback` configured earlier is not passed
  # here, so all 50 epochs always run.
  history=model.fit(X_train, y_train, batch_size=4, epochs=50, verbose=2, shuffle=False)

  y_pred=model.predict(X_test)
  accu=r2_score(y_test, y_pred)

  # Feed the most recent rows, using exactly the sequence length the model was
  # trained on. The previous slice also subtracted `future`, which only matched
  # the training length when future == 1.
  n_steps=X_train.shape[1]
  future_pred_X=np.array([Xy_nm.values[-n_steps:, :]])
  future_pred_y=model.predict(future_pred_X)

  # Kept for any later cell that relies on new_sc carrying the last column's statistics.
  new_sc.scale_, new_sc.mean_, new_sc.var_=sc.scale_[-1], sc.mean_[-1], sc.var_[-1]
  # Undo the scaling with the fitted 4-column scaler so every predicted column
  # gets its own mean/scale. Applying the last column's statistics to all four
  # outputs left the first three values wrong; the last column is unchanged.
  future_pred_close=sc.inverse_transform(future_pred_y)
  acc_pred.append((accu, future_pred_close))
  model_joblib_dir=f"./{ticker}.joblib"
  sc_joblib_dir=f"./sc_{ticker}.joblib"
  joblib.dump(sc, sc_joblib_dir, compress=True)
  joblib.dump(model, model_joblib_dir, compress=True)
  # Stops after the first ticker; remove to process the whole list.
  break

Epoch 1/50
137/137 - 3s - loss: 0.3138 - 3s/epoch - 25ms/step
Epoch 2/50
137/137 - 1s - loss: 0.0707 - 626ms/epoch - 5ms/step
Epoch 3/50
137/137 - 1s - loss: 0.0499 - 520ms/epoch - 4ms/step
Epoch 4/50
137/137 - 0s - loss: 0.0397 - 488ms/epoch - 4ms/step
Epoch 5/50
137/137 - 0s - loss: 0.0314 - 480ms/epoch - 4ms/step
Epoch 6/50
137/137 - 1s - loss: 0.0309 - 549ms/epoch - 4ms/step
Epoch 7/50
137/137 - 0s - loss: 0.0263 - 476ms/epoch - 3ms/step
Epoch 8/50
137/137 - 0s - loss: 0.0300 - 474ms/epoch - 3ms/step
Epoch 9/50
137/137 - 1s - loss: 0.0249 - 512ms/epoch - 4ms/step
Epoch 10/50
137/137 - 1s - loss: 0.0276 - 511ms/epoch - 4ms/step
Epoch 11/50
137/137 - 1s - loss: 0.0245 - 561ms/epoch - 4ms/step
Epoch 12/50
137/137 - 1s - loss: 0.0269 - 620ms/epoch - 5ms/step
Epoch 13/50
137/137 - 1s - loss: 0.0257 - 625ms/epoch - 5ms/step
Epoch 14/50
137/137 - 1s - loss: 0.0275 - 543ms/epoch - 4ms/step
Epoch 15/50
137/137 - 0s - loss: 0.0270 - 473ms/epoch - 3ms/step
Epoch 16/50
137/137 - 0s - loss: 0.0

KeyboardInterrupt: 