In [77]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import random

# One seed value shared by every random number generator seeded below.
common_seed = 42

# Seed, in turn, Python's stdlib RNG, NumPy's global RNG and TensorFlow's
# global RNG. The three calls are independent of each other.
random.seed(common_seed)
np.random.seed(common_seed)
tf.random.set_seed(common_seed)


import os
import time
from dataclasses import dataclass, field

from tensorflow.keras.layers import LSTM


@dataclass
class TrainParameters():
    """
    Settings bundle passed to `load_data` and `create_model`.

    Every field has a default, so `TrainParameters()` is a complete
    configuration and callers override only what they need, e.g.
    `TrainParameters(ticker="MSFT", epochs=5)`.

    The imports this class needs live above the class rather than in its body:
    names imported inside a class body become class attributes
    (`TrainParameters.os`, `TrainParameters.tf`, ...), which is never wanted.
    """

    ### data preparation parameters
    # number of consecutive rows that make up one input sequence
    window_size : int = 60
    # how many rows ahead the target ("future") value lies
    predict_step : int = 1
    # fraction of the samples held out as the test set
    test_split  : float = 0.20
    # dataframe columns fed to the model; a fresh list per instance
    feature_columns : list = field(default_factory = lambda: ["adjclose", "volume", "open", "high", "low"])
    # whether each feature column is min-max scaled before windowing
    scale : bool = True
    # whether samples are shuffled when splitting into train and test
    shuffle : bool = True
    # date stamp (YYYY-MM-DD) used in file and model names; computed when the
    # instance is created, not once when the class is defined, so a
    # long-running kernel does not keep handing out a stale date
    date_now : str = field(default_factory = lambda: time.strftime("%Y-%m-%d"))

    ### model parameters
    # number of stacked recurrent layers
    model_layers: int = 3
    # recurrent layer class instantiated for every layer
    model_cell : type = LSTM
    # units per recurrent layer
    lstm_neurons : int = 256
    # dropout rate applied after every recurrent layer
    dropout : float = 0.40

    ### training parameters
    # mean absolute error loss
    # LOSS = "mae"
    # huber loss
    loss : str = "huber_loss"
    optimizer: str = "adam"
    batch_size : int = 64
    epochs : int = 400

    # ticker symbol to download; `load_data` also accepts an already loaded
    # pd.DataFrame here
    ticker : str = 'MSFT'

    # Both must be dynamic (properties, not fields) because they are resolved
    # late: they depend on other fields that the caller may override.
    @property
    def ticker_file(self):
        """Path of the CSV file for this ticker and date, under `data`."""
        return os.path.join("data", f"{self.ticker}_{self.date_now}.csv")

    @property
    def model_name(self):
        """Name to save the model under, as unique as possible for these parameters."""
        return (
            f"{self.date_now}_{self.ticker}-{self.loss}-{self.optimizer}"
            f"-{self.model_cell.__name__}-seq-{self.window_size}"
            f"-step-{self.predict_step}-layers-{self.model_layers}"
            f"-units-{self.lstm_neurons}"
        )


def load_data(p : TrainParameters):
    """
    Loads price data (from Yahoo Finance or a supplied dataframe), then scales,
    windows and splits it into train and test sets.

    Params:
        p (TrainParameters): the run settings. The fields read here are:
            ticker (str/pd.DataFrame): symbol to download with yahoo_fin, or an
                already loaded dataframe (it is copied, never modified)
            window_size (int): the historical sequence length used to predict
            predict_step (int): the future lookup step to predict (1 = next row)
            scale (bool): whether to min-max scale every feature column to 0..1
            shuffle (bool): whether to shuffle the samples when splitting
            test_split (float): ratio of samples used as test data
            feature_columns (list): the columns fed into the model

    Returns:
        dict with the keys
            "df": copy of the dataframe as loaded, before any scaling
            "column_scaler": {column: fitted MinMaxScaler}; only present when
                `p.scale` is True
            "last_sequence": the most recent feature rows, kept for predicting
                dates that are not in the dataset
            "X_train", "X_test", "y_train", "y_test": the split samples/targets

    Raises:
        TypeError: if `p.ticker` is neither a str nor a pd.DataFrame
        AssertionError: if a feature column is missing from the dataframe
        ValueError: if there are not enough rows to build a single window
    """

    from sklearn import preprocessing
    from sklearn.model_selection import train_test_split
    from collections import deque

    # see if ticker is already a loaded stock from yahoo finance
    if isinstance(p.ticker, str):
        # yahoo_fin is only needed (and only imported) when we have to download
        from yahoo_fin import stock_info as si
        df = si.get_data(p.ticker)
    elif isinstance(p.ticker, pd.DataFrame):
        # already loaded; work on a copy because the frame is scaled, extended
        # and truncated below and the caller's data must stay untouched
        df = p.ticker.copy()
    else:
        raise TypeError("ticker can be either a str or a `pd.DataFrame` instances")

    # this will contain all the elements we want to return from this function
    result = {}
    # we will also return the original (unscaled) dataframe itself
    result['df'] = df.copy()

    # make sure that the passed feature_columns exist in the dataframe
    for col in p.feature_columns:
        assert col in df.columns, f"'{col}' does not exist in the dataframe."

    if p.scale:
        column_scaler = {}
        # scale the data (prices) from 0 to 1, one scaler per column so each
        # column can be inverse-transformed on its own later
        for column in p.feature_columns:
            scaler = preprocessing.MinMaxScaler()
            df[column] = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
            column_scaler[column] = scaler

        # add the MinMaxScaler instances to the result returned
        result["column_scaler"] = column_scaler

    # add the target column (label) by shifting by `predict_step`
    df['future'] = df['adjclose'].shift(-p.predict_step)

    # the last `predict_step` rows contain NaN in the future column;
    # grab their features before dropping the NaNs
    last_sequence = np.array(df[p.feature_columns].tail(p.predict_step))

    # drop NaNs (any row with a missing value in any column)
    df = df.dropna()

    # slide a fixed-size window over the rows; once it is full, every further
    # row yields one (window, target) sample
    X, y = [], []
    sequences = deque(maxlen=p.window_size)
    for entry, target in zip(df[p.feature_columns].values, df['future'].values):
        sequences.append(entry)
        if len(sequences) == p.window_size:
            X.append(np.array(sequences))
            y.append(target)

    # get the last sequence by appending the last window with the `predict_step` rows
    # for instance, if window_size=50 and predict_step=10, last_sequence should be of 59 (that is 50+10-1) length
    # this last_sequence will be used to predict in future dates that are not available in the dataset
    last_sequence = list(sequences) + list(last_sequence)
    # shift the last sequence by -1
    last_sequence = np.array(pd.DataFrame(last_sequence).shift(-1).dropna())
    # add to result
    result['last_sequence'] = last_sequence

    if not X:
        # without this the reshape below fails with an opaque IndexError
        raise ValueError(
            f"not enough rows to build a single window of {p.window_size} rows "
            f"with predict_step={p.predict_step}"
        )

    # convert to numpy arrays: X is (samples, window_size, n_features)
    X = np.array(X)
    y = np.array(y)

    # reshape X to (samples, n_features, window_size) to fit the neural network
    # NOTE(review): `reshape` reinterprets the flat buffer, it does NOT swap the
    # window and feature axes (that would be `np.transpose(X, (0, 2, 1))`), so
    # each row mixes features and time steps. Left unchanged on purpose:
    # `create_model`'s input_shape and any code that prepares `last_sequence`
    # for prediction rely on this exact layout and must be changed together.
    X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))

    # split the dataset
    result["X_train"], result["X_test"], result["y_train"], result["y_test"] = train_test_split(
        X, y, test_size=p.test_split, shuffle=p.shuffle)
    # return the result
    return result


def create_model(p : TrainParameters):
    """
    Builds and compiles the stacked recurrent regression model described by `p`.

    The network is `p.model_layers` layers of `p.model_cell` with
    `p.lstm_neurons` units each, every one followed by `Dropout(p.dropout)`,
    and a final `Dense(1)` that outputs the single predicted value.

    Params:
        p (TrainParameters): the run settings. The fields read here are
            model_layers, model_cell, lstm_neurons, dropout, window_size,
            loss and optimizer.

    Returns:
        the compiled (untrained) Sequential model

    Raises:
        ValueError: if `p.model_layers` is smaller than 1
    """
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout

    if p.model_layers < 1:
        raise ValueError(f"model_layers must be at least 1, got {p.model_layers}")

    model = Sequential()
    last_layer = p.model_layers - 1
    for i in range(p.model_layers):
        # every layer but the last hands its full sequence to the next recurrent
        # layer; the last one returns only its final state for the Dense head.
        # This also covers model_layers == 1, where the first layer IS the last
        # one and therefore must not return sequences.
        layer_kwargs = {"return_sequences": i < last_layer}
        if i == 0:
            # only the first layer declares the input: any number of steps,
            # each carrying `window_size` values (the layout `load_data` emits)
            layer_kwargs["input_shape"] = (None, p.window_size)
        model.add(p.model_cell(p.lstm_neurons, **layer_kwargs))
        # add dropout after each layer
        model.add(Dropout(p.dropout))

    # final layer is 1 output given "prediction"
    model.add(Dense(1, activation="linear"))
    model.compile(loss = p.loss, metrics=["mean_absolute_error"], optimizer = p.optimizer)
    return model



In [78]:
# Run settings for this session: a short 5-epoch run on MSFT, with every other
# field left at its default.
a = TrainParameters(epochs=5, ticker="MSFT")

# Prepare the train/test arrays and build the compiled model from those
# same settings.
data = load_data(a)
model = create_model(a)

In [None]:
# Train with the batch size and epoch count from the run settings; the held-out
# test split is also used as validation data after every epoch.
# Callbacks (checkpointer, tensorboard) are currently not passed to fit.
history = model.fit(
    data["X_train"],
    data["y_train"],
    validation_data=(data["X_test"], data["y_test"]),
    epochs=a.epochs,
    batch_size=a.batch_size,
    verbose=1,
)

Train on 6836 samples, validate on 1710 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
 832/6836 [==>...........................] - ETA: 2s - loss: 2.8324e-04 - mean_absolute_error: 0.0138