In [3]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
plt.style.use('fivethirtyeight')

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import Adam

import optuna
from optuna.samplers import RandomSampler


from pandas_datareader import data as pdr
import yfinance as yfin
import datetime as dt

# Yahoo API may have broken previous versions of pd_datareader, so this is a workaround.
yfin.pdr_override()
pd.options.mode.chained_assignment = None  # default='warn'

In [5]:
# Yahoo Finance stock scraping.
# **Careful with how many times you run this to avoid IP ban**
TICKER = 'AMZN'
START = dt.datetime(2012, 1, 1)
END = dt.datetime.today()

stock = pdr.get_data_yahoo(TICKER, START, END).rename(columns= {'Adj Close': 'AdjClose'})

print(stock.shape[0], "unique points loaded with attributes: \n", stock.
      keys())

def series_to_supervised(data, n_in=5, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
    data: Sequence of observations as a list or NumPy array.
    n_in: Number of lag observations as input (X).
    n_out: Number of observations as output (y).
    dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
    Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
    if i == 0:
        names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
    else:
        names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
        
    return agg

[*********************100%***********************]  1 of 1 completed
2818 unique points loaded with attributes: 
 Index(['Open', 'High', 'Low', 'Close', 'AdjClose', 'Volume'], dtype='object')


In [7]:
# Specify number of days to use for beta calculation (255 = 1yr).
window = 252 

# Specify a market highly correlated with 'stock'.
market_ticker = 'SPY'

def beta(df, market=None):
    # If the market values are not passed,
    # I'll assume they are located in a column
    # named 'Market'.  If not, this will fail.
    if market is None:
        market = df['MarketClose']
        df = df.drop('MarketClose', axis=1)
    X = market.values.reshape(-1, 1)
    X = np.concatenate([np.ones_like(X), X], axis=1)
    b = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(df.values)
    return float(b[1])

def roll(df, w=252):
    # Takes 'w'-sized slices from dataframe, incrementing 1 entry at a time.
    for i in range(df.shape[0] - w + 1):
        yield pd.DataFrame(df.values[i:i+w, :], df.index[i:i+w],
                           df.columns)


#### Combining stock + market data and computing.
market = pdr.get_data_yahoo(market_ticker,
                            START,
                            END).rename(columns={'Adj Close': 'MarketClose'})

betas = np.array([])
dat = pd.concat([stock.AdjClose, market.MarketClose], axis=1)

for  i, sdf in enumerate(roll(dat.pct_change().dropna(), window)):
    betas = np.append(betas, beta(sdf))

datFull = dat.drop(index=dat.index[:window], axis=0, inplace=False)
datFull['Beta'] = betas.tolist()

[*********************100%***********************]  1 of 1 completed


In [8]:
LAG = 60 # Number of days to use for predicting the following day(s).
DAYS = 1 # Number of days to predict with each lag period.
TRAIN_RATIO = 0.70


# Selecting 'AdjClose' prices as input and target feature for time series.
data = datFull.filter(['AdjClose']).values

# Scaling data. Ensures quicker convergence to solution.
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(data)

# Splitting input features and target object, X and y.
supervised_data = series_to_supervised(scaled_data, n_in=LAG, n_out=DAYS)
y = supervised_data['var1(t)'] # Isolating target object.
X = supervised_data.loc[:, supervised_data.columns != 'var1(t)'] 

# Selecting converted data for train-test split.
len_training = int(np.ceil(len(scaled_data) * TRAIN_RATIO))

X_train = X.iloc[0:len_training].to_numpy()
y_train = y.iloc[0:len_training].to_numpy()
# X_train, y_train = np.array(X_train), np.array(y_train)

# We subtract lag since we need the lag days to actually make test predictions.
X_test = X.iloc[len_training-60:].to_numpy()
y_test = data[len_training:]

# Reshaping to obtain 3D reps (currently 2d) to pass into LSTM.
# LSTM expects d1 # of samples, d2 # of timesteps, and d3 # of features.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

if len(X_test) != len(y_test):
    raise Warning('X_test, y_test length mismatch.')

# generator = TimeseriesGenerator(scaled_data, scaled_data, length=60, batch_size=1)

In [21]:
"""
Optuna implementation that optimizes an LSTM neural network lag series 
stock data using Keras.

We optimize LSTM units, LSTM layer dropout, and learning rate.

IN PROGRESS
Last Revised: 15 Mar 2023
"""




BATCHSIZE = 128 #128
CLASSES = 10
EPOCHS = 1 #10


def create_model(trial):
    model = Sequential()

    units=trial.suggest_int('unit', 64, 128, step=2)
    model.add(
        LSTM(
            units=units,
            activation='tanh',
            recurrent_activation='sigmoid',
            unroll=False,
            use_bias=True,
            dropout=trial.suggest_float('droupout', 0, 1),
            # recurrent_dropout=trial.suggest_float('recurrent_droupout', 0, 1),
            return_sequences=True,
            input_shape=(X_train.shape[1], 1)
        )
    )
    model.add(
        LSTM(
            units=int(units/2),
            activation='tanh',
            recurrent_activation='sigmoid',
            unroll=False,
            use_bias=True,
            dropout=trial.suggest_float('droupout', 0, 1),
            # recurrent_dropout=trial.suggest_float('recurrent_droupout', 0, 1),
            return_sequences=False,
        )
    )
    model.add(
        Dense(
            CLASSES,
            activation='relu',
            use_bias=True
        )
    )
    model.add(
        Dense(
            1,
            activation='relu',
            use_bias=True
        )
    )

    # We compile our model with a sampled learning rate.
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    model.compile(
        loss="mean_squared_error",
        optimizer=Adam(
            learning_rate=0, # learning_rate,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-07
        ),
        metrics=["accuracy"]
    )

    if trial.should_prune():
            raise optuna.TrialPruned()
    
    return model


def objective(trial):
    model = create_model(trial)
    model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        shuffle=True,
        batch_size=BATCHSIZE,
        epochs=EPOCHS,
        verbose=True,
    )

    # Evaluate the model accuracy on the validation set.
    score = model.evaluate(X_test, y_test, verbose=True)
    
    predictions = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions)

    # RMSE.
    rmse = np.sqrt(np.mean(predictions - y_test)**2)

    return rmse #score[1]


if __name__ == "__main__":
    study_name = 'test'
    n_trials = 2
    warnings.warn(
        "Layer LSTM will only use cuDNN high-efficiency kernals\n"
        "when training with layer params 'activation==tanh'\n"
        "'recurrent_activation==sigmoid', 'unroll=False',\n"
        "'use_bias=True', and 'recurrent_dropout=0.0'."
    )
    study = optuna.create_study(direction="maximize", study_name=study_name)
    # Use n_jobs=-1 for full parallelization.
    study.optimize(objective, n_trials=n_trials, n_jobs=1, timeout=600)

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

when training with layer params 'activation==tanh'
'recurrent_activation==sigmoid', 'unroll=False',
'use_bias=True', and 'recurrent_dropout=0.0'.
[32m[I 2023-03-16 12:04:06,042][0m A new study created in memory with name: test[0m
2023-03-16 12:04:07.021553: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:07.333624: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:07.515229: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:07.786040: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:08.079546: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is e



2023-03-16 12:04:09.910258: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:10.013151: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:10.144007: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2023-03-16 12:04:10.913168: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:11.004845: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


 1/25 [>.............................] - ETA: 13s

2023-03-16 12:04:11.131522: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




[32m[I 2023-03-16 12:04:11,933][0m Trial 0 finished with value: 126.74019886334324 and parameters: {'unit': 94, 'droupout': 0.5911348488264657, 'learning_rate': 0.006537840599765375}. Best is trial 0 with value: 126.74019886334324.[0m
2023-03-16 12:04:12.965395: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:13.278528: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:13.444920: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:13.748924: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:14.118341: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU



2023-03-16 12:04:16.544239: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:16.660107: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:16.793230: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2023-03-16 12:04:17.528861: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 12:04:17.624380: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


 1/25 [>.............................] - ETA: 13s

2023-03-16 12:04:17.755962: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




[32m[I 2023-03-16 12:04:18,540][0m Trial 1 finished with value: 118.01529811417637 and parameters: {'unit': 68, 'droupout': 0.5121910829577211, 'learning_rate': 7.297797251116168e-05}. Best is trial 0 with value: 126.74019886334324.[0m


Number of finished trials: 2
Best trial:
  Value: 126.74019886334324
  Params: 
    unit: 94
    droupout: 0.5911348488264657
    learning_rate: 0.006537840599765375


In [None]:
# Use 'direction='minimize' to minimize runtime, 'maximize' accuracy.
study = optuna.create_study(sampler=RandomSampler(seed=40),
                            direction="minimize",
                            study_name='StockOpt')

In [22]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

seed = 7
numpy.random.seed(seed)

from sklearn.datasets import load_iris

X, encoded_Y = load_iris(return_X_y=True)
mms = MinMaxScaler()
X = mms.fit_transform(X)

dummy_y = np_utils.to_categorical(encoded_Y)

def baseline_model():

    model = Sequential()
    model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
    model.add(Dense(8, activation="relu", kernel_initializer="normal"))
    model.add(Dense(3, activation="softmax", kernel_initializer="normal"))

    model.compile(loss= 'categorical_crossentropy' , optimizer='adam', metrics=[
        'accuracy' ])

    return model

estimator = KerasClassifier(build_fn=baseline_model, epochs=200, verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print(results)

  estimator = KerasClassifier(build_fn=baseline_model, epochs=200, verbose=0)
2023-03-16 13:01:18.253206: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:01:31.381487: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:01:31.649176: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:01:43.658059: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:01:43.877487: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2023-03-16 13:01:56.132536: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:01:56.363459: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2023-03-16 13:02:08.278950: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:02:08.535369: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:02:20.717657: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:02:20.956950: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:02:33.438043: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:02:33.694185: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-03-16 13:02:45.926007: I tensorflow/core/grappler/optimizers/cust

[0.9333334  1.         1.         1.         1.         1.
 1.         0.86666673 1.         0.86666673]


2023-03-16 13:03:21.906189: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
