In [1]:
import os, sys
import numpy as np
import tensorflow as tf
src_path = os.path.join('..', 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)
from W_Preproc import Weekly_Preprocessor as WP

In [2]:
'''
Create a generator from a lists of preprocessors
Batch size represents the number of weeks, not the number of
examples. The number of examples is much larger than the number of
weeks. The generator randomizes over the given wps in hopes of generalizing
over different periods of time
'''
def stochastic_gen(wp, weeks_in_batch):
    num_weeks = lambda wp: (wp.end_year - wp.start_year + 1) * 52
    rand_week = lambda num_weeks: int((np.random.random() * num_weeks) + 1)
    n_examples = 0
    while True:
        n_examples = 0
        xs = []
        ys = []
        while n_examples < weeks_in_batch:
            wp.cur_week = rand_week(num_weeks(wp))
            result = wp.get_next_week()
            if result is not None:
                x, y, x_names, prices, companies, b_date, s_date, cur_week = result
                xs.append(x)
                ys.append(y[:, None])
                n_examples += 1
        yield np.concatenate(xs, axis=0), np.concatenate(ys, axis=0)[:, 0], x_names

In [3]:
train = WP(40, 1970, 1995, binary=True)
val = WP(40, 1996, 2005, binary=True)
test = WP(40, 2010, 2021, binary=True)

In [23]:
data_generator = stochastic_gen(train, 20)
val_generator = stochastic_gen(val, 6)
cur_x, cur_y, x_names = data_generator.__next__()
val_x, val_y, _ = val_generator.__next__()

model = tf.keras.models.Sequential()
model.add(tf.keras.Input(shape=cur_x.shape[1:]))
model.add(tf.keras.layers.LSTM(100, return_sequences=True, kernel_regularizer=tf.keras.regularizers.l2(1e-7)))
model.add(tf.keras.layers.LSTM(100, return_sequences=True, kernel_regularizer=tf.keras.regularizers.l2(1e-7)))
model.add(tf.keras.layers.LSTM(30, kernel_regularizer=tf.keras.regularizers.l2(1e-7)))
model.add(tf.keras.layers.Dense(30, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-8)))
model.add(tf.keras.layers.Dense(30, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-8)))
model.add(tf.keras.layers.Dense(15, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-8)))
model.add(tf.keras.layers.Dense(1, activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l2(1e-8)))

model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'])

for i in range(5):
    model.fit(cur_x, cur_y, epochs=20, batch_size=128, validation_data=(val_x, val_y))
    pred = model.predict(val_x)
    print(i, pred.std())
    del cur_x
    del cur_y
    del val_x
    del val_y
    cur_x, cur_y, _ = data_generator.__next__()
    val_x, val_y, _ = val_generator.__next__()

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
0 5.9604645e-08
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20

KeyboardInterrupt: 

In [11]:
data_generator = stochastic_gen(train, 20)
cur_x, _, _ = data_generator.__next__()
model.predict(cur_x[0:10])

array([[0.57250744],
       [0.5725075 ],
       [0.57250774],
       [0.57250744],
       [0.57250774],
       [0.5725075 ],
       [0.5725076 ],
       [0.5725077 ],
       [0.5725076 ],
       [0.5725075 ]], dtype=float32)

In [9]:
data_generator

<generator object stochastic_gen at 0x7f9d7217f3c0>