In [1]:
import sys, os, shutil
import numpy as np
import pandas as pd
import tensorflow as tf

# Put the sibling src directory at the front of the import path, once
if '../src' not in sys.path:
    sys.path.insert(0, '../src')

In [6]:
## Add src modules
from W_Preproc import Weekly_Preprocessor as WP
from W_BackTester import W_BackTester
from Strategy import Strategy

In [7]:
def get_cval_list(start_year, end_year, test_size):
    '''
    Returns an array of year boundaries where element i is the start of
    an interval and element i + 1 is the end of that interval.

    Parameters
    ----------
    start_year : int
        First boundary of the result.
    end_year : int
        Exclusive upper limit; no boundary >= end_year is produced, so
        end_year itself is never part of the result.
    test_size : float
        Fraction of the (end_year - start_year) span that one segment
        should cover.

    Returns
    -------
    numpy.ndarray
        Integer years start_year, start_year + step, ... below end_year,
        with step = floor((end_year - start_year) * test_size), clamped
        to a minimum of one year.
    '''
    start_year, end_year = int(start_year), int(end_year)
    span = end_year - start_year
    # np.floor returns a float, which used to turn every boundary into a
    # float year (1995.0); convert so the boundaries stay integers.
    # A step of 0 (span * test_size < 1) makes np.arange fail, so use at
    # least one year per segment.
    years_per_segment = max(1, int(np.floor(span * test_size)))
    return np.arange(start_year, end_year, years_per_segment)

In [8]:
def get_wps(year_bounds):
    '''
    Instantiate one preprocessor for each cval segment.

    Adjacent entries of year_bounds are paired up; each pair (lo, hi)
    produces WP(40, lo, hi - 1). The first argument is fixed at 40; its
    meaning is defined by Weekly_Preprocessor, which is not visible here.

    Returns a list with len(year_bounds) - 1 preprocessors (empty when
    fewer than two boundaries are given).
    '''
    adjacent_bounds = zip(year_bounds[:-1], year_bounds[1:])
    return [WP(40, seg_start, next_start - 1)
            for seg_start, next_start in adjacent_bounds]

In [22]:
def create_gen(wps, weeks_in_batch):
    '''
    Create an endless batch generator from a list of preprocessors.

    weeks_in_batch is the number of weeks per batch, not the number of
    examples: every get_next_week() call that returns data contributes
    one week, and the per-week arrays are concatenated along axis 0.

    Because this is a generator function, the cur_week reset below only
    runs when the first batch is requested.

    Yields
    ------
    (x, y, x_names)
        x and y are the concatenated per-week arrays; x_names is the
        value returned with the most recently read week.
    '''
    # Rewind every preprocessor to its first week
    for preproc in wps:
        preproc.cur_week = 1

    # Grows without bound; reduced modulo len(wps) whenever it is used
    source_idx = 0
    while True:
        week_xs, week_ys = [], []
        while len(week_xs) < weeks_in_batch:
            week = wps[source_idx % len(wps)].get_next_week()
            if week is None:
                # No data from this preprocessor: try the next one.
                # NOTE(review): if no preprocessor ever returns data
                # again, this inner loop never terminates - confirm the
                # semantics of get_next_week().
                source_idx += 1
                continue
            # A week is an 8-tuple; only the first three fields are used
            x, y, x_names, _prices, _companies, _b_date, _s_date, _cur_week = week
            week_xs.append(x)
            week_ys.append(y[:, None])
        batch_x = np.concatenate(week_xs, axis=0)
        batch_y = np.concatenate(week_ys, axis=0)[:, 0]
        yield batch_x, batch_y, x_names

In [17]:
def train_model(
    train_wps,
    test_wp,
    tr_batch_weeks = 15,
    val_batch_weeks = 4,
    batch_size = 256,
    epochs = 30,
    iters = 50):
    '''
    Trains an LSTM given a list of preprocessors.

    The network is three stacked LSTM layers (100, 50, 30 units) followed
    by three 30-unit ReLU dense layers and a single linear output unit,
    all with L2 kernel regularisation, compiled with Adam
    (learning rate 1e-4) and mean squared error loss.

    Parameters
    ----------
    train_wps : list
        Preprocessors that feed the training batches.
    test_wp
        Preprocessor that feeds the validation batches.
    tr_batch_weeks, val_batch_weeks : int
        Weeks per training / validation batch drawn from create_gen.
    batch_size, epochs : int
        Forwarded to model.fit for every batch.
    iters : int
        Number of (training batch, validation batch) pairs to fit on.

    Returns
    -------
    The fitted tf.keras Sequential model.
    '''
    keras = tf.keras
    l2 = keras.regularizers.l2

    train_batches = create_gen(train_wps, tr_batch_weeks)
    val_batches = create_gen([test_wp], val_batch_weeks)

    # The first batch is needed up front: its shape defines the model input
    cur_x, cur_y, _ = next(train_batches)
    val_x, val_y, _ = next(val_batches)

    model = keras.models.Sequential()
    layer_stack = [
        keras.Input(shape=cur_x.shape[1:]),
        keras.layers.LSTM(100, return_sequences=True, kernel_regularizer=l2(1e-5)),
        keras.layers.LSTM(50, return_sequences=True, kernel_regularizer=l2(1e-5)),
        keras.layers.LSTM(30, kernel_regularizer=l2(1e-5)),
        keras.layers.Dense(30, activation='relu', kernel_regularizer=l2(1e-4)),
        keras.layers.Dense(30, activation='relu', kernel_regularizer=l2(1e-4)),
        keras.layers.Dense(30, activation='relu', kernel_regularizer=l2(1e-4)),
        keras.layers.Dense(1, kernel_regularizer=l2(1e-4)),
    ]
    for layer in layer_stack:
        model.add(layer)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        loss=keras.losses.MeanSquaredError(),
    )

    for i in range(iters):
        model.fit(
            cur_x,
            cur_y,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(val_x, val_y),
        )
        # Spread of the validation predictions, printed per iteration
        print(i, model.predict(val_x).std())

        # Release the current arrays before pulling the next batches
        del cur_x, cur_y, val_x, val_y
        # NOTE(review): a fresh pair of batches is also pulled after the
        # final iteration and then discarded; this still advances the
        # preprocessors (test_wp included) - confirm whether later code
        # depends on that position before changing it.
        cur_x, cur_y, _ = next(train_batches)
        val_x, val_y, _ = next(val_batches)

    return model

In [None]:
def get_results(model, test_wp):
    '''
    Back-test the model on the testing segment over a grid of strategies
    and return what the back-tester reports.

    A Strategy is created for every combination of 10 buy cut-offs
    (1 to 1.1), 10 sell cut-offs (0.9 to 1) and 10 maximum allocations
    (0.05 to 0.5), i.e. 1000 strategies, each constructed with 100000 as
    its first argument (presumably the starting capital - confirm
    against Strategy). All of them write to
    ../data_files/backtest_data/results_<start_year of test_wp>.

    Returns the value of W_BackTester.backtest() (the original code
    called it leg_mse; its exact meaning is defined in W_BackTester).
    '''
    out_dir = os.path.join(
        '..', 'data_files', 'backtest_data',
        'results_' + str(test_wp.start_year))

    buy_levels = np.linspace(1, 1.1, 10)
    sell_levels = np.linspace(0.9, 1, 10)
    alloc_caps = np.linspace(0.05, 0.5, 10)

    # Same ordering as nested loops: buy outermost, allocation innermost
    strats = [
        Strategy(100000, buy, sell, cap, out=out_dir)
        for buy in buy_levels
        for sell in sell_levels
        for cap in alloc_caps
    ]

    backtester = W_BackTester(
        preprocessor = test_wp,
        strategies = strats,
        model = model)
    return backtester.backtest()
    

In [19]:
## Create the cross-validation weekly preprocessors (WPs)

In [20]:
# Cross-validation setup: the boundaries start at start_year, stay below
# end_year, and each segment covers roughly test_size of the whole span.
# With these values the recorded run printed five two-year segments,
# 1995-1996 through 2003-2004.
start_year = 1995
end_year = 2006
test_size = 0.2

# Segment boundaries, then one preprocessor per segment
c_val_years = get_cval_list(start_year, end_year, test_size)
wps = get_wps(c_val_years)

In [14]:
## Cross-validate: hold out each preprocessor (WP) in turn and train on the rest

In [25]:
# Show the (start_year, end_year) span of every segment
for wp in wps:
    print((wp.start_year, wp.end_year))

# Leave-one-segment-out: each preprocessor is the test set once, the
# remaining ones form the training set; one result is kept per fold.
mses = []
for i, test_wp in enumerate(wps):
    train_wps = wps[:i] + wps[i + 1:]
    model = train_model(train_wps, test_wp, iters=1)
    mses.append(get_results(model, test_wp))

(1995.0, 1996.0)
(1997.0, 1998.0)
(1999.0, 2000.0)
(2001.0, 2002.0)
(2003.0, 2004.0)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
0 0.0046109525
Trading on 1995-12-15 00:00:00 MSE: 00.0041 HL: 00.4537
Trading on 1995-12-22 00:00:00 MSE: 00.0015 HL: 00.6006
Trading on 1995-12-29 00:00:00 MSE: 00.0030 HL: 00.4441
Trading on 1996-01-05 00:00:00 MSE: 00.0026 HL: 00.3003
Trading on 1996-01-12 00:00:00 MSE: 00.0024 HL: 00.6656
Trading on 1996-01-19 00:00:00 MSE: 00.0030 HL: 00.6178
Trading on 1996-01-26 00:00:00 MSE: 00.0022 HL: 00.6401
Trading on 1996-02-02 00:00:00 MSE: 00.0021 HL: 00.7038
Trading on 1996-02-09 00:00:00 MSE: 00.0013 HL: 00.4204
Trading on 1996-02-16 00:00:00 MSE: 00.0016 H

In [2]:
# Scratch experiment: does calling compile() a second time reset the
# weights of an already trained model? Fit a one-unit dense model on two
# toy samples and print the weights before and after recompiling.
nn = tf.keras.models.Sequential()
nn.add(tf.keras.layers.Dense(1))
nn.compile(loss='mean_squared_error', optimizer='adam')
nn.fit(np.array([[1, 2], [3, 4]]), np.array([1, 0]), batch_size = 1, epochs = 10)
print(nn.get_weights())

# Recompile and print again; in the recorded run both printouts are identical
nn.compile(loss='mean_squared_error', optimizer='adam')
print(nn.get_weights())


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[array([[-1.1063333 ],
       [ 0.93625957]], dtype=float32), array([-0.00617704], dtype=float32)]
[array([[-1.1063333 ],
       [ 0.93625957]], dtype=float32), array([-0.00617704], dtype=float32)]


In [None]:
# Average, over all folds, of the values collected from get_results
print(np.mean(mses))