## Import

In [2]:
import q_learner as q_learner
import state_string_utils as stringutils
import state_enumerator as stateenum
import NAS 
import netparser
from tensorflow import keras
import numpy as np
import pandas as pd
from tqdm import tqdm
from dateutil.relativedelta import relativedelta
from datetime import datetime
import time
import importlib
import os

## Loading Data

In [4]:
# Step (in rows) between the points sampled inside each lookback window
feat_window = 90

# Performance-measure (label) horizon: number of years
pm_window = 3
# Lookback window length in rows: three label horizons expressed in days, plus one
lb_window = int(3 * pm_window * 365.25) + 1

# Offset between the starts of consecutive samples
# (rows; equals days if the series is daily - TODO confirm)
sample_window = 30

# Test period start (ISO date string)
test_start_date = '2020-06-30'

# Input parquet file. Set the MF_EXCESS_RETURN_PATH environment variable to run
# the notebook on another machine; otherwise the original local path is used.
filepath = os.environ.get(
    'MF_EXCESS_RETURN_PATH',
    '/Users/kz_ke/Documents/Masters/Classes/DL/WM-SecuritySelection-main/data/MF_LargeCap_ExcessReturn_3Y.parquet',
)

In [5]:
def prepare_data_for_er_ari(filepath, label_window=pm_window):
    """Load the per-ticker series and build forward-looking labels.

    Parameters
    ----------
    filepath : str
        Path of a parquet file read with ``pd.read_parquet``; every column is
        treated as one ticker's series.
    label_window : int, optional
        Label horizon in years. The label attached to a date is the series
        value ``label_window`` years later. Defaults to ``pm_window``.

    Returns
    -------
    data_dict : dict
        ticker -> that ticker's column with NaNs dropped. Tickers with no
        data, or whose history does not span more than ``label_window``
        years, are left out.
    label_dict : dict
        ticker -> Series indexed by every date up to ``label_window`` years
        before the ticker's last observation, holding the series value
        ``label_window`` years after each date.

    Raises
    ------
    KeyError
        If a date shifted forward by ``label_window`` years is not present in
        the ticker's index.
    """
    er_ari_df = pd.read_parquet(filepath)
    # Loop-invariant offset; honours the label_window argument instead of the
    # module-level pm_window.
    horizon = relativedelta(years=label_window)

    data_dict = {ticker: er_ari_df[ticker].dropna() for ticker in er_ari_df.columns}
    tickers_to_remove = []

    label_dict = {}
    for ticker, series in tqdm(data_dict.items()):
        # dropna() already ran, so an all-NaN column shows up as an empty series
        if series.empty:
            tickers_to_remove.append(ticker)
            continue

        # Last date that still has an observation `horizon` later
        last_date = series.index[-1] - horizon
        if last_date <= series.index[0]:
            tickers_to_remove.append(ticker)
            continue

        index = series.loc[:last_date].index
        label_dict[ticker] = pd.Series(
            [series[date + horizon] for date in index],
            index=index,
        )

    for ticker in tickers_to_remove:
        data_dict.pop(ticker)

    return data_dict, label_dict

In [6]:
%%time
# Build the per-ticker series and their forward labels.
# Slow step: the recorded run below took about five minutes.
data_dict, label_dict = prepare_data_for_er_ari(filepath)

100%|██████████████████████████████████████████████████████████████████████████████| 1330/1330 [05:05<00:00,  4.36it/s]

Wall time: 5min 5s





In [7]:
%%time

tickers = list(data_dict.keys())

train_data, train_labels = [], []
test_data, test_labels = [], []

# Test start date moved back by the label horizon: a window whose last feature
# date is on or before this checkpoint becomes a training sample.
checkpoint = datetime.strptime(test_start_date, '%Y-%m-%d') - relativedelta(years=pm_window)

for ticker in tqdm(tickers):
    label = label_dict[ticker]
    if label.shape[0] == 0:
        continue
    # Only keep observations that still have a label
    ts = data_dict[ticker].loc[:label.index[-1]]

    # One lookback window every `sample_window` rows; inside each window take
    # every `feat_window`-th row as a feature.
    window_starts = range(0, ts.shape[0] - lb_window + 1, sample_window)
    indices = [np.arange(start, start + lb_window, feat_window) for start in window_starts]

    temp_data = np.array([ts.iloc[positions].values for positions in indices])
    if temp_data.shape[0] == 0:
        continue

    # Date of the last sampled row of each window; it selects the label and
    # decides the train/test side.
    end_dates = [ts.index[positions[-1]] for positions in indices]
    temp_labels = np.array([label.loc[end_date] for end_date in end_dates])

    train_indices = []
    test_indices = []
    for idx, end_date in enumerate(end_dates):
        if end_date <= checkpoint:
            train_indices.append(idx)
        elif end_date > checkpoint:
            test_indices.append(idx)

    train_data.append(temp_data[train_indices])
    train_labels.append(temp_labels[train_indices])

    test_data.append(temp_data[test_indices])
    test_labels.append(temp_labels[test_indices])



100%|█████████████████████████████████████████████████████████████████████████████| 1130/1130 [00:08<00:00, 127.49it/s]

Wall time: 8.87 s





In [8]:
def reshape_input_data(x=None, y=None):
    """Reshape feature and/or label arrays to fixed ranks.

    ``x`` of rank 2, shape ``(n, f)``, becomes ``(n, 1, f)``; ``x`` of rank 3,
    shape ``(n, a, b)``, is reshaped to ``(n, b, a)``. ``y`` of rank 1 becomes
    a column ``(n, 1)``; ``y`` of rank 2 is returned untouched.

    NOTE(review): the rank-3 branch calls ``reshape`` rather than transposing,
    so elements are re-read in memory order instead of moving with their
    axes - confirm this is intended.

    Returns ``None`` when neither argument is given, the single reshaped array
    when only one is given, and the tuple ``(x, y)`` when both are given.

    Raises ``ValueError`` for any other rank of ``x`` or ``y``.
    """
    if x is not None:
        x_rank = len(x.shape)
        if x_rank not in (2, 3):
            raise ValueError('Invalid x shape: {}'.format(x.shape))
        if x_rank == 2:
            x = x.reshape(-1, 1, x.shape[1])
        else:
            x = x.reshape(-1, x.shape[2], x.shape[1])

    if y is not None:
        y_rank = len(y.shape)
        if y_rank not in (1, 2):
            raise ValueError('Invalid y shape: {}'.format(y.shape))
        if y_rank == 1:
            y = y.reshape(-1, 1)

    # Hand back only what the caller supplied
    given = tuple(arr for arr in (x, y) if arr is not None)
    if not given:
        return None
    if len(given) == 1:
        return given[0]
    return given

In [9]:
%%time
# Stack the per-ticker samples and insert a length-1 axis in the middle so the
# features are 3-D: (samples, 1, points per window).
x_train = np.concatenate(train_data)[:, np.newaxis, :]
y_train = np.concatenate(train_labels)[:, np.newaxis]

x_test = np.concatenate(test_data)[:, np.newaxis, :]
# Test labels are a column as well, so y_test has the same rank as y_train and
# evaluation never depends on implicit rank matching against the predictions.
y_test = np.concatenate(test_labels)[:, np.newaxis]

Wall time: 10 ms


## Running NAS

In [4]:
# Directory for the NAS logs; created on first run only
path = '/Users/kz_ke/Documents/Masters/Classes/DL/AutoML3/mylogs'
isExist = os.path.exists(path)
if not isExist:
    # exist_ok=True: do not fail if the directory appears between the check
    # above and this call.
    os.makedirs(path, exist_ok=True)
    print("The new directory is created!")

The new directory is created!


In [2]:
# Load the models.AutoML package. Because fromlist is non-empty, __import__
# returns models.AutoML itself rather than the top-level `models` package, and
# makes the two listed names available on it.
_model = __import__(
    name='models.AutoML',
    globals=globals(),
    locals=locals(),
    fromlist=['state_space_parameters', 'hyper_parameters'],
    level=0,
)

In [3]:
# Build the NAS factory from the search-space and hyper-parameter definitions
# of the imported model package.
# NOTE(review): 'mylogs' is presumably the log directory, resolved relative to
# the working directory; the meaning of the two trailing 1s is defined by
# NAS.NAS and is not visible here - confirm against its signature.
state_space = _model.state_space_parameters
hyper_params = _model.hyper_parameters
factory = NAS.NAS('mylogs', state_space, hyper_params, 1, 1)

In [25]:
# Sample a candidate architecture from the factory and turn its string
# description into a list of Keras layers.
net, i = factory.generate_new_netork()
print(net)
p = netparser.parse('net', net)
newnet = netparser.parse_network_structure(p)
model = keras.Sequential(newnet)

# Regression set-up: MSE loss, RMSE reported as the metric.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss='mean_squared_error',
    metrics=[keras.metrics.RootMeanSquaredError()],
)

# Batch and time dimensions are left open; only the feature size is fixed
model.build(input_shape=(None, None, x_train.shape[-1]))

model.fit(x_train, y_train, batch_size=500, epochs=10)

# evaluate() returns [loss, metric, ...]; index 1 is the RMSE on the test set
bestval = model.evaluate(x_test, y_test)[1]

# NOTE(review): the factory logs this value as "acc", but it is an RMSE, where
# lower is better - confirm the learner does not treat it as a score to maximise.
factory.incorporate_trained_net(net, bestval, 1, [i])

[LSTM(150,tanh), LSTM(200,leaky_relu), FC(200, sigmoid), D(0.200000), FC(1, linear)]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Incorporated net, acc: 0.071235, net: [LSTM(150,tanh), LSTM(200,leaky_relu), FC(200, sigmoid), D(0.200000), FC(1, linear)]


## Step by Step Running

In [10]:
# Draw one candidate architecture; `net` is its string description and `i` is
# the second value returned by the factory (passed back to it in the one-shot cell).
net, i = factory.generate_new_netork()
net

'[LSTM(100,sigmoid), FC(200, leaky_relu), D(0.100000), FC(1, leaky_relu)]'

In [11]:
# Parse the architecture string, then build the layer objects it describes
p= netparser.parse('net', net)
newnet = netparser.parse_network_structure(p)

In [12]:
# Inspect the layer objects produced by the parser
newnet

[<keras.layers.recurrent_v2.LSTM at 0x1963f4ff760>,
 <keras.layers.core.Dense at 0x1963f4ff490>,
 <keras.layers.core.Dropout at 0x1963f4ff310>,
 <keras.layers.core.Dense at 0x1963f4c0f10>]

In [13]:
# Stack the parsed layers into a sequential Keras model
model = keras.Sequential(newnet)

In [14]:
# Regression set-up: MSE loss, RMSE reported as the metric.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss='mean_squared_error',
    metrics=[keras.metrics.RootMeanSquaredError()],
)



In [15]:
# Fix only the feature size; batch and time dimensions stay open (None)
model.build(input_shape=(None, None, x_train.shape[-1]))

In [20]:
# Single-epoch training run for the step-by-step walkthrough
model.fit(x_train, y_train, batch_size = 40, epochs =1)



<keras.callbacks.History at 0x19644e338b0>

In [21]:
# evaluate() returns [loss, metric, ...]; index 1 is the RMSE metric set in compile()
bestval = model.evaluate(x_test, y_test)[1]



In [23]:
# Report the result back to the factory. Pass the `i` returned by
# generate_new_netork() for this net, as the one-shot cell does, instead of a
# hard-coded 1.
factory.incorporate_trained_net(net, bestval, 1, [i])

Incorporated net, acc: 0.066895, net: [LSTM(100,sigmoid), FC(200, leaky_relu), D(0.100000), FC(1, leaky_relu)]


In [41]:
# Table of the nets incorporated so far (net string, reported value, epsilon, iteration)
factory.replay_dictionary

Unnamed: 0,net,accuracy_best_val,epsilon,iteration
0,"[FC(200, leaky_relu), D(0.000000), FC(1, sigmo...",0.528048,1,1
1,"[LSTM(20,relu), LSTM(10,linear), FC(100, tanh)...",0.074497,1,2
2,"[FC(50, linear), FC(1, tanh)]",0.087128,1,3
0,"[FC(1, linear)]",0.089298,1,4
0,"[FC(1, relu)]",0.074311,1,5
