In [None]:
# bunch of the initialization code #

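# imports to be added
# NOTE(review): `return_score(...)` and `results_to_latex(...)` are called as functions
# later in this notebook; a bare `import <name>` binds a module, so import the callables
# themselves (e.g. `from <module> import <name>`) once the module paths are known.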
import return_score
import results_to_latex

# 1. Optimization

## Kalman Filter

In [None]:
# Seed forwarded to the optimiser through `seed=`.
SEED = 3178749

# Search ranges for the Kalman workflow. Each dimension's `name` is used
# directly as a kwarg, so the names must match the workflow's parameters.
search_space_kalman = [
    Real(1e-5, 0.1, prior='log-uniform', name='delta'),
    Real(0.5, 4, prior='log-uniform', name='obs_cov_reg'),
    Real(0.001, 0.1, prior='log-uniform', name='trans_cov_avg'),
    Real(0.1, 10, prior='log-uniform', name='obs_cov_avg'),
]

# Run the Bayesian optimisation over the search space above.
res_kalman = bayesian_optimize_workflow(
    execute_workflow_fn=execute_kalman_workflow,
    search_space=search_space_kalman,
    top_pair_count=10,
    start_year=2008,
    min_end_year=2016,
    max_end_year=2021,
    n_calls=30,
    seed=SEED,
    verbose=True,
)

# Pair every dimension name with the value at the same position in `res_kalman.x`.
param_names = [dimension.name for dimension in search_space_kalman]
best_params = dict(zip(param_names, res_kalman.x))
best_mean_mse = res_kalman.fun

## Transformer

In [None]:
# Seed forwarded to the optimiser through `seed=`.
SEED = 3178749

# Search ranges for the Transformer workflow. Each dimension's `name` is used
# directly as a kwarg, so the names must match the workflow's parameters.
search_space_transformer = [
    ...
]

# Run the Bayesian optimisation over the search space above.
res_transformer = bayesian_optimize_workflow(
    execute_workflow_fn=execute_transformer_workflow,
    search_space=search_space_transformer,
    top_pair_count=10,
    start_year=2008,
    min_end_year=2016,
    max_end_year=2021,
    n_calls=30,
    seed=SEED,
    verbose=True,
)

# Pair every dimension name with the value at the same position in `res_transformer.x`.
param_names = [dimension.name for dimension in search_space_transformer]
best_params = dict(zip(param_names, res_transformer.x))
best_mean_mse = res_transformer.fun

## Time-MoE

In [None]:
# Seed forwarded to the optimiser through `seed=`.
SEED = 3178749

# Search ranges for the Time-MoE workflow. Each dimension's `name` is used
# directly as a kwarg, so the names must match the workflow's parameters.
search_space_timemoe = [
    ...
]

# Run the Bayesian optimisation over the search space above.
res_timemoe = bayesian_optimize_workflow(
    execute_workflow_fn=execute_timemoe_workflow,
    search_space=search_space_timemoe,
    top_pair_count=10,
    start_year=2008,
    min_end_year=2016,
    max_end_year=2021,
    n_calls=30,
    seed=SEED,
    verbose=True,
)

# Pair every dimension name with the value at the same position in `res_timemoe.x`.
param_names = [dimension.name for dimension in search_space_timemoe]
best_params = dict(zip(param_names, res_timemoe.x))
best_mean_mse = res_timemoe.fun

# 2. Results

In [None]:
### Unchanged variables ###
# Setting shared by the result cells below; it is passed as `verbose=` to the
# workflow call when gathering results.
verbose = True
### Unchanged variables ###

## Kalman Filter

In [None]:
# Hard code hyperparameters based on results above
# The four names below match the dimension names declared in `search_space_kalman`.
# TODO: fill in every right-hand side with the optimised value. As written the
# assignments are incomplete, so this cell does not parse until they are set.
delta: float =
obs_cov_reg: float =
trans_cov_avg: float =
obs_cov_avg: float =

### Year-specific data ###
# Date windows: the full range starts in 2008 and ends at the close of `end_year`;
# the "Test" window (per the variable names) is the calendar year `end_year`.
startDateStr = '2008-01-01'
end_year = 2022
endDateStr = f'{end_year}-12-31'
startDateStrTest = f'{end_year}-01-01'
endDateStrTest = f'{end_year}-12-31'
train_frac, dev_frac = _get_train_dev_frac(startDateStr, endDateStr, startDateStrTest, endDateStrTest)

# Load the price data for the cached ticker list, then apply the two filter
# steps in order: NaN removal first, liquidity second.
instrumentIdsNASDAQandNYSE = load_cached_etf_tickers()
data = gather_data_cached_using_truncate(
    startDateStr, endDateStr, instrumentIdsNASDAQandNYSE, cache_dir='../src/data/cache'
)
(
    data_close_filtered_1,
    data_open_filtered_1,
    data_high_filtered_1,
    data_low_filtered_1,
    data_vol_filtered_1,
    data_original_format_filtered_1,
) = step_1_filter_remove_nans(
    data['close'], data['open'], data['high'], data['low'], data['vol'], data
)
(
    data_close_filtered_2,
    data_open_filtered_2,
    data_high_filtered_2,
    data_low_filtered_2,
    data_vol_filtered_2,
    data_original_format_filtered_2,
) = step_2_filter_liquidity(
    data_close_filtered_1,
    data_open_filtered_1,
    data_high_filtered_1,
    data_low_filtered_1,
    data_vol_filtered_1,
    data_original_format_filtered_1,
)

# Try the pairs cache first; a `None` result is treated as a cache miss, in which
# case the pairs are recomputed and written back to the cache.
pairs_data_filtered = gather_pairs_data_cached(
    startDateStr, endDateStr, instrumentIdsNASDAQandNYSE, cache_dir='../src/data/cache'
)
if pairs_data_filtered is None:
    scores, pvalues, pairs = find_cointegrated_pairs(data_original_format_filtered_2)
    # Keep element [1] of each pair's value and sort ascending by it.
    pairs_data = {key: value[1] for (key, value) in pairs.items()}
    pairs_data = sorted(pairs_data.items(), key=lambda x: x[1])
    pairs_data_filtered = filter_pairs_data(pairs_data) # filter based on cointegration in such a way that we can simply pick the highest pair of stocks in the list.
    # Save with the same ticker list that the cache lookup above uses, so the
    # entry written here is the one found on the next run. The original passed
    # `instrumentIds`, which is not defined in this notebook.
    save_pairs_data_filtered(
        pairs_data_filtered, startDateStr, endDateStr, instrumentIdsNASDAQandNYSE, cache_dir='../src/data/cache'
    )
### Year-specific data ###

### OPTIONAL: define workflow here for debugging ###


### OPTIONAL: define workflow here for debugging ###

# Gather results for 2022
# For each of the first (at most three) entries of `pairs_data_filtered`, run the
# Kalman workflow with the hard-coded hyperparameters and collect one summary
# tuple plus the raw workflow output.
results_kalman_2022 = []
all_outputs_kalman_2022 = []
num_results = min(len(pairs_data_filtered), 3)
for i in tqdm(range(num_results), desc = "Gathering [...]"):
    # Each entry is indexed as ((ticker_a, ticker_b), score).
    ticker_a, ticker_b = pairs_data_filtered[i][0][0], pairs_data_filtered[i][0][1]
    pair_tup_str_current = f"({ticker_a},{ticker_b})"
    pairs_timeseries_df = combine_pairs_data(
        data_close_filtered_2,
        data_open_filtered_2,
        data_high_filtered_2,
        data_low_filtered_2,
        data_vol_filtered_2,
        ticker_a,
        ticker_b,
    )
    output_returns = get_gt_yoy_returns_test_dev(pairs_timeseries_df, dev_frac, train_frac, look_back=20)
    gt_yoy, gt_yoy_for_dev_dataset = output_returns['gt_yoy_test'], output_returns['gt_yoy_dev']
    output_model = execute_kalman_workflow(
        pair_data=pairs_timeseries_df,
        verbose=verbose,
        pair_tup_str=pair_tup_str_current,
        train_frac=train_frac,
        dev_frac=dev_frac,
        add_technical_indicators=False,
        # Forward the hard-coded hyperparameters; previously they were defined
        # but never passed, so the tuned values had no effect. The kwarg names
        # are the search-space dimension names, which are used directly as kwargs.
        delta=delta,
        obs_cov_reg=obs_cov_reg,
        trans_cov_avg=trans_cov_avg,
        obs_cov_avg=obs_cov_avg,
    )
    yoy_str = f"{output_model['yoy_mean'] * 100:.2f}% +- {output_model['yoy_std'] * 100:.2f}%"
    returns_score = return_score(output_model['yoy_mean'], gt_yoy)
    cointegration_score = pairs_data_filtered[i][1]
    # Tuple layout: (pair, cointegration_score, val, test, yoy_str, gt_yoy, returns_score)
    results_kalman_2022.append((
        pair_tup_str_current,
        cointegration_score,
        output_model['val_mse'],
        output_model['test_mse'],
        yoy_str,
        gt_yoy,
        returns_score,
    ))
    all_outputs_kalman_2022.append(output_model)

In [None]:
# Print the output of `results_to_latex` for the Kalman results list built in the
# previous cell. The original referenced `results`, which is not defined in this
# notebook.
print(results_to_latex(results_kalman_2022))

## Transformer

In [None]:
# Hard code hyperparameters based on results above
# TODO: fill in every right-hand side with the optimised value. As written the
# assignments are incomplete, so this cell does not parse until they are set.
## optimized hyperparams: architecture ##
d_model: int =
nhead: int =
num_layers: int =
dropout: float =
## optimized hyperparams: architecture ##
## optimized hyperparams: learning algorithm ##
learning_rate: float =
min_learning_rate: float =
warmup_ratio: float =
weight_decay: float =
batch_size: int =
adam_beta1: float =
adam_beta2: float =
adam_epsilon: float =
## optimized hyperparams: learning algorithm ##

## Time-MoE

In [None]:
# Hard code hyperparameters based on results above
# TODO: fill in every right-hand side with the optimised value. As written the
# assignments are incomplete, so this cell does not parse until they are set.
# NOTE(review): these eight names are identical to the learning-algorithm names
# assigned in the Transformer cell, so whichever cell runs last decides the
# values bound to them.
## optimized hyperparams: learning algorithm ##
learning_rate: float =
min_learning_rate: float =
warmup_ratio: float =
weight_decay: float =
batch_size: int =
adam_beta1: float =
adam_beta2: float =
adam_epsilon: float =
## optimized hyperparams: learning algorithm ##