In [1]:
# https://www.wrighters.io/parameters-jupyter-notebooks-with-papermill/
import papermill as pm
from itertools import product

In [2]:
# --- Output switches ---
verbose = False  # set to True to print more output
store_results = True  # True writes run results to df_eval_results; set to False to skip

# --- Symbol selection ---
# number of the most-common symbols from days_lookbacks' performance rankings to keep
n_top_syms = 20
# start index and end index of n_top_syms for evaluation
syms_start, syms_end = 0, 10

# --- Sampling ---
# number of max lookback tuples to create for a train run (default 400; use 12 to debug)
n_samples_train = 400


In [3]:
# Axes of the parameter sweep; every combination of the three lists is run once.

# Sets of days_lookbacks to sweep.
# Other candidates, currently disabled:
#   [30], [60], [120], [30, 60], [60, 120],
#   [15, 30, 60], [30, 60, 120], [15, 30, 60, 120]
l_days_lookbacks = [[15], [15, 30]]

# days_eval values to sweep (alternatives tried: [4, 5, 6] and [4])
l_days_eval = [4, 5]

run_types = ["train", "validate", "test"]

In [4]:
# Parameters that stay the same for every run of the sweep
run_params = {
    'verbose': verbose,
    'store_results': store_results,
    'n_top_syms': n_top_syms,
    'syms_start': syms_start,
    'syms_end': syms_end,
}

run_params

{'verbose': False,
 'store_results': True,
 'n_top_syms': 20,
 'syms_start': 0,
 'syms_end': 10}

In [5]:
# Number of (run_type, days_eval, days_lookbacks) combinations in the sweep
len_list = sum(1 for _ in product(run_types, l_days_eval, l_days_lookbacks))
len_list

12

In [6]:
# Show every combination that the sweep will run, in execution order
[*product(run_types, l_days_eval, l_days_lookbacks)]

[('train', 4, [15]),
 ('train', 4, [15, 30]),
 ('train', 5, [15]),
 ('train', 5, [15, 30]),
 ('validate', 4, [15]),
 ('validate', 4, [15, 30]),
 ('validate', 5, [15]),
 ('validate', 5, [15, 30]),
 ('test', 4, [15]),
 ('test', 4, [15, 30]),
 ('test', 5, [15]),
 ('test', 5, [15, 30])]

#### Check the run parameters before running: the full run takes about 1 hour and 20 minutes.

In [None]:
# Execute yf_7train.ipynb once per (run_type, days_eval, days_lookbacks) combination.
# Takes about 1.5 hrs to run 90 iterations.
# NOTE: every run writes to the same output notebook path, so each run overwrites
# the executed notebook left by the previous one.
runs = list(product(run_types, l_days_eval, l_days_lookbacks))
len_list = len(runs)

# run_type -> (n_samples, fp_df_eval_results).
# n_samples_train is used to scale n_samples of the other run_types, in proportion
# to a .7,.2,.1 split of train, validate, test.
run_settings = {
    'train': (n_samples_train, 'df_eval_results_train'),
    'validate': (round(n_samples_train * 0.2/0.7), 'df_eval_results_validate'),
    'test': (round(n_samples_train * 0.1/0.7), 'df_eval_results_test'),
    'current': (1, 'Not_Applicable'),
}
debug_settings = (2, 'df_eval_results')  # used for any other run_type (debug)

for i, (run_type, days_eval, days_lookbacks) in enumerate(runs, start=1):
    n_samples, fp_df_eval_results = run_settings.get(run_type, debug_settings)
    if run_type == 'current':
        days_eval = 0  # no need to eval when getting the current picks

    # Build a per-run copy so the shared run_params dict is not mutated and the
    # cell stays idempotent when re-run.
    params = {
        **run_params,
        'run_type': run_type,
        'fp_df_eval_results': fp_df_eval_results,
        'n_samples': n_samples,
        'days_eval': days_eval,
        'days_lookbacks': days_lookbacks,
    }

    print(f'{i:<4} of {len_list}    run_type: {run_type:<10}    fp_df_eval_results: {fp_df_eval_results:<26}    n_samples: {n_samples:<5.0f}    days_eval: {days_eval:<5.0f}    days_lookbacks: {days_lookbacks}')

    # res keeps the value returned by the most recent run for interactive inspection
    res = pm.execute_notebook(
        'yf_7train.ipynb',
        'yf_7trainPM_out.ipynb',
        parameters=params,
    )

#### Check run results

In [None]:
# from myUtils import pickle_load
# path_dir = "C:/Users/ping/MyDrive/stocks/yfinance/"
# path_data_dump = path_dir + "VSCode_dump/"

# # df = pickle_load(path_data_dump, 'df_eval_results_train')
# # df = pickle_load(path_data_dump, 'df_eval_results_validate')
# # df = pickle_load(path_data_dump, 'df_eval_results_test')
# df = pickle_load(path_data_dump, 'df_picks')
# df

In [None]:
# # tbl_concat.loc[tbl_concat['run_type'] == 'train']
# df.loc[df['date_end_df_train'] == '2023-03-17', 'sym_freq_9']
# # df.loc[df['date_end_df_train'] == '2023-03-17', 'sym_freq_8']