In [1]:
# # create empty DataFrame df_model_top_picks with the columns below
# my_cols = ['date', 'days_lookback', 'syms_freq', 'symbols']
# # Create the empty DataFrame and store it in variable df_model_top_picks
# df_model_top_picks = pd.DataFrame(columns=my_cols)
# df_model_top_picks

In [2]:
import pandas as pd
from itertools import product
from ast import literal_eval
from myUtils import pickle_load, pickle_dump

# Console display limits for printed DataFrames.
for _option, _value in (
  ('display.max_rows', 100),
  ('display.max_columns', 30),
  ('display.max_colwidth', 30),
  ('display.width', 900),
):
  pd.set_option(_option, _value)

# Base data directory and the sub-directory passed to pickle_load / pickle_dump.
path_dir = "C:/Users/ping/MyDrive/stocks/yfinance/"
path_data_dump = f"{path_dir}VSCode_dump/"

# Names under which the two DataFrames are pickled.
fp_df_picks = 'df_picks'  # stock picks by criteria: CAGR/UI, CAGR/rtn_std, rtd/UI
fp_df_model_top_picks = 'df_model_top_picks'  # top stock picks from model developed by back test

verbose = True
# verbose = False

#### Clean df_picks data:<br>- drop duplicates<br>- sort on date<br>- re-index<br>- save

In [3]:
# Columns used both to detect duplicate rows and to order the frame.
df_picks_key_cols = ['date_end_df_train', 'max_days_lookbacks', 'days_lookbacks']

# Load -> keep the last of each duplicated key -> sort descending -> fresh 0..n-1 index.
df_picks = (
  pickle_load(path_data_dump, fp_df_picks)
  .drop_duplicates(subset=df_picks_key_cols, keep='last')
  .sort_values(by=df_picks_key_cols, ascending=False)
  .reset_index(drop=True)
)

# Saved with the same path/name arguments used for the load above
# (presumably overwriting that pickle - pickle_dump is defined in myUtils).
pickle_dump(df_picks, path_data_dump, fp_df_picks)
print(f'len(df_picks): {len(df_picks)}')
print(df_picks.head())

len(df_picks): 390
  date_end_df_train  max_days_lookbacks     days_lookbacks sym_freq_15 sym_freq_14 sym_freq_13 sym_freq_12 sym_freq_11 sym_freq_10 sym_freq_9      sym_freq_8       sym_freq_7                   sym_freq_6                     sym_freq_5                     sym_freq_4                     sym_freq_3 sym_freq_2
0        2023-12-20                 120      [30, 60, 120]          []          []          []          []          []          []         []  ['GPS', 'SHV']         ['FTSM']                           []     ['ANF', 'HIBB', 'SQ', 'X']  ['LRN', 'M', 'MARA', 'MBI'...  ['BA', 'BPMC', 'FFWM', 'GB...         []
1        2023-12-20                 120  [15, 30, 60, 120]          []          []          []          []          []     ['SHV']         []         ['GPS']  ['FTSM', 'MBI']  ['FFWM', 'HA', 'MARA', 'X']          ['ANF', 'HIBB', 'SQ']  ['HPP', 'LC', 'LRN', 'M', ...                             []         []
2        2023-12-19                 120      [30, 60, 120

#### Get the dates in df_picks whose "days_lookbacks" value matches an item in my_days_lookbacks.

In [4]:
my_days_lookbacks = [[30, 60, 120], [15, 30, 60, 120]]

# Gather every 'date_end_df_train' value whose 'days_lookbacks' entry equals the
# string form of one of my_days_lookbacks (the column is compared against str(...)).
# A set removes dates that appear under more than one lookback list.
unique_dates = set()
for lookback in my_days_lookbacks:
  is_lookback = df_picks['days_lookbacks'] == str(lookback)
  unique_dates.update(df_picks.loc[is_lookback, 'date_end_df_train'].tolist())

# unique dates, newest to oldest, e.g. ['2023-12-15', ... ,  '2023-03-15']
dates_in_days_lookbacks = sorted(unique_dates, reverse=True)

#### Create a list of tuples of all combinations of dates_in_days_lookbacks, my_days_lookbacks, my_cols<br> e.g. [('2023-12-15', [30, 60, 120], 'sym_freq_12'), ... , ('2023-03-15', [15, 30, 60, 120], 'sym_freq_8')]

In [5]:
my_cols = ['sym_freq_12', 'sym_freq_9', 'sym_freq_8']

# Cross product of dates x lookback lists x frequency columns; the date varies
# slowest and the column fastest.
# e.g. [('2023-12-15', [30, 60, 120], 'sym_freq_12'), ... , ('2023-03-15', [15, 30, 60, 120], 'sym_freq_8')]
d_lbk_freq = [
  (date_end, lookbacks, freq_col)
  for date_end, lookbacks, freq_col in product(dates_in_days_lookbacks, my_days_lookbacks, my_cols)
]

In [6]:
# Hard-coded summary of the model's ranking; the (lookbacks, column) pairs are the
# same ones listed in criteria_model_top_picks further down the notebook.
print('Model predicts the best performing symbols are from:')
for rank, lookbacks, freq_col, eval_days in (
  ('1st', '[30, 60, 120]', 'sym_freq_9', 'days_eval = 4'),
  ('2nd', '[15, 30, 60, 120]', 'sym_freq_12', 'days_eval = 5'),
  ('3rd', '[30, 60, 120]', 'sym_freq_8', 'days_eval = 4'),
):
  print(f'{rank}    {lookbacks:<20}{freq_col:<14}{eval_days:<20}')

Model predicts the best performing symbols are from:
1st    [30, 60, 120]       sym_freq_9    days_eval = 4       
2nd    [15, 30, 60, 120]   sym_freq_12   days_eval = 5       
3rd    [30, 60, 120]       sym_freq_8    days_eval = 4       


#### For each combination, get the symbols from the df_picks column named in my_cols, in the row where "date_end_df_train" and "days_lookbacks" match the date from dates_in_days_lookbacks and the list from my_days_lookbacks.

In [7]:
# picks, e.g.: [(0, []), (1, []), (2, ['SHV']), (3, []), ... , (998, ['ELF']), (999, []), ...]
# Exactly one (position, symbols) entry is appended per item of d_lbk_freq, in the
# same order, so the two lists can later be paired positionally.
picks = []
prev_date = ''

for i, item in enumerate(d_lbk_freq):
  _date, my_days_lookback, my_col = item
  mask_date_n_days_lookback = (df_picks['date_end_df_train'] == _date) & (df_picks['days_lookbacks'] == str(my_days_lookback))
  list_in_df_picks = df_picks.loc[mask_date_n_days_lookback, my_col]  # pandas series, e.g. 0 ['AMPH', 'FCN']

  if list_in_df_picks.empty:
    # d_lbk_freq is a full cross product, so a date that exists in df_picks for only
    # one of the lookback lists has no matching row here. Record an empty pick
    # instead of raising IndexError, keeping picks aligned with d_lbk_freq.
    my_list = []
  else:
    my_str = list_in_df_picks.iloc[0]  # e.g. string: "['AMPH', 'FCN']"
    # Cells are expected to hold the string repr of a list; accept a real
    # list as well so literal_eval is never handed a non-string.
    my_list = literal_eval(my_str) if isinstance(my_str, str) else list(my_str)  # e.g. list: ['AMPH', 'FCN']

  # print symbols picked by the model; the date is shown only on the first row of
  # each date group, and groups are separated by a blank line
  row_text = f'{str(my_days_lookback):<21}{my_col:<11}: {my_list}'
  if _date == prev_date:
    print(f'{i:<6}{" ":<14}{row_text}')
  elif prev_date == '':
    print('\nModel Results:')
    print(f'{i:<6}{_date:<14}{row_text}')
  else:
    print(f'\n{i:<6}{_date:<14}{row_text}')

  picks.append((i, my_list))
  prev_date = _date


Model Results:
0     2023-12-20    [30, 60, 120]        sym_freq_12: []
1                   [30, 60, 120]        sym_freq_9 : []
2                   [30, 60, 120]        sym_freq_8 : ['GPS', 'SHV']
3                   [15, 30, 60, 120]    sym_freq_12: []
4                   [15, 30, 60, 120]    sym_freq_9 : []
5                   [15, 30, 60, 120]    sym_freq_8 : ['GPS']

6     2023-12-19    [30, 60, 120]        sym_freq_12: []
7                   [30, 60, 120]        sym_freq_9 : []
8                   [30, 60, 120]        sym_freq_8 : ['GPS', 'SHV']
9                   [15, 30, 60, 120]    sym_freq_12: []
10                  [15, 30, 60, 120]    sym_freq_9 : []
11                  [15, 30, 60, 120]    sym_freq_8 : ['GPS']

12    2023-12-18    [30, 60, 120]        sym_freq_12: []
13                  [30, 60, 120]        sym_freq_9 : []
14                  [30, 60, 120]        sym_freq_8 : ['SHV']
15                  [15, 30, 60, 120]    sym_freq_12: []
16                  [15, 30, 60

#### Create iterable z_items of dates_in_days_lookbacks, my_days_lookbacks, my_cols, and picks, e.g.:<br>(('2023-12-15', [30, 60, 120], 'sym_freq_12'), (0, []))<br>(('2023-12-15', [30, 60, 120], 'sym_freq_9'), (1, []))<br>...<br>(('2023-03-15', [15, 30, 60, 120], 'sym_freq_9'), (1150, ['SGEN']))<br>(('2023-03-15', [15, 30, 60, 120], 'sym_freq_8'), (1151, ['AMPH', 'FCN']))

In [8]:
# z_items pairs each (date, days_lookbacks, column) combination with its picks entry, e.g.:
# (('2023-12-14', [30, 60, 120], 'sym_freq_12'), (0, []))
# ...
# (('2023-03-15', [15, 30, 60, 120], 'sym_freq_8'), (1145, ['AMPH', 'FCN']))
# Materialized as a list: a bare zip() is a one-shot iterator, so a second pass over
# it (e.g. re-running the cell that loops over z_items) would silently see no items.
z_items = list(zip(d_lbk_freq, picks))

#### Criteria of the best stocks from the model

In [9]:
# (days_lookbacks, sym_freq column) pairs whose non-empty picks are kept as model top picks.
criteria_model_top_picks = [
  ([30, 60, 120], 'sym_freq_9'),
  ([15, 30, 60, 120], 'sym_freq_12'),
  ([30, 60, 120], 'sym_freq_8'),
]

In [10]:
# Load df_model_top_picks from its pickle and preview the first and last rows.
df_model_top_picks = pickle_load(path_data_dump, fp_df_model_top_picks)
print('len(df_model_top_picks):', len(df_model_top_picks))
head_rows, tail_rows = df_model_top_picks.head(), df_model_top_picks.tail()
print(head_rows, '\n')
print(tail_rows)


len(df_model_top_picks): 293
         date  days_lookback   syms_freq         symbols
0  2023-12-20  [30, 60, 120]  sym_freq_8  ['GPS', 'SHV']
1  2023-12-19  [30, 60, 120]  sym_freq_8  ['GPS', 'SHV']
2  2023-12-18  [30, 60, 120]  sym_freq_8         ['SHV']
3  2023-12-15  [30, 60, 120]  sym_freq_8         ['SHV']
4  2023-12-13  [30, 60, 120]  sym_freq_8  ['GPS', 'SHV'] 

           date  days_lookback   syms_freq          symbols
288  2023-03-20  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
289  2023-03-17  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
290  2023-03-17  [30, 60, 120]  sym_freq_8           ['GE']
291  2023-03-16  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
292  2023-03-15  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']


#### Add picks to df_model_top_picks if dlb_sym_freq matches an entry in criteria_model_top_picks and _syms has symbol(s)

In [11]:
# Collect qualifying rows first and append them in one concat. Growing the frame
# with df.loc[len(df)] = row inside the loop recopies it on every add and relies on
# the index being exactly 0..n-1 (otherwise an existing label would be overwritten).
len_df_model_top_picks = len(df_model_top_picks)  # row count before any additions
rows_to_add = []

for i, item in enumerate(z_items):
  # item is ((date, days_lookbacks, sym_freq column), (position, symbols))
  _date, _days_lookback, _sym_freq, _syms = item[0][0], item[0][1], item[0][2], item[1][1]
  print(f'{i}, {_date}, {_days_lookback}, {_sym_freq}, {_syms}')  
  dlb_sym_freq = (_days_lookback, _sym_freq)
  # add data if dlb_sym_freq in criteria_model_top_picks and _syms has symbol(s)
  if dlb_sym_freq in criteria_model_top_picks and _syms:
    row_add = [_date, _days_lookback, _sym_freq, _syms]
    new_row_label = len_df_model_top_picks + len(rows_to_add)  # position the row will occupy
    rows_to_add.append(row_add)
    print(f'\nadded row {new_row_label} to df_model_top_picks:\n{row_add}\n')

# Skip the concat entirely when nothing qualified, leaving the frame untouched.
if rows_to_add:
  df_model_top_picks = pd.concat(
    [df_model_top_picks, pd.DataFrame(rows_to_add, columns=df_model_top_picks.columns)],
    ignore_index=True,
  )


0, 2023-12-20, [30, 60, 120], sym_freq_12, []
1, 2023-12-20, [30, 60, 120], sym_freq_9, []
2, 2023-12-20, [30, 60, 120], sym_freq_8, ['GPS', 'SHV']

added row 293 to df_model_top_picks:
['2023-12-20', [30, 60, 120], 'sym_freq_8', ['GPS', 'SHV']]

3, 2023-12-20, [15, 30, 60, 120], sym_freq_12, []
4, 2023-12-20, [15, 30, 60, 120], sym_freq_9, []
5, 2023-12-20, [15, 30, 60, 120], sym_freq_8, ['GPS']
6, 2023-12-19, [30, 60, 120], sym_freq_12, []
7, 2023-12-19, [30, 60, 120], sym_freq_9, []
8, 2023-12-19, [30, 60, 120], sym_freq_8, ['GPS', 'SHV']

added row 294 to df_model_top_picks:
['2023-12-19', [30, 60, 120], 'sym_freq_8', ['GPS', 'SHV']]

9, 2023-12-19, [15, 30, 60, 120], sym_freq_12, []
10, 2023-12-19, [15, 30, 60, 120], sym_freq_9, []
11, 2023-12-19, [15, 30, 60, 120], sym_freq_8, ['GPS']
12, 2023-12-18, [30, 60, 120], sym_freq_12, []
13, 2023-12-18, [30, 60, 120], sym_freq_9, []
14, 2023-12-18, [30, 60, 120], sym_freq_8, ['SHV']

added row 295 to df_model_top_picks:
['2023-12-18', [

#### Cleanup df_model_top_picks<br>- convert data to str<br>- drop duplicates, keep last value<br>- sort date, newest first<br>- re-index

In [12]:
# Normalize every cell to str (list-valued cells become their string repr, which
# makes rows comparable), drop exact duplicate rows keeping the last occurrence,
# sort descending, and rebuild a 0..n-1 index.
df_model_top_picks = (
  df_model_top_picks
  .astype(str)
  .drop_duplicates(keep='last')
  .sort_values(by=['date', 'days_lookback', 'syms_freq'], ascending=False)
  .reset_index(drop=True)
)

# Save with the same path/name arguments the frame was loaded with.
pickle_dump(df_model_top_picks, path_data_dump, fp_df_model_top_picks)
print(f'len(df_model_top_picks): {len(df_model_top_picks)}')
print(f'{fp_df_model_top_picks}:\n{df_model_top_picks}')

len(df_model_top_picks): 293
df_model_top_picks:
           date  days_lookback   syms_freq          symbols
0    2023-12-20  [30, 60, 120]  sym_freq_8   ['GPS', 'SHV']
1    2023-12-19  [30, 60, 120]  sym_freq_8   ['GPS', 'SHV']
2    2023-12-18  [30, 60, 120]  sym_freq_8          ['SHV']
3    2023-12-15  [30, 60, 120]  sym_freq_8          ['SHV']
4    2023-12-13  [30, 60, 120]  sym_freq_8   ['GPS', 'SHV']
..          ...            ...         ...              ...
288  2023-03-20  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
289  2023-03-17  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
290  2023-03-17  [30, 60, 120]  sym_freq_8           ['GE']
291  2023-03-16  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
292  2023-03-15  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']

[293 rows x 4 columns]
