In [1]:
def compare_lists(list_a, list_b):
  """Return the items of list_a that do not occur in list_b.

  The relative order of list_a is preserved and duplicates in list_a are kept,
  so the result is an ordered "A minus B", not a set difference.

  Args:
    list_a: Iterable of objects to filter (e.g. a list or a pandas Index).
    list_b: Container of objects to exclude.

  Returns:
    A new list with every item of list_a for which ``item in list_b`` is False.
  """
  # Build a hash-based lookup once so each membership test is O(1) instead of
  # rescanning list_b for every item of list_a.  Only plain built-in sequences
  # and sets are converted; any other container keeps its own ``in`` semantics.
  lookup = None
  if isinstance(list_b, (list, tuple, set, frozenset)):
    try:
      lookup = set(list_b)
    except TypeError:
      # list_b holds unhashable items (e.g. nested lists): use linear scans.
      lookup = None

  def _in_list_b(item):
    """Membership test that falls back to a linear scan for unhashable items."""
    if lookup is not None:
      try:
        return item in lookup
      except TypeError:
        # item itself is unhashable; defer to the container's own ``in``.
        pass
    return item in list_b

  return [item for item in list_a if not _in_list_b(item)]

In [2]:
import pandas as pd
from myUtils import pickle_load, pickle_dump
# Widen pandas' text display so printed DataFrames are not cut off too early.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_colwidth', 30)
pd.set_option('display.width', 900)

# NOTE(review): machine-specific absolute path; kept as a plain string with a
# trailing slash because the other cells pass it to pickle_load/pickle_dump as-is.
path_dir = "C:/Users/ping/MyDrive/stocks/yfinance/"
path_data_dump = path_dir + "VSCode_dump/"

# Names of the pickled DataFrames stored under path_data_dump.
fp_df_picks = 'df_picks'
fp_df_model_top_picks = 'df_model_top_picks'

In [3]:
# Load the saved picks, then in one pass:
#   1. keep only the last row for each (date, max lookback, lookbacks) key,
#   2. order rows descending on the same key so the most recent date comes first,
#   3. renumber the index from 0.
df_picks = (
    pickle_load(path_data_dump, fp_df_picks)
    .drop_duplicates(
        subset=['date_end_df_train', 'max_days_lookbacks', 'days_lookbacks'],
        keep='last',
    )
    .sort_values(
        by=['date_end_df_train', 'max_days_lookbacks', 'days_lookbacks'],
        ascending=False,
    )
    .reset_index(drop=True)
)

# Write the cleaned frame back under the same name, replacing the stored copy.
pickle_dump(df_picks, path_data_dump, fp_df_picks)
print(f'df_picks, len({len(df_picks)}):\n{df_picks}')

df_picks, len(274):
    date_end_df_train  max_days_lookbacks     days_lookbacks sym_freq_15 sym_freq_14 sym_freq_13      sym_freq_12              sym_freq_11       sym_freq_10               sym_freq_9               sym_freq_8                     sym_freq_7                     sym_freq_6                     sym_freq_5                     sym_freq_4                     sym_freq_3 sym_freq_2
0          2023-11-03                 120      [30, 60, 120]          []          []          []               []                       []                []          ['FTSM', 'SHV']                  ['ANF']                             []         ['EDU', 'GPS', 'ICPT']  ['GBTC', 'HRB', 'LRN', 'NR...                       ['CBOE']  ['AGYS', 'BTC-USD', 'CLS',...         []
1          2023-11-03                 120  [15, 30, 60, 120]          []          []          []               []                  ['SHV']          ['FTSM']                       []           ['ANF', 'LRN']               ['GBTC', 'ICP

In [4]:
# Earliest and latest training end dates present in df_picks.
# NOTE(review): the printed values look like 'YYYY-MM-DD' strings, for which
# min/max ordering matches chronological order - confirm the column dtype.
start_date = df_picks['date_end_df_train'].min()
end_date = df_picks['date_end_df_train'].max()

print(f'df_picks start date: {start_date}')
print(f'df_picks end date: {end_date}')

df_picks start date: 2023-03-15
df_picks end date: 2023-11-03


In [5]:
# Distinct training end dates in df_picks, in order of first appearance.
l_dates_df_picks = df_picks['date_end_df_train'].unique().tolist()
print(
    f'l_dates_df_picks, len({len(l_dates_df_picks)}):\n'
    f'{l_dates_df_picks}'
)

l_dates_df_picks, len(137):
['2023-11-03', '2023-11-02', '2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27', '2023-10-26', '2023-10-25', '2023-10-24', '2023-10-23', '2023-10-20', '2023-10-19', '2023-10-17', '2023-10-12', '2023-10-06', '2023-10-05', '2023-10-04', '2023-10-02', '2023-09-29', '2023-09-28', '2023-09-25', '2023-09-22', '2023-09-21', '2023-09-20', '2023-09-18', '2023-09-15', '2023-09-14', '2023-09-13', '2023-09-12', '2023-09-11', '2023-09-08', '2023-09-05', '2023-09-01', '2023-08-25', '2023-08-18', '2023-08-17', '2023-08-15', '2023-08-14', '2023-08-11', '2023-08-10', '2023-08-09', '2023-08-08', '2023-08-07', '2023-08-04', '2023-08-03', '2023-08-02', '2023-08-01', '2023-07-27', '2023-07-26', '2023-07-25', '2023-07-21', '2023-07-20', '2023-07-18', '2023-07-14', '2023-07-13', '2023-07-12', '2023-07-11', '2023-07-10', '2023-07-07', '2023-07-06', '2023-07-05', '2023-07-03', '2023-06-30', '2023-06-29', '2023-06-28', '2023-06-27', '2023-06-26', '2023-06-23', '2023-06-22', '2023

In [6]:
import pandas_market_calendars as mcal
# New York Stock Exchange trading calendar.
nyse = mcal.get_calendar('NYSE')

# NYSE trading days from the df_picks start date to its end date,
# formatted as 'YYYY-MM-DD' strings.
dates_NYSE = (
    nyse.valid_days(start_date=start_date, end_date=end_date)
    .strftime('%Y-%m-%d')
)

# Same dates ordered newest first.
dates_NYSE_reversed_sorted = dates_NYSE.sort_values(ascending=False)

print('NYSE dates from df_picks start date to end date')
print(
    f'dates_NYSE_reversed_sorted, len({len(dates_NYSE)}):\n'
    f'{dates_NYSE_reversed_sorted}'
)

NYSE dates from df_picks start date to end date
dates_NYSE_reversed_sorted, len(163):
Index(['2023-11-03', '2023-11-02', '2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27', '2023-10-26', '2023-10-25', '2023-10-24', '2023-10-23',
       ...
       '2023-03-28', '2023-03-27', '2023-03-24', '2023-03-23', '2023-03-22', '2023-03-21', '2023-03-20', '2023-03-17', '2023-03-16', '2023-03-15'], dtype='object', length=163)


In [7]:
# NYSE trading days that have no entry in df_picks, newest first.
NYSE_dates_missing_in_df_picks = sorted(
    compare_lists(dates_NYSE_reversed_sorted, l_dates_df_picks),
    reverse=True,
)
print(
    f'NYSE_dates_missing_in_df_picks, (len={len(NYSE_dates_missing_in_df_picks)}):\n'
    f'{NYSE_dates_missing_in_df_picks}'
)

NYSE_dates_missing_in_df_picks, (len=26):
['2023-10-18', '2023-10-16', '2023-10-13', '2023-10-11', '2023-10-10', '2023-10-09', '2023-10-03', '2023-09-27', '2023-09-26', '2023-09-19', '2023-09-07', '2023-09-06', '2023-08-31', '2023-08-30', '2023-08-29', '2023-08-28', '2023-08-24', '2023-08-23', '2023-08-22', '2023-08-21', '2023-08-16', '2023-07-31', '2023-07-28', '2023-07-24', '2023-07-19', '2023-07-17']


## Add picks from missing dates to df_picks

In [8]:
# Load the pickled object stored as 'df_model_picks_results' and show it in full.
df_model_picks_results = pickle_load(path_data_dump, 'df_model_picks_results')
print(
    f'df_model_picks_results, len({len(df_model_picks_results)}):\n'
    f'{df_model_picks_results}\n'
)

df_model_picks_results, len(125):
           date      days_lookback    syms_freq                  symbols    date_buy     sh_portf_buy  $_portf_buy sh_SPY_buy   $_SPY_buy   date_sell    sh_portf_sell  $_portf_sell  %_portf_chg sh_SPY_sell  $_SPY_sell  %_SPY_chg  dif_%_chg  win
4    2023-09-01      [30, 60, 120]   sym_freq_9                  ['SHV']  2023-09-05            [9.0]   990.629997      [2.0]  898.479980  2023-09-08            [9.0]    991.440033     0.081770       [2.0]  891.039978  -0.828065   0.909835    1
5    2023-09-01      [30, 60, 120]   sym_freq_8         ['FTSM', 'STRL']  2023-09-05       [8.0, 6.0]   959.080002      [2.0]  898.479980  2023-09-08       [8.0, 6.0]    947.040024    -1.255367       [2.0]  891.039978  -0.828065  -0.427302    0
6    2023-08-25      [30, 60, 120]   sym_freq_9                  ['SHV']  2023-08-28            [9.0]   989.454735      [2.0]  885.520020  2023-08-31            [9.0]    990.350945     0.090576       [2.0]  900.700012   1.714246  -

In [9]:
# Load the pickled object stored as 'df_model_picks' and show it in full.
df_model_picks = pickle_load(path_data_dump, 'df_model_picks')
print(
    f'df_model_picks, len({len(df_model_picks)}):\n'
    f'{df_model_picks}'
)

df_model_picks, len(4):
         date  days_lookback   syms_freq           symbols    date_buy sh_portf_buy  $_portf_buy sh_SPY_buy   $_SPY_buy date_sell sh_portf_sell  $_portf_sell  %_portf_chg sh_SPY_sell  $_SPY_sell  %_SPY_chg  dif_%_chg  win
0  2023-09-08  [30, 60, 120]  sym_freq_9   ['FTSM', 'SHV']        None         None          NaN       None         NaN      None          None           NaN          NaN        None         NaN        NaN        NaN    0
1  2023-09-08  [30, 60, 120]  sym_freq_8  ['DELL', 'STRL']        None         None          NaN       None         NaN      None          None           NaN          NaN        None         NaN        NaN        NaN    0
2  2023-09-05  [30, 60, 120]  sym_freq_9           ['SHV']  2023-09-06        [9.0]   990.809967      [2.0]  892.440002      None          None           NaN          NaN        None         NaN        NaN        NaN    0
3  2023-09-05  [30, 60, 120]  sym_freq_8          ['FTSM']  2023-09-06       [16.0]   95