In [1]:
def compare_lists(list_a, list_b):
  """Compares two lists and returns a list of values that are in list A but not in list B.

  Args:
    list_a: A list of objects.
    list_b: A list of objects.

  Returns:
    A list of values that are in list_a but not in list_b.
  """

  list_difference = []

  for item in list_a:
    if item not in list_b:
      list_difference.append(item)

  return list_difference

In [2]:
import pandas as pd
from myUtils import pickle_load, pickle_dump
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_colwidth', 30)
pd.set_option('display.width', 900)

path_dir = "C:/Users/ping/MyDrive/stocks/yfinance/"
path_data_dump = path_dir + "VSCode_dump/"

# pickle file of past picks
fp_df_picks  = f'df_picks'
# pickle file of NYSE dates missing from df_picks
fp_NYSE_dates_missing_in_df_picks = f'NYSE_dates_missing_in_df_picks'

In [3]:
df_picks = pickle_load(path_data_dump, fp_df_picks)
# drop duplicates
df_picks = df_picks.drop_duplicates(subset=['date_end_df_train', 'max_days_lookbacks', 'days_lookbacks'], keep='last')
# sort, most recent date is first
df_picks = df_picks.sort_values(by=['date_end_df_train', 'max_days_lookbacks', 'days_lookbacks'], ascending=False)
# re-index
df_picks = df_picks.reset_index(drop=True)
# save results
pickle_dump(df_picks, path_data_dump, fp_df_picks)
print(f'df_picks, len({len(df_picks)}):\n{df_picks}')

df_picks, len(280):
    date_end_df_train  max_days_lookbacks     days_lookbacks sym_freq_15 sym_freq_14 sym_freq_13 sym_freq_12      sym_freq_11      sym_freq_10              sym_freq_9       sym_freq_8                     sym_freq_7                     sym_freq_6                     sym_freq_5                     sym_freq_4                     sym_freq_3 sym_freq_2
0          2023-11-08                 120      [30, 60, 120]          []          []          []          []               []               []         ['FTSM', 'SHV']          ['NRG']        ['ANF', 'CBOE', 'GBTC']                ['LRN', 'USAC']                 ['EDU', 'GPS']  ['AGYS', 'DELL', 'TGH', 'YY']  ['BTC-USD', 'CAH', 'CLS', ...         []
1          2023-11-08                 120  [15, 30, 60, 120]          []          []          []          []               []  ['FTSM', 'SHV']  ['GBTC', 'LRN', 'NRG']               []  ['AGYS', 'ANF', 'CBOE', 'T...            ['BTC-USD', 'USAC']                 ['EDU', 'GPS']    

In [4]:
start_date = df_picks.date_end_df_train.min()
end_date = df_picks.date_end_df_train.max()
print(f'df_picks start date: {start_date}')
print(f'df_picks end date: {end_date}')

df_picks start date: 2023-03-15
df_picks end date: 2023-11-08


In [5]:
l_dates_df_picks = df_picks.date_end_df_train.unique().tolist()  # unique dates in df_picks
print(f'l_dates_df_picks, len({len(l_dates_df_picks)}):\n{l_dates_df_picks}')

l_dates_df_picks, len(140):
['2023-11-08', '2023-11-07', '2023-11-06', '2023-11-03', '2023-11-02', '2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27', '2023-10-26', '2023-10-25', '2023-10-24', '2023-10-23', '2023-10-20', '2023-10-19', '2023-10-17', '2023-10-12', '2023-10-06', '2023-10-05', '2023-10-04', '2023-10-02', '2023-09-29', '2023-09-28', '2023-09-25', '2023-09-22', '2023-09-21', '2023-09-20', '2023-09-18', '2023-09-15', '2023-09-14', '2023-09-13', '2023-09-12', '2023-09-11', '2023-09-08', '2023-09-05', '2023-09-01', '2023-08-25', '2023-08-18', '2023-08-17', '2023-08-15', '2023-08-14', '2023-08-11', '2023-08-10', '2023-08-09', '2023-08-08', '2023-08-07', '2023-08-04', '2023-08-03', '2023-08-02', '2023-08-01', '2023-07-27', '2023-07-26', '2023-07-25', '2023-07-21', '2023-07-20', '2023-07-18', '2023-07-14', '2023-07-13', '2023-07-12', '2023-07-11', '2023-07-10', '2023-07-07', '2023-07-06', '2023-07-05', '2023-07-03', '2023-06-30', '2023-06-29', '2023-06-28', '2023-06-27', '2023

In [6]:
import pandas_market_calendars as mcal
nyse = mcal.get_calendar('NYSE')
# NYSE dates from df_picks start date to end date
dates_NYSE = nyse.valid_days(start_date=start_date, end_date=end_date).strftime('%Y-%m-%d')
# print(f'len(dates_NYSE): {len(dates_NYSE)}')
dates_NYSE_reversed_sorted = dates_NYSE.sort_values(ascending=False)
print('NYSE dates from df_picks start date to end date')
print(f'dates_NYSE_reversed_sorted, len({len(dates_NYSE)}):\n{dates_NYSE_reversed_sorted}') 

NYSE dates from df_picks start date to end date
dates_NYSE_reversed_sorted, len(166):
Index(['2023-11-08', '2023-11-07', '2023-11-06', '2023-11-03', '2023-11-02', '2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27', '2023-10-26',
       ...
       '2023-03-28', '2023-03-27', '2023-03-24', '2023-03-23', '2023-03-22', '2023-03-21', '2023-03-20', '2023-03-17', '2023-03-16', '2023-03-15'], dtype='object', length=166)


In [7]:
NYSE_dates_missing_in_df_picks  = compare_lists(dates_NYSE_reversed_sorted, l_dates_df_picks)
NYSE_dates_missing_in_df_picks.sort(reverse=True)  # sorted inplace, newest first
# save NYSE_dates_missing_in_df_picks
pickle_dump(NYSE_dates_missing_in_df_picks, path_data_dump, fp_NYSE_dates_missing_in_df_picks)   
print(f'NYSE_dates_missing_in_df_picks, (len={len(NYSE_dates_missing_in_df_picks )}):\n{NYSE_dates_missing_in_df_picks }')


NYSE_dates_missing_in_df_picks, (len=26):
['2023-10-18', '2023-10-16', '2023-10-13', '2023-10-11', '2023-10-10', '2023-10-09', '2023-10-03', '2023-09-27', '2023-09-26', '2023-09-19', '2023-09-07', '2023-09-06', '2023-08-31', '2023-08-30', '2023-08-29', '2023-08-28', '2023-08-24', '2023-08-23', '2023-08-22', '2023-08-21', '2023-08-16', '2023-07-31', '2023-07-28', '2023-07-24', '2023-07-19', '2023-07-17']


In [8]:
import pandas as pd
from yf_utils import random_slices, lookback_slices
from yf_utils import rank_perf, grp_tuples_sort_sum, top_set_sym_freq_cnt
from myUtils import pickle_load, pickle_dump

pd.set_option("display.max_rows", 20)
pd.set_option("display.max_columns", 11)
pd.set_option("display.max_colwidth", 16)
pd.set_option("display.width", 145)

path_dir = "C:/Users/ping/MyDrive/stocks/yfinance/"
path_data_dump = path_dir + "VSCode_dump/"

fp_df_close_clean = "df_close_clean"

### Set parameters ######################################################################
# fp_df_eval_results = f'df_eval_results_{run_type}'
fp_df_picks = f"df_picks"

# verbose = False  # True prints more output
verbose = True  # True prints more output

store_results = True
# store_results = False

n_samples = 1  # only need 1 sample to get the current picks

# for training, the number of days to lookback from iloc max-lookback iloc_end_train
l_days_lookbacks = [[30, 60, 120], [15, 30, 60, 120]]

l_sorted_days_lookbacks = []
for days_lookbacks in l_days_lookbacks:
    l_sorted_days_lookbacks.append(sorted(days_lookbacks))

# number of days from iloc_end_train are used to evaluate effectiveness of the training
days_eval = 0

# number of the most-common symbols from days_lookbacks' performance rankings to keep
n_top_syms = 20

# slice starts and ends for selecting the best performing symbols
syms_start = 0
syms_end = 10




# get picks of previous days by dropping the last n rows from df_current
#  drop_last_n_rows = 1 drops the last row from df_current
drop_last_n_rows = 0
drop_last_n_rows = 4



#########################################################################

print(f"verbose : {verbose }")
print(f"store_results: {store_results}")
print(f"n_samples: {n_samples}")
print(f"days_lookbacks: {days_lookbacks}")
print(f"days_eval: {days_eval}")
print(f"n_top_syms: {n_top_syms}")
print(f"syms_start: {syms_start}")
print(f"syms_end: {syms_end}")
print(f"fp_df_picks: {fp_df_picks}\n\n")

df_picks = pickle_load(path_data_dump, fp_df_picks)
df_close_clean = pickle_load(path_data_dump, fp_df_close_clean)
df_current = df_close_clean.copy()

for i, days_lookbacks in enumerate(l_sorted_days_lookbacks):

    print(f'\n\n{"%"*40:<42}{i+1} of {len(l_sorted_days_lookbacks)} days_lookbacks{"%"*40:>42}\n')
    max_days_lookbacks = max(days_lookbacks)
    print(f"max_days_lookbacks: {max_days_lookbacks}\n")

    slice_start = -(max_days_lookbacks + drop_last_n_rows)
    slice_end = -drop_last_n_rows
    if drop_last_n_rows == 0:  # return df with all rows
        df = df_current[slice_start:].copy()
    else:  # return df with dropped drop_last_n_rows rows
        df = df_current[slice_start:slice_end].copy()


    print(f"df.head():\n{df.head()}\n")        
    print(f"dropped last {drop_last_n_rows} row(s) from df")
    print(f"df.tail():\n{df.tail()}\n")
    len_df = len(df)
    # len_df_current = len(df_current)
    print(f"len(df): {len(df)}")

verbose : True
store_results: True
n_samples: 1
days_lookbacks: [15, 30, 60, 120]
days_eval: 0
n_top_syms: 20
syms_start: 0
syms_end: 10
fp_df_picks: df_picks




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  1 of 2 days_lookbacks  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

max_days_lookbacks: 120

df.head():
                     A         AA    AAL         AAP        AAPL  ...       ZION        ZTO         ZTS       ZUMZ        ZWS
Date                                                              ...                                                        
2023-05-15  127.489014  36.724174  14.28  120.926422  171.837967  ...  23.635796  29.160000  183.008392  16.209999  21.389919
2023-05-16  125.795670  35.840565  14.20  116.355110  171.837967  ...  23.082058  28.500000  180.619370  15.940000  21.230812
2023-05-17  127.269875  36.475971  14.91  121.075165  172.457123  ...  25.869699  28.480000  178.120865  16.840000  21.897072
2023-05-18  128.634521  36.545467  15.05  120.460373  174.813934

In [9]:
print(f'df_current.head():\n{df_current.head()}')
print(f'df_current.tail():\n{df_current.tail()}')

df_current.head():
                    A         AA        AAL        AAP       AAPL  ...       ZION        ZTO        ZTS       ZUMZ        ZWS
Date                                                               ...                                                       
2017-11-22  65.633377  41.737015  47.427250  82.460930  41.415661  ...  38.969166  15.877837  68.290558  18.600000  23.360292
2017-11-24  66.273560  41.316128  47.329788  82.102081  41.418030  ...  38.969166  15.373776  68.501968  19.049999  23.098690
2017-11-27  66.225777  41.149719  47.193333  82.690956  41.209709  ...  39.028011  15.121749  68.742203  19.200001  23.156828
2017-11-28  67.448799  42.383041  47.973072  85.423698  40.968269  ...  40.432083  14.935061  68.598045  20.100000  23.525003
2017-11-29  65.633377  40.366657  48.002312  91.965736  40.118454  ...  41.609146  14.851050  68.809456  20.850000  23.835056

[5 rows x 1277 columns]
df_current.tail():
                     A         AA    AAL        AAP    

In [10]:
print(NYSE_dates_missing_in_df_picks)

['2023-10-18', '2023-10-16', '2023-10-13', '2023-10-11', '2023-10-10', '2023-10-09', '2023-10-03', '2023-09-27', '2023-09-26', '2023-09-19', '2023-09-07', '2023-09-06', '2023-08-31', '2023-08-30', '2023-08-29', '2023-08-28', '2023-08-24', '2023-08-23', '2023-08-22', '2023-08-21', '2023-08-16', '2023-07-31', '2023-07-28', '2023-07-24', '2023-07-19', '2023-07-17']


In [11]:
def drop_dates_after_input_date(df, input_date):
  """Drops dates after the input date in a Pandas DataFrame with a Date index.

  Args:
    df: A Pandas DataFrame with a Date index.
    input_date: A datetime object.

  Returns:
    A Pandas DataFrame with the dates after the input date dropped.
  """

  # Get the index of the input date in the DataFrame.
  input_date_index = df.index.get_loc(input_date)

  # Drop the rows after the input date.
  df = df.iloc[:input_date_index + 1]

  return df

In [12]:
print(NYSE_dates_missing_in_df_picks)

['2023-10-18', '2023-10-16', '2023-10-13', '2023-10-11', '2023-10-10', '2023-10-09', '2023-10-03', '2023-09-27', '2023-09-26', '2023-09-19', '2023-09-07', '2023-09-06', '2023-08-31', '2023-08-30', '2023-08-29', '2023-08-28', '2023-08-24', '2023-08-23', '2023-08-22', '2023-08-21', '2023-08-16', '2023-07-31', '2023-07-28', '2023-07-24', '2023-07-19', '2023-07-17']


In [13]:
for i, date in enumerate(NYSE_dates_missing_in_df_picks):
  NYSE_dates_missing=NYSE_dates_missing_in_df_picks[i]
  df_ = drop_dates_after_input_date(df=df_current, input_date=NYSE_dates_missing)
  print(f'i: {i}, {df_.index[-1]}')

i: 0, 2023-10-18 00:00:00
i: 1, 2023-10-16 00:00:00
i: 2, 2023-10-13 00:00:00
i: 3, 2023-10-11 00:00:00
i: 4, 2023-10-10 00:00:00
i: 5, 2023-10-09 00:00:00
i: 6, 2023-10-03 00:00:00
i: 7, 2023-09-27 00:00:00
i: 8, 2023-09-26 00:00:00
i: 9, 2023-09-19 00:00:00
i: 10, 2023-09-07 00:00:00
i: 11, 2023-09-06 00:00:00
i: 12, 2023-08-31 00:00:00
i: 13, 2023-08-30 00:00:00
i: 14, 2023-08-29 00:00:00
i: 15, 2023-08-28 00:00:00
i: 16, 2023-08-24 00:00:00
i: 17, 2023-08-23 00:00:00
i: 18, 2023-08-22 00:00:00
i: 19, 2023-08-21 00:00:00
i: 20, 2023-08-16 00:00:00
i: 21, 2023-07-31 00:00:00
i: 22, 2023-07-28 00:00:00
i: 23, 2023-07-24 00:00:00
i: 24, 2023-07-19 00:00:00
i: 25, 2023-07-17 00:00:00


In [14]:
df_ = df_current.copy()
df_.tail(20)

Unnamed: 0_level_0,A,AA,AAL,AAP,AAPL,...,ZION,ZTO,ZTS,ZUMZ,ZWS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-10-12,108.940002,26.78558,12.06,49.970001,180.710007,...,35.200001,24.049999,173.893646,16.5,26.940001
2023-10-13,110.510002,26.466942,11.72,50.75,178.850006,...,34.57,24.15,173.963486,15.81,26.530001
2023-10-16,111.5,26.695963,11.94,53.59,178.720001,...,35.919998,24.01,174.761566,16.860001,26.82
2023-10-17,112.400002,27.65188,11.94,54.68,177.149994,...,36.740002,24.01,173.843765,17.690001,27.040001
2023-10-18,108.610001,26.407198,11.36,53.240002,175.839996,...,35.689999,23.65,169.13504,17.15,25.870001
2023-10-19,109.510002,24.136894,11.45,51.299999,175.460007,...,32.240002,23.389999,167.638626,17.219999,25.780001
2023-10-20,109.230003,23.907873,11.08,51.200001,172.880005,...,29.959999,23.610001,166.690903,17.01,25.59
2023-10-23,109.339996,23.310425,11.29,48.450001,173.0,...,30.049999,23.370001,166.10231,16.74,25.49
2023-10-24,105.639999,23.340298,11.21,48.25,173.440002,...,29.860001,23.790001,166.720825,16.74,25.48
2023-10-25,103.400002,23.738596,11.04,49.84,171.100006,...,29.629999,23.75,163.279068,16.77,25.389999


In [30]:
def check_date(input_date, last_row_date, dates_NYSE):
    if input_date > last_row_date:
        raise ValueError("Error: input_date should not be later than last_row_date")
    if input_date not in dates_NYSE:
        raise ValueError(f"Error: input_date is not a NYSE trading date between {dates_NYSE[0]} and {dates_NYSE[-1]}")        

In [32]:
import sys

input_date = '2023-10-29'
last_row_date = df_.index[-1].strftime('%Y-%m-%d')

try:
    check_date(input_date, last_row_date, dates_NYSE)
except ValueError as e:
    print(e)
    sys.exit(1)

iloc_last_row = len(df_) - 1
iloc_input_date = df_.index.get_loc(input_date)
rows_drop = iloc_last_row - iloc_input_date
print(f'last_row_date: {last_row_date}')
print(f'iloc_last_row: {iloc_last_row}, iloc_input_date: {iloc_input_date}, rows to drop: {rows_drop}')


Error: input_date is not a NYSE trading date between 2023-03-15 and 2023-11-08


AttributeError: 'tuple' object has no attribute 'tb_frame'

### TODO integrate above code to yf_2get_picks_trash.py

## Add picks from missing dates to df_picks

In [17]:
df_model_picks_results = pickle_load(path_data_dump, 'df_model_picks_results')
print(f'df_model_picks_results, len({len(df_model_picks_results)}):\n{df_model_picks_results}\n')

df_model_picks_results, len(125):
           date    days_lookback    syms_freq          symbols    date_buy  ... sh_SPY_sell  $_SPY_sell %_SPY_chg  dif_%_chg win
4    2023-09-01    [30, 60, 120]   sym_freq_9          ['SHV']  2023-09-05  ...       [2.0]  891.039978 -0.828065   0.909835   1
5    2023-09-01    [30, 60, 120]   sym_freq_8  ['FTSM', 'ST...  2023-09-05  ...       [2.0]  891.039978 -0.828065  -0.427302   0
6    2023-08-25    [30, 60, 120]   sym_freq_9          ['SHV']  2023-08-28  ...       [2.0]  900.700012  1.714246  -1.623670   0
7    2023-08-25    [30, 60, 120]   sym_freq_8         ['STRL']  2023-08-28  ...       [2.0]  900.700012  1.714246   2.833503   1
8    2023-08-25  [15, 30, 60,...  sym_freq_12          ['SHV']  2023-08-28  ...       [2.0]  900.700012  1.714246  -1.623670   0
..          ...              ...          ...              ...         ...  ...         ...         ...       ...        ...  ..
124  2023-03-20    [30, 60, 120]   sym_freq_9  ['FTSM', 'SHV'] 

In [18]:
df_model_picks = pickle_load(path_data_dump, 'df_model_picks')
print(f'df_model_picks, len({len(df_model_picks)}):\n{df_model_picks}')

df_model_picks, len(4):
         date  days_lookback   syms_freq          symbols    date_buy  ... sh_SPY_sell  $_SPY_sell %_SPY_chg  dif_%_chg win
0  2023-09-08  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']        None  ...        None         NaN       NaN        NaN   0
1  2023-09-08  [30, 60, 120]  sym_freq_8  ['DELL', 'ST...        None  ...        None         NaN       NaN        NaN   0
2  2023-09-05  [30, 60, 120]  sym_freq_9          ['SHV']  2023-09-06  ...        None         NaN       NaN        NaN   0
3  2023-09-05  [30, 60, 120]  sym_freq_8         ['FTSM']  2023-09-06  ...        None         NaN       NaN        NaN   0

[4 rows x 18 columns]
