In [7]:
# # create empty DataFrame df_picks and append row
# my_cols = ['date', 'days_lookback', 'syms_freq', 'symbols']
# # Creating Empty DataFrame and Storing it in variable df_picks
# df_model_top_picks = pd.DataFrame(columns=my_cols)
# df_model_top_picks

In [8]:
import pandas as pd
from itertools import product
from ast import literal_eval
from myUtils import pickle_load, pickle_dump

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_colwidth', 30)
pd.set_option('display.width', 900)

path_dir = "C:/Users/ping/MyDrive/stocks/yfinance/"
path_data_dump = path_dir + "VSCode_dump/"

fp_df_picks  = f'df_picks'  # stock picks by criteria: CAGR/UI, CAGR/rtn_std, rtd/UI
fp_df_model_top_picks = f'df_model_top_picks'  # top stock picks from model developed by back test

verbose = True
# verbose = False

#### Clean df_picks_pick data:<br>- drop duplicates<br>- sort on date<br>- re-index<br>- save

In [9]:
df_picks = pickle_load(path_data_dump, fp_df_picks)
df_picks = df_picks.drop_duplicates(subset=['date_end_df_train', 'max_days_lookbacks', 'days_lookbacks'], keep='last')
df_picks = df_picks.sort_values(by=['date_end_df_train', 'max_days_lookbacks', 'days_lookbacks'], ascending=False)
df_picks = df_picks.reset_index(drop=True)
pickle_dump(df_picks, path_data_dump, fp_df_picks)
print(f'len(df_picks): {len(df_picks)}')
print(df_picks.head())

len(df_picks): 384
  date_end_df_train  max_days_lookbacks     days_lookbacks sym_freq_15 sym_freq_14 sym_freq_13 sym_freq_12 sym_freq_11 sym_freq_10 sym_freq_9      sym_freq_8              sym_freq_7       sym_freq_6                     sym_freq_5                     sym_freq_4                     sym_freq_3 sym_freq_2
0        2023-12-15                 120      [30, 60, 120]          []          []          []          []          []          []         []         ['SHV']         ['FTSM', 'GPS']               []         ['ANF', 'HIBB', 'LRN']          ['MARA', 'MBI', 'SQ']  ['AMKR', 'AXGN', 'BPMC', '...         []
1        2023-12-15                 120  [15, 30, 60, 120]          []          []          []          []          []     ['SHV']         []              []  ['FTSM', 'GPS', 'MBI']   ['HA', 'MARA']  ['ANF', 'HIBB', 'LRN', 'SQ...          ['CG', 'RUN', 'TNDM']  ['AMKR', 'AXGN', 'BA', 'BP...         []
2        2023-12-14                 120      [30, 60, 120]          []  

#### Get dates in df_picks where values in column "days_lookbacks" matched items in my_days_lookbacks.

In [10]:
my_days_lookbacks = [[30, 60, 120], [15, 30, 60, 120]]

# get list of dates correspond to dates in my_days_lookbacks
# e.g. ['2023-12-15', ... ,  '2023-03-15']
dates_in_days_lookbacks = []
for my_days_lookback in my_days_lookbacks:
  mask_days_lookback = (df_picks['days_lookbacks'] == str(my_days_lookback))
  l_dates = df_picks.loc[mask_days_lookback, 'date_end_df_train'].tolist()     
  dates_in_days_lookbacks.append(l_dates)
# get dates in sublists into one list
dates_in_days_lookbacks = [val for sublist in dates_in_days_lookbacks for val in sublist]
# get unique dates and sort newest to oldest
dates_in_days_lookbacks = sorted(list(set(dates_in_days_lookbacks)), reverse=True)

#### Create list of tuples of all combination of dates_in_days_lookbacks, my_days_lookbacks, my_cols<br> e.g. [('2023-12-15', [30, 60, 120], 'sym_freq_12'), ... , [15, 30, 60, 120], 'sym_freq_8')]

In [11]:
my_cols = ['sym_freq_12', 'sym_freq_9', 'sym_freq_8']

# list of tuples of all combination of dates_in_days_lookbacks, my_days_lookbacks, my_cols
# e.g. [('2023-12-15', [30, 60, 120], 'sym_freq_12'), ... , [15, 30, 60, 120], 'sym_freq_8')]
d_lbk_freq = list(product(dates_in_days_lookbacks, my_days_lookbacks, my_cols))

In [12]:
print(f'Model predicts the best performing symbols are from:')
print(f'1st    {"[30, 60, 120]":<20}{"sym_freq_9":<14}{"days_eval = 4":<20}')
print(f'2nd    {"[15, 30, 60, 120]":<20}{"sym_freq_12":<14}{"days_eval = 5":<20}')
print(f'3rd    {"[30, 60, 120]":<20}{"sym_freq_8":<14}{"days_eval = 4":<20}')

Model predicts the best performing symbols are from:
1st    [30, 60, 120]       sym_freq_9    days_eval = 4       
2nd    [15, 30, 60, 120]   sym_freq_12   days_eval = 5       
3rd    [30, 60, 120]       sym_freq_8    days_eval = 4       


#### Get symbols in df_picks column in my_cols where values in columns "date_end_df_picks_train", "days_lookbacks" matched dates_in_days_lookbacks and my_days_lookbacks.

In [13]:
# picks, e.g.: [(0, []), (1, []), (2, ['SHV']), (3, []), ... , (998, ['ELF']), (999, []), ...]
picks = []
prev_date = ''

for i, item in enumerate(d_lbk_freq):
  _date, my_days_lookback, my_col = item[0], item[1], item[2]
  mask_date_n_days_lookback = (df_picks['date_end_df_train'] == _date) & (df_picks['days_lookbacks'] == str(my_days_lookback))  
  list_in_df_picks = df_picks.loc[mask_date_n_days_lookback, my_col]  # pandas series, e.g. 0 ['AMPH', 'FCN']
  my_str = list_in_df_picks.tolist()[0]  # e.g. string: "['AMPH', 'FCN']"
  my_list = literal_eval(my_str)  # e.g. list: ['AMPH', 'FCN']

  # print symbols picked by the model
  if _date != prev_date:
    if prev_date == '':
      print('\nModel Results:')  
      print(f'{i:<6}{_date:<14}{str(my_days_lookback):<21}{my_col:<11}: {my_list}')
    else:
      print(f'\n{i:<6}{_date:<14}{str(my_days_lookback):<21}{my_col:<11}: {my_list}') 
  else:
    print(f'{i:<6}{" ":<14}{str(my_days_lookback):<21}{my_col:<11}: {my_list}')

  picks.append((i, my_list))
  prev_date = _date


Model Results:
0     2023-12-15    [30, 60, 120]        sym_freq_12: []
1                   [30, 60, 120]        sym_freq_9 : []
2                   [30, 60, 120]        sym_freq_8 : ['SHV']
3                   [15, 30, 60, 120]    sym_freq_12: []
4                   [15, 30, 60, 120]    sym_freq_9 : []
5                   [15, 30, 60, 120]    sym_freq_8 : []

6     2023-12-14    [30, 60, 120]        sym_freq_12: []
7                   [30, 60, 120]        sym_freq_9 : []
8                   [30, 60, 120]        sym_freq_8 : []
9                   [15, 30, 60, 120]    sym_freq_12: []
10                  [15, 30, 60, 120]    sym_freq_9 : []
11                  [15, 30, 60, 120]    sym_freq_8 : []

12    2023-12-13    [30, 60, 120]        sym_freq_12: []
13                  [30, 60, 120]        sym_freq_9 : []
14                  [30, 60, 120]        sym_freq_8 : ['GPS', 'SHV']
15                  [15, 30, 60, 120]    sym_freq_12: []
16                  [15, 30, 60, 120]    sym_freq_9 :

#### Create iterable z_items of dates_in_days_lookbacks, my_days_lookbacks, my_cols, and picks, e.g.:<br>(('2023-12-15', [30, 60, 120], 'sym_freq_12'), (0, []))<br>(('2023-12-15', [30, 60, 120], 'sym_freq_9'), (1, []))<br>….<br>(('2023-03-15', [15, 30, 60, 120], 'sym_freq_9'), (1150, [SGEN']))<br>(('2023-03-15', [15, 30, 60, 120], 'sym_freq_8'), (1151, ['AMPH', 'FCN']))

In [14]:
# z_items, e.g.:
# (('2023-12-14', [30, 60, 120], 'sym_freq_12'), (0, []))
# ...
# (('2023-03-15', [15, 30, 60, 120], 'sym_freq_8'), (1145, ['AMPH', 'FCN']))
z_items = zip(d_lbk_freq, picks)

#### Criteria of the best stocks from the model

In [15]:
criteria_model_top_picks = [([30, 60, 120], 'sym_freq_9'), ([15, 30, 60, 120], 'sym_freq_12'), ([30, 60, 120], 'sym_freq_8')]

In [16]:
df_model_top_picks = pickle_load(path_data_dump, fp_df_model_top_picks)
print(f'len(df_model_top_picks): {len(df_model_top_picks)}')
print(df_model_top_picks.head(), '\n')
print(df_model_top_picks.tail())


len(df_model_top_picks): 290
         date  days_lookback   syms_freq         symbols
0  2023-12-15  [30, 60, 120]  sym_freq_8         ['SHV']
1  2023-12-13  [30, 60, 120]  sym_freq_8  ['GPS', 'SHV']
2  2023-12-12  [30, 60, 120]  sym_freq_8  ['GPS', 'SHV']
3  2023-12-11  [30, 60, 120]  sym_freq_8  ['GPS', 'SHV']
4  2023-12-08  [30, 60, 120]  sym_freq_8  ['GPS', 'SHV'] 

           date  days_lookback   syms_freq          symbols
285  2023-03-20  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
286  2023-03-17  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
287  2023-03-17  [30, 60, 120]  sym_freq_8           ['GE']
288  2023-03-16  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
289  2023-03-15  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']


#### Add picks to df_model_top_picks if dlb_sym_freq match criteria_model_top_picks and _syms has symbol(s)  

In [17]:
for i, item in enumerate(z_items):
  _date, _days_lookback, _sym_freq, _syms = item[0][0], item[0][1], item[0][2], item[1][1]
  len_df_model_top_picks = len(df_model_top_picks)
  print(f'{i}, {_date}, {_days_lookback}, {_sym_freq}, {_syms}')  
  dlb_sym_freq = (_days_lookback, _sym_freq)
  # add data if dlb_sym_freq in criteria_model_top_picks and _syms has symbol(s)  
  if dlb_sym_freq in criteria_model_top_picks and _syms:
    row_add = [_date, _days_lookback, _sym_freq, _syms]
    df_model_top_picks.loc[len_df_model_top_picks] = row_add    
    print(f'\nadded row {len_df_model_top_picks} to df_model_top_picks:\n{row_add}\n')


0, 2023-12-15, [30, 60, 120], sym_freq_12, []
1, 2023-12-15, [30, 60, 120], sym_freq_9, []
2, 2023-12-15, [30, 60, 120], sym_freq_8, ['SHV']

added row 290 to df_model_top_picks:
['2023-12-15', [30, 60, 120], 'sym_freq_8', ['SHV']]

3, 2023-12-15, [15, 30, 60, 120], sym_freq_12, []
4, 2023-12-15, [15, 30, 60, 120], sym_freq_9, []
5, 2023-12-15, [15, 30, 60, 120], sym_freq_8, []
6, 2023-12-14, [30, 60, 120], sym_freq_12, []
7, 2023-12-14, [30, 60, 120], sym_freq_9, []
8, 2023-12-14, [30, 60, 120], sym_freq_8, []
9, 2023-12-14, [15, 30, 60, 120], sym_freq_12, []
10, 2023-12-14, [15, 30, 60, 120], sym_freq_9, []
11, 2023-12-14, [15, 30, 60, 120], sym_freq_8, []
12, 2023-12-13, [30, 60, 120], sym_freq_12, []
13, 2023-12-13, [30, 60, 120], sym_freq_9, []
14, 2023-12-13, [30, 60, 120], sym_freq_8, ['GPS', 'SHV']

added row 291 to df_model_top_picks:
['2023-12-13', [30, 60, 120], 'sym_freq_8', ['GPS', 'SHV']]

15, 2023-12-13, [15, 30, 60, 120], sym_freq_12, []
16, 2023-12-13, [15, 30, 60, 120

#### Cleanup df_model_top_picks<br>- convert data to str<br>- drop duplicates, keep last value<br>- sort date, newest first<br>- re-index

In [18]:
df_model_top_picks = df_model_top_picks.astype(str)
df_model_top_picks = df_model_top_picks.drop_duplicates(keep='last')
df_model_top_picks = df_model_top_picks.sort_values(by=['date', 'days_lookback', 'syms_freq'], ascending=False)
df_model_top_picks = df_model_top_picks.reset_index(drop=True)
pickle_dump(df_model_top_picks, path_data_dump, fp_df_model_top_picks)
print(f'len(df_model_top_picks): {len(df_model_top_picks)}')
print(f'{fp_df_model_top_picks}:\n{df_model_top_picks}')

len(df_model_top_picks): 290
df_model_top_picks:
           date  days_lookback   syms_freq          symbols
0    2023-12-15  [30, 60, 120]  sym_freq_8          ['SHV']
1    2023-12-13  [30, 60, 120]  sym_freq_8   ['GPS', 'SHV']
2    2023-12-12  [30, 60, 120]  sym_freq_8   ['GPS', 'SHV']
3    2023-12-11  [30, 60, 120]  sym_freq_8   ['GPS', 'SHV']
4    2023-12-08  [30, 60, 120]  sym_freq_8   ['GPS', 'SHV']
..          ...            ...         ...              ...
285  2023-03-20  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
286  2023-03-17  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
287  2023-03-17  [30, 60, 120]  sym_freq_8           ['GE']
288  2023-03-16  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']
289  2023-03-15  [30, 60, 120]  sym_freq_9  ['FTSM', 'SHV']

[290 rows x 4 columns]


In [19]:
def get_NYSE_date_n(date, dates_NYSE, n, verbose=False):

  if date in dates_NYSE:
    idx_last = len(dates_NYSE) - 1  # last index of dates_NYSE
    idx_date = dates_NYSE.get_loc(date)  # index of date
    idx_date_n = idx_date + n  # index of date_n
    if verbose:
      print(f"date: {date} is in dates_NYSE,  date's position in dates_NYSE is: {idx_date} of {idx_last},  n: {n},  idx_date_n: {idx_date_n},")
    if 0 <= idx_date_n <= idx_last:  # idx_date_n is within bounds of dates_NYSE
      date_n = dates_NYSE[idx_date_n]
      if verbose:      
        print(f'idx_date_n: {idx_date_n} is wiithin bounds of dates_NYSE ("0 to {idx_last}), date_n: {date_n}"\n')
    else:  # idx_date_n is out-of-bounds of dates_NYSE
      date_n = None   
      if verbose:  
        print(f'idx_date_n: {idx_date_n} is out-of-bounds of dates_NYSE ("0 to {idx_last})"\n')
  else:  # date not in dates_NYSE
      date_n = None  
      if verbose:  
        print(f'date: {date} is not in dates_NYSE\n')

  return date_n  

In [20]:
def get_portf_buy(df_picks_close, date, str_symbols, portf_target, verbose=False):
    import numpy as np
    from ast import literal_eval    
    l_syms = literal_eval(str_symbols)  # convert list stored as str back to list
    # array of closing prices corresponding to symbols in l_syms    
    ar_price = df_picks_close.loc[date][l_syms].values  
    sym_cnt = len(l_syms)  # number of symbols
    amt_per_sym = portf_target / sym_cnt  # target dollar investment in each symbol
    ar_sym_share = np.floor(amt_per_sym / ar_price)  # array of shares for each symbol
    ar_sym_amt = ar_price * ar_sym_share  # array of actual dollar amount invested in each symbol
    portf_value = sum(ar_sym_amt)  # total actual portfolio value
    if verbose:
        # print(date, l_syms, ar_price, ar_sym_share, ar_sym_amt, portf_value)    
        print(f'{date = }, {l_syms = }, {ar_price = }, {ar_sym_share = }, {ar_sym_amt = }, {portf_value = }')            
        print(f'{date} {portf_value = }')
    return date, l_syms, ar_price, ar_sym_share, ar_sym_amt, portf_value

In [21]:
def get_SPY_buy(df_picks_close, date, portf_target, symbol="SPY", verbose=False):   
    import numpy as np
    price = df_picks_close.loc[date][symbol]    
    share = np.floor(portf_target / price)  # array of shares for each symbol
    value = price * share  # array of actual dollar amount invested in each symbol
    if verbose:
        # print(date, symbol, price, share, value)
        print(f'{date = }, {symbol = }, {price = }, {share = }, {value = }')                        
        print(f'{date} {symbol} {value = }')
    return date, price, share, value

### TODO: 2023-12-14, Replace dates_NYSE with dates in df_picks_close, no start_date or end_date needed

In [22]:
df_picks_mp = df_model_top_picks.copy()
df_picks_close = pickle_load(path_data_dump, 'df_close_clean')

In [23]:
dates_sorted = sorted(df_picks_mp.date.tolist())
date_start_picks_mp = dates_sorted[0]
date_end_picks_mp = dates_sorted[-1]
print(date_start_picks_mp, date_end_picks_mp) 

2023-03-15 2023-12-15


In [24]:
# Create a boolean mask for rows between date1 and date2 (inclusive)
mask = (df_picks_close.index >= date_start_picks_mp) & (df_picks_close.index <= date_end_picks_mp)
# mask = (df_picks_close >= date_start_picks_mp) & (df_picks_close <= date_end_picks_mp)

# Select rows using the mask
filtered_df = df_picks_close.loc[mask]
filtered_df
# mask

Unnamed: 0_level_0,A,AA,AAL,AAP,AAPL,AB,ABBV,ABR,ABT,ACGL,ACHC,ACIW,ACLS,ACRS,ADBE,...,XRX,XYL,YELP,YUM,YUMC,YY,ZBH,ZBRA,ZD,ZG,ZION,ZTO,ZTS,ZUMZ,ZWS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2023-03-15,133.288239,39.372566,13.86,119.255257,152.371918,32.977055,149.485596,10.526908,96.316017,62.740002,68.459999,22.700001,123.860001,7.990,333.609985,...,14.261086,95.604477,29.170000,125.356514,59.979794,26.028101,124.543419,287.739990,73.779999,39.919998,29.475050,25.606165,162.472290,19.500000,20.806547
2023-03-16,136.062805,40.342766,14.12,119.509888,155.220352,33.193268,150.688766,10.499517,97.527351,66.070000,70.230003,25.780001,129.960007,8.120,353.290009,...,14.451233,96.208496,29.780001,126.273537,60.515686,26.922306,124.712440,294.929993,74.510002,40.849998,30.818754,27.639652,165.193893,19.250000,20.796629
2023-03-17,132.442932,38.758762,13.98,116.934204,154.373779,32.883053,149.640839,10.216486,95.538002,63.150002,69.459999,26.940001,128.220001,7.890,358.140015,...,13.899805,93.802299,29.330000,125.080414,60.257668,27.720362,123.598854,288.709991,74.129997,39.810001,28.736017,28.083862,163.366241,18.540001,20.360270
2023-03-20,133.795395,39.877464,13.96,116.229065,156.764084,33.024059,151.484421,10.791677,96.384964,65.610001,70.449997,26.150000,131.509995,7.950,362.880005,...,14.118475,96.693703,29.690001,127.082108,60.485916,28.028044,125.607292,290.839996,74.650002,39.830002,28.966366,27.659395,164.707199,18.110001,20.657787
2023-03-21,136.251740,41.491177,14.37,117.316147,158.636505,33.334270,152.115128,10.873848,96.837975,67.209999,70.959999,26.200001,133.449997,8.240,374.220001,...,14.546309,97.555176,30.059999,126.756706,60.704235,27.489597,127.058952,294.429993,75.570000,41.939999,30.991514,27.866692,165.124374,18.549999,21.004896
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-11,128.970001,24.930000,13.72,57.080002,193.179993,29.820000,151.240005,14.100000,106.220001,79.610001,74.959999,28.000000,125.580002,0.973,625.200012,...,15.170000,107.699997,44.099998,126.629997,39.919998,36.939999,117.980003,239.649994,63.730000,43.820000,38.630001,21.020000,189.460007,18.969999,29.610001
2023-12-12,128.789993,24.049999,14.04,56.250000,194.710007,30.559999,153.240005,14.190000,106.680000,80.279999,75.910004,28.320000,126.190002,1.010,633.659973,...,15.710000,107.940002,44.480000,128.070007,40.320000,39.029999,117.449997,239.380005,62.560001,45.419998,38.130001,21.020000,191.470001,18.570000,29.230000
2023-12-13,133.740005,25.990000,14.09,60.830002,197.960007,30.809999,154.300003,14.880000,107.250000,78.379997,78.959999,29.240000,131.500000,1.090,624.260010,...,16.709999,109.180000,44.490002,130.929993,39.750000,38.500000,117.900002,244.429993,64.720001,50.279999,41.820000,20.400000,197.410004,19.670000,29.320000
2023-12-14,137.960007,29.900000,14.59,63.970001,198.110001,31.870001,154.880005,15.700000,108.599998,74.669998,77.580002,29.639999,140.429993,1.060,584.640015,...,17.580000,110.930000,45.360001,131.110001,40.160000,38.799999,118.529999,272.160004,66.320000,54.500000,45.669998,20.309999,200.089996,19.930000,29.610001


In [25]:
df_picks_close.index

DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05', '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12', '2018-01-16',
               ...
               '2023-12-04', '2023-12-05', '2023-12-06', '2023-12-07', '2023-12-08', '2023-12-11', '2023-12-12', '2023-12-13', '2023-12-14', '2023-12-15'], dtype='datetime64[ns]', name='Date', length=1500, freq=None)

In [26]:
import pandas_market_calendars as mcal
nyse = mcal.get_calendar('NYSE')
dates_NYSE_2013_2023 = nyse.valid_days(start_date='2013-01-01', end_date='2023-12-31').strftime('%Y-%m-%d')
print(f'len(dates_NYSE_2013_2023): {len(dates_NYSE_2013_2023)}')
print(f'dates_NYSE_2013_2023[0]:  {dates_NYSE_2013_2023[0]}, dates_NYSE_2013_2023[-1]: {dates_NYSE_2013_2023[-1]}')
print(f'{dates_NYSE_2013_2023}') 

len(dates_NYSE_2013_2023): 2768
dates_NYSE_2013_2023[0]:  2013-01-02, dates_NYSE_2013_2023[-1]: 2023-12-29
Index(['2013-01-02', '2013-01-03', '2013-01-04', '2013-01-07', '2013-01-08', '2013-01-09', '2013-01-10', '2013-01-11', '2013-01-14', '2013-01-15',
       ...
       '2023-12-15', '2023-12-18', '2023-12-19', '2023-12-20', '2023-12-21', '2023-12-22', '2023-12-26', '2023-12-27', '2023-12-28', '2023-12-29'], dtype='object', length=2768)


In [27]:
# from ast import literal_eval    
# l_syms = literal_eval(syms)  # convert list stored as str back to list

# for _sym in l_syms:
#   ans = _sym in df_picks_close.columns
#   print(_sym, ans)

In [28]:
def all_in_list(list1, list2):
  """
  Checks if all items in list1 are also in list2.

  Args:
    list1: A list of items.
    list2: Another list of items.

  Returns:
    True if all items in list1 are in list2, False otherwise.
  """
  return set(list1).issubset(set(list2))

In [29]:
def any_not_in_list(list1, list2):
  """
  Checks if any items in list1 are not in list2.

  Args:
    list1: A list of items.
    list2: Another list of items.

  Returns:
    True if any item in list1 is not in list2, False if all are present.
  """
  return bool(set(list1) - set(list2))

In [30]:
# l_syms = ['FTSM', 'SHV']
l_syms = ['GPS', 'SHV']
# l_syms = ['ISEE', 'SHV']
# l_syms = ['SHV', 'ISEE']
# l_syms = ['ISEE', 'ISEE']
# l_syms = ['SHV', 'SHV']
# l_syms = ['SHV', 'ISEE', 'SHV']
# l_syms = ['AAPL', 'NVDA']
# l_syms = ['AAPL', 'NVDA', 'SHV', 'AAPL', 'NVDA', 'SHV']
# ans = all_in_list(l_syms, df_picks_close.columns)
ans = any_not_in_list(l_syms, df_picks_close.columns)
print(l_syms, ans)

['GPS', 'SHV'] False


In [31]:
print(any_not_in_list(l_syms, df_picks_close.columns))
print(l_syms)

False
['GPS', 'SHV']


In [32]:
False or False
False or True
# True or True

True

In [33]:
import numpy as np
# from ast import literal_eval
z_date_syms = zip(df_picks_mp.date, df_picks_mp.symbols)

idx_close = df_picks_close.index.strftime('%Y-%m-%d')
symbols_df_picks_close = df_picks_close.columns  # symbols in df_picks_close

n = 1  # n=1 is the buy date
# n = 4  # n=4 is the sell date

date_buy = []  # buy date of portfolio
shares_syms = []  # lists of shares of each symbol brought on date_buy
value_portf = [] # list of porfolio value on date_buy
shares_SPY = []  # list of shares of SPY brought on date_buy
value_SPY = []  # list of value of SPY shares on date_buy 
for date, syms in z_date_syms:
    next_date_n = get_NYSE_date_n(date, dates_NYSE_2013_2023, n, verbose=verbose)

    if next_date_n in idx_close:
        close_date_n = next_date_n
        print(f'date: {date},  n: {n},  next_date_n: {next_date_n} is in df_picks_close')
        print(f'picks for {date}: {syms}')      
    else:
        close_date_n = None
        print(f'date: {date},  n: {n},  next_date_n: {next_date_n} is not in df_picks_close')
        print(f'picks for {date}: {syms}')        

    l_syms = literal_eval(syms)  # convert list stored as str back to list
    # True if symbol(s) in l_syms is not a column in df_picks_close
    sym_not_in_df_picks_close = any_not_in_list(l_syms, symbols_df_picks_close)

    if close_date_n is None or sym_not_in_df_picks_close:
        p_date = None  # portf. buy date
        p_l_syms = None  # portf. list of symbols
        p_ar_price = None  # portf. list of symbols' close
        p_ar_sym_share = None  # portf. list of symbols' share
        p_ar_sym_amt = None  # portf. list of symbols' dollar value 
        p_portf_value = None  # portf. value is none, data not available in df_picks_close
        
        s_date = None  # SPY buy date
        s_price = None # SPY list of symbols' close
        s_share = None # SPY list of symbols' share
        s_value = None  # SPY value is none, data not available in df_picks_close

        date_buy.append(p_date)
        shares_syms.append(p_l_syms)
        value_portf.append(p_portf_value)
        shares_SPY.append(s_share)
        value_SPY.append(s_value)    
        
        print(f"No data for next_date_n {next_date_n}, pick's portf value = None")
        print(f'No data for next_date_n {next_date_n}, SPY portf value =    None')

    else:    
        p_date, p_l_syms, p_ar_price, p_ar_sym_share, p_ar_sym_amt, p_portf_value = \
            get_portf_buy(df_picks_close, next_date_n, syms, portf_target=1000, verbose=verbose)
        s_date, s_price, s_share, s_value = \
            get_SPY_buy(df_picks_close, next_date_n, portf_target=1000, verbose=verbose)
                
        date_buy.append(p_date)
        shares_syms.append(p_ar_sym_share)
        value_portf.append(p_portf_value)
        shares_SPY.append(s_share)
        value_SPY.append(s_value)

        print(f"next_date_n pick's portf value = {p_portf_value}")
        print(f'next_date_n SPY portf value =    {s_value}')

    print('='*20, '\n')       
    

date: 2023-12-15 is in dates_NYSE,  date's position in dates_NYSE is: 2758 of 2767,  n: 1,  idx_date_n: 2759,
idx_date_n: 2759 is wiithin bounds of dates_NYSE ("0 to 2767), date_n: 2023-12-18"

date: 2023-12-15,  n: 1,  next_date_n: 2023-12-18 is not in df_picks_close
picks for 2023-12-15: ['SHV']
No data for next_date_n 2023-12-18, pick's portf value = None
No data for next_date_n 2023-12-18, SPY portf value =    None

date: 2023-12-13 is in dates_NYSE,  date's position in dates_NYSE is: 2756 of 2767,  n: 1,  idx_date_n: 2757,
idx_date_n: 2757 is wiithin bounds of dates_NYSE ("0 to 2767), date_n: 2023-12-14"

date: 2023-12-13,  n: 1,  next_date_n: 2023-12-14 is in df_picks_close
picks for 2023-12-13: ['GPS', 'SHV']
date = '2023-12-14', l_syms = ['GPS', 'SHV'], ar_price = array([ 21.29999924, 109.86000061]), ar_sym_share = array([23.,  4.]), ar_sym_amt = array([489.89998245, 439.44000244]), portf_value = 929.3399848937988
2023-12-14 portf_value = 929.3399848937988
date = '2023-12-14', 

#### TODO: 2023-12-17 skip syms that are not in df_picks_close, these symbols are no-longer listed

#### TODO: 2023-07-15 only evaluate performance of df_modelpicks, after evaluation, append results to df_modelpicks_results 

#### 2023-07-11 got KeyError: "['ISEE'] not in index". ISEE was in past model picks, but since then ISEE was acquired. The symbol was dropped from df_close_clean. Current program recalculate the performance of all the past picks. Rewrite program such that performance of past picks with win records are not recalculated.

In [34]:
# import numpy as np
# # from ast import literal_eval
# z_date_syms = zip(df_picks_mp.date, df_picks_mp.symbols)

# idx_close = df_picks_close.index.strftime('%Y-%m-%d')

# n = 1  # n=1 is the buy date
# # n = 4  # n=4 is the sell date

# date_buy = []  # buy date of portfolio
# shares_syms = []  # lists of shares of each symbol brought on date_buy
# value_portf = [] # list of porfolio value on date_buy
# shares_SPY = []  # list of shares of SPY brought on date_buy
# value_SPY = []  # list of value of SPY shares on date_buy 
# for date, syms in z_date_syms:
#     next_date_n = get_NYSE_date_n(date, dates_NYSE_2013_2023, n, verbose=verbose)

#     if next_date_n in idx_close:
#         close_date_n = next_date_n
#         print(f'date: {date},  n: {n},  next_date_n: {next_date_n} is in df_picks_close')
#         print(f'picks for {date}: {syms}')      
#     else:
#         close_date_n = None
#         print(f'date: {date},  n: {n},  next_date_n: {next_date_n} is not in df_picks_close')
#         print(f'picks for {date}: {syms}')        

#     if close_date_n is None:
#         p_date = None  # portf. buy date
#         p_l_syms = None  # portf. list of symbols
#         p_ar_price = None  # portf. list of symbols' close
#         p_ar_sym_share = None  # portf. list of symbols' share
#         p_ar_sym_amt = None  # portf. list of symbols' dollar value 
#         p_portf_value = None  # portf. value is none, data not available in df_picks_close
        
#         s_date = None  # SPY buy date
#         s_price = None # SPY list of symbols' close
#         s_share = None # SPY list of symbols' share
#         s_value = None  # SPY value is none, data not available in df_picks_close

#         date_buy.append(p_date)
#         shares_syms.append(p_l_syms)
#         value_portf.append(p_portf_value)
#         shares_SPY.append(s_share)
#         value_SPY.append(s_value)    
        
#         print(f"No data for next_date_n {next_date_n}, pick's portf value = None")
#         print(f'No data for next_date_n {next_date_n}, SPY portf value =    None')

#     else:    
#         p_date, p_l_syms, p_ar_price, p_ar_sym_share, p_ar_sym_amt, p_portf_value = \
#             get_portf_buy(df_picks_close, next_date_n, syms, portf_target=1000, verbose=verbose)
#         s_date, s_price, s_share, s_value = \
#             get_SPY_buy(df_picks_close, next_date_n, portf_target=1000, verbose=verbose)
                
#         date_buy.append(p_date)
#         shares_syms.append(p_ar_sym_share)
#         value_portf.append(p_portf_value)
#         shares_SPY.append(s_share)
#         value_SPY.append(s_value)

#         print(f"next_date_n pick's portf value = {p_portf_value}")
#         print(f'next_date_n SPY portf value =    {s_value}')

#     print('='*20, '\n')       
    

#### Below cells calculate past performance of model picks

In [35]:
def calc_portf_shares(df_picks_close, date, str_symbols, portf_target):
  # calculate number of shares to buy for symbols in str_symbols to meet port_target
  import numpy as np
  from ast import literal_eval    
  l_syms = literal_eval(str_symbols)  # convert list stored as str back to list
  # array of closing prices corresponding to symbols in l_syms    
  ar_price = df_picks_close.loc[date][l_syms].values  
  sym_cnt = len(l_syms)  # number of symbols
  amt_per_sym = portf_target / sym_cnt  # target dollar investment in each symbol
  ar_shares = np.floor(amt_per_sym / ar_price)  # array of shares for each symbol
  return ar_shares

In [36]:
def calc_portf_value(df_picks_close, date, str_symbols, ar_shares, verbose=False):
    import numpy as np
    from ast import literal_eval    
    l_syms = literal_eval(str_symbols)  # convert list stored as str back to list
    # array of closing prices corresponding to symbols in l_syms    
    ar_price = df_picks_close.loc[date][l_syms].values  
    ar_value = ar_price * ar_shares  # array of actual dollar amount invested in each symbol
    portf_value = sum(ar_value)  # total actual portfolio value
    if verbose:
        print(f'{date = }, {l_syms = }, {ar_price = }, {ar_shares = }, {ar_value = }, {portf_value = }')            
        print(f'{date} {portf_value = }')
    return date, l_syms, ar_price, ar_shares, ar_value, portf_value

In [37]:
def get_next_date_n(df_picks_close, date, n=1):

    """Get the next n(default=1) date from df_picks_close. date has to in df_picks_close.index.

    Edge case: The function won't return the first date in df_picks_close as the
    next trading date, if date is a trading date just before the first date in df_picks_close.
    """

    len_df_picks_close = len(df_picks_close)
    idx_dates = df_picks_close.index.strftime('%Y-%m-%d')  # date index of df_picks_close    

    if date in idx_dates:
        idx_next_date = idx_dates.get_loc(date) + n  # get index position of next date
        print(f'n: {n},  idx_next_date: {idx_next_date}')

        if idx_next_date >= 0:  # if idx_nex_date is negative, stop index loop back from index[0] to index[-1]    
            if idx_next_date >= (len_df_picks_close - n):  # next date's index is within bound of df_picks_close
                next_date = idx_dates[idx_dates.get_loc(date) + n]  # get the next date
                return next_date
        
              
            else:  # idx_next_date is out-of_bounds df_picks_close
                print(f"{date} is the last date in df_picks_close. No data for next {n} day")
                return None    
        
        
        else:  # idx_next_date is negative, no data in df_picks_close
            # no data from df_picks_close
            print(f'idx_next_date: {idx_next_date} is negative, no data for {n} days from {date} in df_picks_close')

    else:     
        # no data from df_picks_close
        print(f'no data for {date} in df_picks_close')
        return None

In [38]:
def is_date_in_close(date, df_picks_close):
  idx_close = df_picks_close.index.strftime('%Y-%m-%d')
  if date in idx_close:
    return date
  else:
    return None

In [39]:
def calc_portf_value_date_buy_(dates_in_days_lookbacks, my_symbols, df_picks_close, portf_target, n, verbose=False):
  
  z_date_syms = zip(dates_in_days_lookbacks, my_symbols)

  date_exec = []  # buy date of portfolio
  shares_syms = []  # lists of shares of each symbol brought on date
  value_portf = [] # list of porfolio value on date
  shares_SPY = []  # list of shares of SPY brought on date
  value_SPY = []  # list of value of SPY shares on date 

  for date, syms in z_date_syms:
    next_date_n = get_NYSE_date_n(date, dates_NYSE_2013_2023, n, verbose=False)
    close_date_n = is_date_in_close(next_date_n, df_picks_close)



    l_syms = literal_eval(syms)  # convert list stored as str back to list
    # True if symbol(s) in l_syms is not a column in df_picks_close
    sym_not_in_df_picks_close = any_not_in_list(l_syms, symbols_df_picks_close)

    if close_date_n is None or sym_not_in_df_picks_close:



    # if close_date_n is None:
      p_date = None
      p_ar_shares = None
      p_portf_value = None  # set to None when data are not available in df_picks_close
      SPY_shares = None
      SPY_value = None  # set to None when data are not available in df_picks_close

      if verbose:
        print(f"No data for close_date_n {close_date_n}, pick's portf value = None")
        print(f'No data for close_date_n {close_date_n}, SPY portf value =    None')

    else:    
      p_ar_shares = calc_portf_shares(df_picks_close, close_date_n, syms, portf_target)
      p_date, l_syms, ar_price, ar_shares, ar_value, p_portf_value = \
        calc_portf_value(df_picks_close, close_date_n, syms, p_ar_shares, verbose)

      syms = str(['SPY'])
      SPY_shares = calc_portf_shares(df_picks_close, close_date_n, syms, portf_target)
      date, l_syms, ar_price, ar_shares, ar_value, SPY_value = \
        calc_portf_value(df_picks_close, close_date_n, syms, SPY_shares, verbose)

      if verbose:
        print(f"close_date_n pick's portf value = {p_portf_value}")
        print(f'close_date_n SPY portf value =    {SPY_value}')

    date_exec.append(p_date)
    shares_syms.append(p_ar_shares)
    value_portf.append(p_portf_value)
    shares_SPY.append(SPY_shares)
    value_SPY.append(SPY_value)

    print('='*20, '\n')

  return date_exec, shares_syms, value_portf, shares_SPY, value_SPY       
    

In [40]:
def calc_portf_value_date_buy(dates_in_days_lookbacks, my_symbols, df_picks_close, portf_target, n, verbose=False):
  
  z_date_syms = zip(dates_in_days_lookbacks, my_symbols)

  date_exec = []  # buy date of portfolio
  shares_syms = []  # lists of shares of each symbol brought on date
  value_portf = [] # list of porfolio value on date
  shares_SPY = []  # list of shares of SPY brought on date
  value_SPY = []  # list of value of SPY shares on date 

  for date, syms in z_date_syms:
    next_date_n = get_NYSE_date_n(date, dates_NYSE_2013_2023, n, verbose=False)
    close_date_n = is_date_in_close(next_date_n, df_picks_close)

    if close_date_n is None:
      p_date = None
      p_ar_shares = None
      p_portf_value = None  # set to None when data are not available in df_picks_close
      SPY_shares = None
      SPY_value = None  # set to None when data are not available in df_picks_close

      if verbose:
        print(f"No data for close_date_n {close_date_n}, pick's portf value = None")
        print(f'No data for close_date_n {close_date_n}, SPY portf value =    None')

    else:    
      p_ar_shares = calc_portf_shares(df_picks_close, close_date_n, syms, portf_target)
      p_date, l_syms, ar_price, ar_shares, ar_value, p_portf_value = \
        calc_portf_value(df_picks_close, close_date_n, syms, p_ar_shares, verbose)

      syms = str(['SPY'])
      SPY_shares = calc_portf_shares(df_picks_close, close_date_n, syms, portf_target)
      date, l_syms, ar_price, ar_shares, ar_value, SPY_value = \
        calc_portf_value(df_picks_close, close_date_n, syms, SPY_shares, verbose)

      if verbose:
        print(f"close_date_n pick's portf value = {p_portf_value}")
        print(f'close_date_n SPY portf value =    {SPY_value}')

    date_exec.append(p_date)
    shares_syms.append(p_ar_shares)
    value_portf.append(p_portf_value)
    shares_SPY.append(SPY_shares)
    value_SPY.append(SPY_value)

    print('='*20, '\n')

  return date_exec, shares_syms, value_portf, shares_SPY, value_SPY       
    

In [41]:
def calc_portf_value_date_n(dates_in_days_lookbacks, my_symbols, df_picks_close, my_portf_shares, my_SPY_shares, n, verbose=False):
  
  z_date_syms_shares = zip(dates_in_days_lookbacks, my_symbols, my_portf_shares, my_SPY_shares)

  date_exec = []  # buy date of portfolio
  shares_syms = []  # lists of shares of each symbol brought on date
  value_portf = [] # list of porfolio value on date
  shares_SPY = []  # list of shares of SPY brought on date
  value_SPY = []  # list of value of SPY shares on date 

  for date, symbols, portf_shares, SPY_shares in z_date_syms_shares:
    next_date_n = get_NYSE_date_n(date, dates_NYSE_2013_2023, n, verbose=False)
    close_date_n = is_date_in_close(next_date_n, df_picks_close)

    if close_date_n is None:
      p_date_exec = None
      p_ar_shares = None
      p_value_portf = None  # set to None when data are not available in df_picks_close
      SPY_ar_shares = None
      SPY_value_portf = None # set to None when data are not available in df_picks_close

      if verbose:
        print(f"No data for close_date_n {close_date_n}, pick's portf value = None")
        # print(f'No data for next_date_n {next_date_n}, SPY portf value =    None')

    else: 
      if portf_shares is None:
        p_date_exec = None
        p_ar_shares = None
        p_value_portf = None
        SPY_ar_shares = None
        SPY_value_portf = None

      else:  
        p_date_exec, p_ar_syms, p_ar_price, p_ar_shares, p_ar_value, p_value_portf = \
          calc_portf_value(df_picks_close, close_date_n, symbols, portf_shares, verbose)
        
        SPY = str(['SPY'])
        SPY_date_exec, SPY_ar_syms, SPY_ar_price, SPY_ar_shares, SPY_ar_value, SPY_value_portf = \
          calc_portf_value(df_picks_close, close_date_n, SPY, SPY_shares, verbose) 

        if verbose:
          print(f"next_date_n pick's portf value = {p_value_portf}")
          print(f'next_date_n SPY portf value =    {SPY_value_portf}')

    date_exec.append(p_date_exec)
    shares_syms.append(p_ar_shares)
    value_portf.append(p_value_portf)
    shares_SPY.append(SPY_ar_shares)
    value_SPY.append(SPY_value_portf)

    print('='*20, '\n')

  return date_exec, shares_syms, value_portf, shares_SPY, value_SPY  

In [42]:
df_picks_mp

Unnamed: 0,date,days_lookback,syms_freq,symbols
0,2023-12-15,"[30, 60, 120]",sym_freq_8,['SHV']
1,2023-12-13,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']"
2,2023-12-12,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']"
3,2023-12-11,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']"
4,2023-12-08,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']"
...,...,...,...,...
285,2023-03-20,"[30, 60, 120]",sym_freq_9,"['FTSM', 'SHV']"
286,2023-03-17,"[30, 60, 120]",sym_freq_9,"['FTSM', 'SHV']"
287,2023-03-17,"[30, 60, 120]",sym_freq_8,['GE']
288,2023-03-16,"[30, 60, 120]",sym_freq_9,"['FTSM', 'SHV']"


In [43]:
df_picks_mp.date

0      2023-12-15
1      2023-12-13
2      2023-12-12
3      2023-12-11
4      2023-12-08
          ...    
285    2023-03-20
286    2023-03-17
287    2023-03-17
288    2023-03-16
289    2023-03-15
Name: date, Length: 290, dtype: object

In [44]:
df_picks_mp.symbols

0              ['SHV']
1       ['GPS', 'SHV']
2       ['GPS', 'SHV']
3       ['GPS', 'SHV']
4       ['GPS', 'SHV']
            ...       
285    ['FTSM', 'SHV']
286    ['FTSM', 'SHV']
287             ['GE']
288    ['FTSM', 'SHV']
289    ['FTSM', 'SHV']
Name: symbols, Length: 290, dtype: object

In [45]:
df_picks_close

Unnamed: 0_level_0,A,AA,AAL,AAP,AAPL,AB,ABBV,ABR,ABT,ACGL,ACHC,ACIW,ACLS,ACRS,ADBE,...,XRX,XYL,YELP,YUM,YUMC,YY,ZBH,ZBRA,ZD,ZG,ZION,ZTO,ZTS,ZUMZ,ZWS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2018-01-02,64.734879,54.001663,51.647560,97.674683,40.722866,14.967834,74.990395,4.823589,53.086960,29.433332,33.810001,22.690001,29.549999,25.030001,177.699997,...,22.436722,63.285557,43.240002,73.298286,38.446995,105.329613,115.217346,103.709999,63.104816,42.299999,42.113953,15.327106,68.963165,20.000000,25.104177
2018-01-03,66.381958,53.345852,51.014030,98.558548,40.715778,15.116324,76.163918,4.784509,53.204330,29.459999,33.430000,22.510000,29.900000,23.910000,181.039993,...,22.406174,64.057220,43.119999,73.235397,39.970482,103.508492,116.016029,105.769997,63.222748,42.730000,42.064110,15.224426,69.280273,20.500000,25.336090
2018-01-04,65.884010,53.541618,51.335670,102.195229,40.904900,15.353909,75.729538,4.773345,53.114044,29.570000,33.459999,22.730000,30.200001,24.190001,183.220001,...,22.681181,64.484879,42.820000,73.980965,40.216511,105.737183,115.848862,107.860001,63.576561,42.009998,42.238541,15.047074,69.693481,22.950001,25.452047
2018-01-05,66.937386,52.944546,51.316177,103.281616,41.370625,15.561793,77.047844,4.778927,53.267548,29.453333,33.110001,22.799999,31.000000,24.629999,185.339996,...,22.849249,64.364021,43.169998,74.412140,40.453087,111.087799,117.000481,109.540001,63.475468,42.340000,42.255161,14.981731,70.491005,22.549999,25.442381
2018-01-08,67.081032,53.835266,50.809341,102.554291,41.216965,15.858779,75.813354,4.834755,53.114044,29.456667,32.389999,23.040001,31.299999,25.379999,185.039993,...,23.116623,64.596451,43.669998,74.537895,40.386856,112.102409,117.223366,110.629997,64.410538,42.619999,42.047497,14.655028,71.336601,23.000000,26.766195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-11,128.970001,24.930000,13.720000,57.080002,193.179993,29.820000,151.240005,14.100000,106.220001,79.610001,74.959999,28.000000,125.580002,0.973000,625.200012,...,15.170000,107.699997,44.099998,126.629997,39.919998,36.939999,117.980003,239.649994,63.730000,43.820000,38.630001,21.020000,189.460007,18.969999,29.610001
2023-12-12,128.789993,24.049999,14.040000,56.250000,194.710007,30.559999,153.240005,14.190000,106.680000,80.279999,75.910004,28.320000,126.190002,1.010000,633.659973,...,15.710000,107.940002,44.480000,128.070007,40.320000,39.029999,117.449997,239.380005,62.560001,45.419998,38.130001,21.020000,191.470001,18.570000,29.230000
2023-12-13,133.740005,25.990000,14.090000,60.830002,197.960007,30.809999,154.300003,14.880000,107.250000,78.379997,78.959999,29.240000,131.500000,1.090000,624.260010,...,16.709999,109.180000,44.490002,130.929993,39.750000,38.500000,117.900002,244.429993,64.720001,50.279999,41.820000,20.400000,197.410004,19.670000,29.320000
2023-12-14,137.960007,29.900000,14.590000,63.970001,198.110001,31.870001,154.880005,15.700000,108.599998,74.669998,77.580002,29.639999,140.429993,1.060000,584.640015,...,17.580000,110.930000,45.360001,131.110001,40.160000,38.799999,118.529999,272.160004,66.320000,54.500000,45.669998,20.309999,200.089996,19.930000,29.610001


In [46]:
# total target investment for each day's picks, if day's pick has 2 symbols, the function will try to invest $500 for each symbol
portf_target = 1000  
date_buy, shares_syms, value_portf, shares_SPY, value_SPY = \
  calc_portf_value_date_buy_(df_picks_mp.date, df_picks_mp.symbols, df_picks_close, portf_target, n=1, verbose=verbose)

No data for close_date_n None, pick's portf value = None
No data for close_date_n None, SPY portf value =    None

date = '2023-12-14', l_syms = ['GPS', 'SHV'], ar_price = array([ 21.29999924, 109.86000061]), ar_shares = array([23.,  4.]), ar_value = array([489.89998245, 439.44000244]), portf_value = 929.3399848937988
2023-12-14 portf_value = 929.3399848937988
date = '2023-12-14', l_syms = ['SPY'], ar_price = array([472.01000977]), ar_shares = array([2.]), ar_value = array([944.02001953]), portf_value = 944.02001953125
2023-12-14 portf_value = 944.02001953125
close_date_n pick's portf value = 929.3399848937988
close_date_n SPY portf value =    944.02001953125

date = '2023-12-13', l_syms = ['GPS', 'SHV'], ar_price = array([ 21.22999954, 109.80599976]), ar_shares = array([23.,  4.]), ar_value = array([488.28998947, 439.22399902]), portf_value = 927.513988494873
2023-12-13 portf_value = 927.513988494873
date = '2023-12-13', l_syms = ['SPY'], ar_price = array([470.5]), ar_shares = array([

In [47]:
# # total target investment for each day's picks, if day's pick has 2 symbols, the function will try to invest $500 for each symbol
# portf_target = 1000  
# date_buy, shares_syms, value_portf, shares_SPY, value_SPY = \
#   calc_portf_value_date_buy(df_picks_mp.date, df_picks_mp.symbols, df_picks_close, portf_target, n=1, verbose=verbose)

In [48]:
df_picks_mp['date_buy'] = date_buy
df_picks_mp['sh_portf_buy'] = shares_syms
df_picks_mp['$_portf_buy'] = value_portf
df_picks_mp['sh_SPY_buy'] = shares_SPY
df_picks_mp['$_SPY_buy'] = value_SPY
# df_picks_mp.tail()
# df_picks_mp.head()
df_picks_mp

Unnamed: 0,date,days_lookback,syms_freq,symbols,date_buy,sh_portf_buy,$_portf_buy,sh_SPY_buy,$_SPY_buy
0,2023-12-15,"[30, 60, 120]",sym_freq_8,['SHV'],,,,,
1,2023-12-13,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-14,"[23.0, 4.0]",929.339985,[2.0],944.020020
2,2023-12-12,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-13,"[23.0, 4.0]",927.513988,[2.0],941.000000
3,2023-12-11,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-12,"[23.0, 4.0]",928.964710,[2.0],928.200012
4,2023-12-08,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-11,"[22.0, 4.0]",921.405289,[2.0],923.979980
...,...,...,...,...,...,...,...,...,...
285,2023-03-20,"[30, 60, 120]",sym_freq_9,"['FTSM', 'SHV']",2023-03-21,"[8.0, 4.0]",883.741425,[2.0],792.073425
286,2023-03-17,"[30, 60, 120]",sym_freq_9,"['FTSM', 'SHV']",2023-03-20,"[8.0, 4.0]",884.319916,[2.0],781.807922
287,2023-03-17,"[30, 60, 120]",sym_freq_8,['GE'],2023-03-20,[11.0],987.678345,[2.0],781.807922
288,2023-03-16,"[30, 60, 120]",sym_freq_9,"['FTSM', 'SHV']",2023-03-17,"[8.0, 4.0]",883.701660,[2.0],774.361938


In [49]:
date_exec, shares_syms, value_portf, shares_SPY, value_SPY  = \
  calc_portf_value_date_n(df_picks_mp.date, df_picks_mp.symbols, df_picks_close, df_picks_mp.sh_portf_buy, df_picks_mp.sh_SPY_buy, n=4, verbose=verbose)

No data for close_date_n None, pick's portf value = None

No data for close_date_n None, pick's portf value = None

No data for close_date_n None, pick's portf value = None

date = '2023-12-15', l_syms = ['GPS', 'SHV'], ar_price = array([ 21.30999947, 109.87000275]), ar_shares = array([23.,  4.]), ar_value = array([490.12998772, 439.48001099]), portf_value = 929.6099987030029
2023-12-15 portf_value = 929.6099987030029
date = '2023-12-15', l_syms = ['SPY'], ar_price = array([469.32998657]), ar_shares = array([2.]), ar_value = array([938.65997314]), portf_value = 938.6599731445312
2023-12-15 portf_value = 938.6599731445312
next_date_n pick's portf value = 929.6099987030029
next_date_n SPY portf value =    938.6599731445312

date = '2023-12-14', l_syms = ['GPS', 'SHV'], ar_price = array([ 21.29999924, 109.86000061]), ar_shares = array([22.,  4.]), ar_value = array([468.59998322, 439.44000244]), portf_value = 908.0399856567383
2023-12-14 portf_value = 908.0399856567383
date = '2023-12-14',

In [50]:
df_picks_mp['date_sell'] = date_exec
df_picks_mp['sh_portf_sell'] = shares_syms
df_picks_mp['$_portf_sell'] = value_portf
df_picks_mp['%_portf_chg'] = (df_picks_mp['$_portf_sell'] / df_picks_mp['$_portf_buy'] - 1) * 100

df_picks_mp['sh_SPY_sell'] = shares_SPY
df_picks_mp['$_SPY_sell'] = value_SPY
df_picks_mp['%_SPY_chg'] = (df_picks_mp['$_SPY_sell'] / df_picks_mp['$_SPY_buy'] - 1) * 100

df_picks_mp['dif_%_chg'] = df_picks_mp['%_portf_chg'] - df_picks_mp['%_SPY_chg']

In [51]:
# get symbol's close price for that date
date = '2023-03-16'
# _date = '2023-03-21'
# _sym = 'GE'
# _sym = 'NVDA'
_sym = 'FTSM'
# _sym = 'SHV'
_sym = 'SPY'
# _c = df_picks_close.loc['2023-04-11']['NVDA']
_c = df_picks_close.loc[_date][_sym]
_c

385.0067138671875

#### dif_%_chg is the percentage change of model icks - percentage change of SPY

In [52]:
df_picks_mp.head(20)

Unnamed: 0,date,days_lookback,syms_freq,symbols,date_buy,sh_portf_buy,$_portf_buy,sh_SPY_buy,$_SPY_buy,date_sell,sh_portf_sell,$_portf_sell,%_portf_chg,sh_SPY_sell,$_SPY_sell,%_SPY_chg,dif_%_chg
0,2023-12-15,"[30, 60, 120]",sym_freq_8,['SHV'],,,,,,,,,,,,,
1,2023-12-13,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-14,"[23.0, 4.0]",929.339985,[2.0],944.02002,,,,,,,,
2,2023-12-12,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-13,"[23.0, 4.0]",927.513988,[2.0],941.0,,,,,,,,
3,2023-12-11,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-12,"[23.0, 4.0]",928.96471,[2.0],928.200012,2023-12-15,"[23.0, 4.0]",929.609999,0.069463,[2.0],938.659973,1.126908,-1.057445
4,2023-12-08,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-11,"[22.0, 4.0]",921.405289,[2.0],923.97998,2023-12-14,"[22.0, 4.0]",908.039986,-1.450535,[2.0],944.02002,2.168882,-3.619417
5,2023-12-07,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-08,"[23.0, 4.0]",937.585289,[2.0],920.400024,2023-12-13,"[23.0, 4.0]",927.513988,-1.074174,[2.0],941.0,2.238155,-3.312329
6,2023-12-06,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-07,"[23.0, 4.0]",930.455301,[2.0],916.460022,2023-12-12,"[23.0, 4.0]",928.96471,-0.1602,[2.0],928.200012,1.281015,-1.441215
7,2023-12-05,"[30, 60, 120]",sym_freq_9,['GPS'],2023-12-06,[47.0],984.180025,[2.0],909.52002,2023-12-11,[47.0],1030.710014,4.727792,[2.0],923.97998,1.589845,3.137947
8,2023-12-05,"[30, 60, 120]",sym_freq_8,"['LRN', 'SHV']",2023-12-06,"[8.0, 4.0]",924.266205,[2.0],909.52002,2023-12-11,"[8.0, 4.0]",925.825287,0.168683,[2.0],923.97998,1.589845,-1.421162
9,2023-12-05,"[15, 30, 60, 120]",sym_freq_12,['GPS'],2023-12-06,[47.0],984.180025,[2.0],909.52002,2023-12-11,[47.0],1030.710014,4.727792,[2.0],923.97998,1.589845,3.137947


In [53]:
df_picks_mp['win'] = np.where(df_picks_mp['dif_%_chg']>0,1,0)
df_picks_mp.head(20)

Unnamed: 0,date,days_lookback,syms_freq,symbols,date_buy,sh_portf_buy,$_portf_buy,sh_SPY_buy,$_SPY_buy,date_sell,sh_portf_sell,$_portf_sell,%_portf_chg,sh_SPY_sell,$_SPY_sell,%_SPY_chg,dif_%_chg,win
0,2023-12-15,"[30, 60, 120]",sym_freq_8,['SHV'],,,,,,,,,,,,,,0
1,2023-12-13,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-14,"[23.0, 4.0]",929.339985,[2.0],944.02002,,,,,,,,,0
2,2023-12-12,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-13,"[23.0, 4.0]",927.513988,[2.0],941.0,,,,,,,,,0
3,2023-12-11,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-12,"[23.0, 4.0]",928.96471,[2.0],928.200012,2023-12-15,"[23.0, 4.0]",929.609999,0.069463,[2.0],938.659973,1.126908,-1.057445,0
4,2023-12-08,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-11,"[22.0, 4.0]",921.405289,[2.0],923.97998,2023-12-14,"[22.0, 4.0]",908.039986,-1.450535,[2.0],944.02002,2.168882,-3.619417,0
5,2023-12-07,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-08,"[23.0, 4.0]",937.585289,[2.0],920.400024,2023-12-13,"[23.0, 4.0]",927.513988,-1.074174,[2.0],941.0,2.238155,-3.312329,0
6,2023-12-06,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-07,"[23.0, 4.0]",930.455301,[2.0],916.460022,2023-12-12,"[23.0, 4.0]",928.96471,-0.1602,[2.0],928.200012,1.281015,-1.441215,0
7,2023-12-05,"[30, 60, 120]",sym_freq_9,['GPS'],2023-12-06,[47.0],984.180025,[2.0],909.52002,2023-12-11,[47.0],1030.710014,4.727792,[2.0],923.97998,1.589845,3.137947,1
8,2023-12-05,"[30, 60, 120]",sym_freq_8,"['LRN', 'SHV']",2023-12-06,"[8.0, 4.0]",924.266205,[2.0],909.52002,2023-12-11,"[8.0, 4.0]",925.825287,0.168683,[2.0],923.97998,1.589845,-1.421162,0
9,2023-12-05,"[15, 30, 60, 120]",sym_freq_12,['GPS'],2023-12-06,[47.0],984.180025,[2.0],909.52002,2023-12-11,[47.0],1030.710014,4.727792,[2.0],923.97998,1.589845,3.137947,1


### Model Performance Not Good

In [54]:
wins = df_picks_mp['win'].sum()
attempts = len(df_picks_mp['dif_%_chg'].dropna())

win_rate = wins / attempts
print(f'win_rate: {win_rate:0.6f}, wins: {wins}, attempts: {attempts}')
print(f'sum(df_picksf_%_chg): {df_picks_mp["dif_%_chg"].sum():0.6f}')


win_rate: 0.421986, wins: 119, attempts: 282
sum(df_picksf_%_chg): -103.323054


In [55]:
df_picks = df_picks_mp.copy()

In [56]:
df_modelpicks = df_picks[df_picks['dif_%_chg'].isnull()].copy()
pickle_dump(df_modelpicks, path_data_dump, 'df_modelpicks')
_df_picks = pickle_load(path_data_dump, 'df_modelpicks')
_df_picks

Unnamed: 0,date,days_lookback,syms_freq,symbols,date_buy,sh_portf_buy,$_portf_buy,sh_SPY_buy,$_SPY_buy,date_sell,sh_portf_sell,$_portf_sell,%_portf_chg,sh_SPY_sell,$_SPY_sell,%_SPY_chg,dif_%_chg,win
0,2023-12-15,"[30, 60, 120]",sym_freq_8,['SHV'],,,,,,,,,,,,,,0
1,2023-12-13,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-14,"[23.0, 4.0]",929.339985,[2.0],944.02002,,,,,,,,,0
2,2023-12-12,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-13,"[23.0, 4.0]",927.513988,[2.0],941.0,,,,,,,,,0
247,2023-05-11,"[30, 60, 120]",sym_freq_8,"['ISEE', 'SHV']",,,,,,,,,,,,,,0
248,2023-05-10,"[30, 60, 120]",sym_freq_8,"['ISEE', 'SHV']",,,,,,,,,,,,,,0
249,2023-05-09,"[30, 60, 120]",sym_freq_8,"['ISEE', 'SHV']",,,,,,,,,,,,,,0
250,2023-05-08,"[30, 60, 120]",sym_freq_8,"['ISEE', 'SHV']",,,,,,,,,,,,,,0
251,2023-05-05,"[30, 60, 120]",sym_freq_8,"['ISEE', 'SHV']",,,,,,,,,,,,,,0


In [57]:
idx_last_row = df_modelpicks.index[-1]
df_modelpicks_results = df_picks.iloc[(idx_last_row + 1)::].copy()
pickle_dump(df_modelpicks_results, path_data_dump, 'df_modelpicks_results')
_df_picks = pickle_load(path_data_dump, 'df_modelpicks_results')
_df_picks

Unnamed: 0,date,days_lookback,syms_freq,symbols,date_buy,sh_portf_buy,$_portf_buy,sh_SPY_buy,$_SPY_buy,date_sell,sh_portf_sell,$_portf_sell,%_portf_chg,sh_SPY_sell,$_SPY_sell,%_SPY_chg,dif_%_chg,win
252,2023-05-04,"[30, 60, 120]",sym_freq_8,['SHV'],2023-05-05,[9.0],957.980621,[2.0],819.315796,2023-05-10,[9.0],958.415543,0.0454,[2.0],819.752686,0.053324,-0.007924,0
253,2023-05-03,"[30, 60, 120]",sym_freq_8,['SHV'],2023-05-04,[9.0],958.067688,[2.0],804.423889,2023-05-09,[9.0],958.067688,0.0,[2.0],815.940308,1.431636,-1.431636,0
254,2023-05-02,"[30, 60, 120]",sym_freq_8,['SHV'],2023-05-03,[9.0],957.545631,[2.0],810.162231,2023-05-08,[9.0],958.067688,0.05452,[2.0],819.534241,1.156806,-1.102286,0
255,2023-04-26,"[30, 60, 120]",sym_freq_8,['ELF'],2023-04-27,[10.0],928.499985,[2.0],818.879028,2023-05-02,[10.0],917.399979,-1.195477,[2.0],815.761597,-0.380695,-0.814782,0
256,2023-04-25,"[30, 60, 120]",sym_freq_8,"['ELF', 'SHV']",2023-04-26,"[5.0, 4.0]",894.563438,[2.0],802.894958,2023-05-01,"[5.0, 4.0]",888.982605,-0.623861,[2.0],825.034302,2.75744,-3.381301,0
257,2023-04-24,"[30, 60, 120]",sym_freq_8,['ELF'],2023-04-25,[10.0],923.899994,[2.0],806.310181,2023-04-28,[10.0],927.600021,0.400479,[2.0],825.868286,2.42563,-2.025151,0
258,2023-04-21,"[30, 60, 120]",sym_freq_8,['ELF'],2023-04-24,[10.0],948.399963,[2.0],819.315796,2023-04-27,[10.0],928.499985,-2.098269,[2.0],818.879028,-0.053309,-2.04496,0
259,2023-04-20,"[30, 60, 120]",sym_freq_8,['ELF'],2023-04-21,[10.0],966.800003,[2.0],818.462036,2023-04-26,[10.0],938.499985,-2.927184,[2.0],802.894958,-1.901991,-1.025193,0
260,2023-04-19,"[30, 60, 120]",sym_freq_8,['SHV'],2023-04-20,[9.0],956.435257,[2.0],817.82666,2023-04-25,[9.0],956.86853,0.045301,[2.0],806.310181,-1.408181,1.453482,1
261,2023-04-18,"[30, 60, 120]",sym_freq_8,['SHV'],2023-04-19,[9.0],955.915329,[2.0],822.314087,2023-04-24,[9.0],956.608498,0.072514,[2.0],819.315796,-0.364616,0.43713,1
