In [323]:
def get_trading_date_n(date, idx_close, n, verbose=False):
  """
  Return the entry of idx_close that sits n positions away from `date`.

  Args:
    date: The starting date. It is looked up in idx_close as-is, so it must
      have the same type/format as the entries of idx_close.
    idx_close: Index of trading dates. The code relies on `in`, `len()`,
      `.get_loc()` and positional `[]` access (e.g. a pandas Index).
    n: Number of index positions (trading days) to move; positive moves
      toward later entries, negative toward earlier ones.
    verbose: If True, print the positions involved (default: False).

  Returns:
    The entry n positions away from `date`, or None when `date` is not in
    idx_close or the target position falls outside idx_close.
  """
  # Guard: a date that is not a trading date has no position to offset from.
  if date not in idx_close:
    if verbose:
      print(f"date: {date} is not in idx_close\n")
    return None

  last_pos = len(idx_close) - 1         # highest valid position
  start_pos = idx_close.get_loc(date)   # position of the starting date
  target_pos = start_pos + n            # position n trading days away

  if verbose:
    print(f"date: {date} is in idx_close, "
          f"date's position in idx_close is: {start_pos} of {last_pos}, "
          f"n: {n}, idx_date_n: {target_pos},")

  # Guard: the offset ran off either end of the index.
  if not (0 <= target_pos <= last_pos):
    if verbose:
      print(f"idx_date_n: {target_pos} is out-of-bounds of idx_close (0 to {last_pos})\n")
    return None

  date_n = idx_close[target_pos]
  if verbose:
    print(f"idx_date_n: {target_pos} is within bounds of idx_close (0 to {last_pos}), date_n: {date_n}\n")
  return date_n

In [324]:
def any_not_in_list(list1, list2):
  """
  Report whether list1 contains at least one item that list2 lacks.

  Args:
    list1: Iterable of hashable items to look for.
    list2: Iterable of hashable items to look in.

  Returns:
    True if one or more items of list1 are absent from list2, False if every
    item of list1 is present in list2 (including when list1 is empty).
  """
  # Items of list1 that have no match in list2 (duplicates collapse in the sets).
  missing = set(list1).difference(set(list2))
  return len(missing) > 0

In [325]:
def is_date_in_close(date, df_close):
  """
  Return `date` when it matches the date portion of an entry in df_close's index.

  Args:
    date (str): Date to look for, formatted YYYY-MM-DD.
    df_close (pandas.DataFrame): DataFrame whose index supports `.strftime`
      (e.g. a DatetimeIndex).

  Returns:
    str or None: `date` unchanged if it is found, otherwise None.
  """
  # Reduce every index entry to its YYYY-MM-DD text so any time part is ignored.
  trading_days = df_close.index.strftime('%Y-%m-%d')
  return date if date in trading_days else None

In [326]:
def calc_portf_shares(df_close, date, str_symbols, portf_target):
  """
  Calculates the number of whole shares to buy for each symbol so that the target investment is split equally among the symbols.

  Args:
    df_close (pandas.DataFrame): DataFrame with closing prices, indexed by date, one column per symbol.
    date (str): The date (row label of df_close) for which to calculate shares.
    str_symbols (str): String representation of a list of symbols, e.g. "['GPS', 'SHV']".
    portf_target (float): The total target investment value for the portfolio.

  Returns:
    numpy.ndarray: Whole-share counts (as floats from np.floor), one per symbol, in the order of the symbols.

  Raises:
    KeyError: Raised by pandas when a symbol or the date is not in df_close.
    ZeroDivisionError: When the symbol list is empty and portf_target is a plain Python number.
  """

  # Function-scope imports keep this notebook cell usable on its own
  import numpy as np
  from ast import literal_eval

  # Convert string representation of symbols to a list
  l_syms = literal_eval(str_symbols)

  # Select the symbol columns FIRST, then the row. Taking the row first
  # (df_close.loc[date]) builds a Series over every column, which is upcast to
  # object dtype as soon as df_close holds any non-numeric column.
  ar_price = df_close[l_syms].loc[date].values

  # Equal dollar amount allotted to each symbol
  amt_per_sym = portf_target / len(l_syms)

  # Whole shares affordable with that amount (fractions are dropped)
  ar_shares = np.floor(amt_per_sym / ar_price)

  return ar_shares

In [327]:
def calc_portf_value(df_close, date, str_symbols, ar_shares, verbose=False):
  """
  Calculates the total value of a portfolio of stocks for a given date.

  Args:
  df_close (pandas.DataFrame): DataFrame containing closing prices, indexed by date, one column per symbol.
  date (str): The date (row label of df_close) for which to calculate the portfolio value.
  str_symbols (str): String representation of a list of symbols in the portfolio, e.g. "['GPS', 'SHV']".
  ar_shares (numpy.ndarray): Array containing the number of shares held for each symbol, in the order of the symbols.
  verbose (bool, optional): If True, prints details of the calculation. Defaults to False.

  Returns:
  tuple: (date, list of symbols, closing prices, shares, value held in each symbol, total portfolio value).
  The date and the shares are handed back unchanged.

  Raises:
  KeyError: Raised by pandas when a symbol or the date is not in df_close.
  """

  # Function-scope import keeps this notebook cell usable on its own
  from ast import literal_eval

  # Convert string representation of symbols to a list
  l_syms = literal_eval(str_symbols)

  # Select the symbol columns FIRST, then the row. Taking the row first
  # (df_close.loc[date]) builds a Series over every column, which is upcast to
  # object dtype as soon as df_close holds any non-numeric column.
  ar_price = df_close[l_syms].loc[date].values

  # Dollar value held in each symbol
  ar_value = ar_price * ar_shares

  # Total portfolio value; the builtin sum adds the per-symbol values in order
  portf_value = sum(ar_value)

  # Print details if verbose is True
  if verbose:
    print(f'date = {date}, l_syms = {l_syms}, ar_price = {ar_price}, ar_shares = {ar_shares}, ar_value = {ar_value}, portf_value = {portf_value}')
    print(f'{date} {portf_value = }')

  # Return the inputs together with the calculated values
  return date, l_syms, ar_price, ar_shares, ar_value, portf_value

In [328]:
def calc_portf_value_date_buy(idx_date, str_symbols, df_close, portf_target, n, verbose=False):
  """
  Buys, for every (date, symbols) pair, an equal-dollar portfolio of the symbols and a same-budget SPY
  position at the close n trading days away from the date, handling potential data gaps.

  Args:
  idx_date (iterable): Dates (str, YYYY-MM-DD) of the picks.
  str_symbols (iterable): For each date, the string representation of a list of symbols, e.g. "['GPS', 'SHV']".
  df_close (pandas.DataFrame): DataFrame with closing prices, indexed by date, one column per symbol.
  portf_target (float): Target portfolio value for allocation (used for the picks and, separately, for SPY).
  n (int): Number of trading days to offset execution from each date in idx_date.
  verbose (bool, optional): If True, prints the calculated values. Defaults to False.

  Returns:
  tuple: (date_exec, shares_syms, value_portf, shares_SPY, value_SPY). Each is a list with one entry per
  (date, symbols) pair. All five entries of a pair are None when the offset date is not in df_close, when
  any picked symbol is not a column of df_close, or when df_close has no 'SPY' column.
  """

  from ast import literal_eval

  # Extract trading dates and symbols from the DataFrame
  idx_close = df_close.index.strftime('%Y-%m-%d')  # Trading dates in format YYYY-MM-DD
  symbols_df_close = df_close.columns  # Symbols in df_close

  # SPY is priced for every pair, so check once that it is available. Without this
  # check a df_close lacking SPY would raise KeyError instead of producing None rows.
  str_SPY = str(['SPY'])
  SPY_not_in_df_close = any_not_in_list(['SPY'], symbols_df_close)

  # Initialize lists to store results
  date_exec = []  # Buy date of portfolio
  shares_syms = []  # Lists of shares of each symbol bought on each date
  value_portf = []  # List of portfolio values on each date
  shares_SPY = []  # List of shares of SPY bought on each date
  value_SPY = []  # List of values of SPY shares on each date

  # Iterate through pairs of dates and symbol lists
  for date, syms in zip(idx_date, str_symbols):

    # Determine the execution date: n trading days away from the pick date
    next_date_n = get_trading_date_n(date, idx_close, n, verbose=False)
    close_date_n = is_date_in_close(next_date_n, df_close)

    # Print information for debugging
    # NOTE(review): these prints (and the '=' separator below) run even when verbose is False.
    print(f'++++++++++++++')
    print(f'date: {date}')
    print(f'n: {n}')
    print(f'next_date_n: {next_date_n}')
    print(f'close_date_n: {close_date_n}')
    print(f'syms: {syms}')
    print(f'++++++++++++++')

    # Convert string representation of symbols back to a list
    l_syms = literal_eval(syms)

    # Check if any symbols are not present in the DataFrame
    sym_not_in_df_close = any_not_in_list(l_syms, symbols_df_close)

    if close_date_n is None or sym_not_in_df_close or SPY_not_in_df_close:
      # Handle missing data:
      print(f'l_syms: {l_syms}')
      print(f'sym_not_in_df_close: {sym_not_in_df_close}')

      p_date = None
      p_ar_shares = None
      p_portf_value = None  # Set to None when data are not available
      SPY_shares = None
      SPY_value = None  # Set to None when data are not available

      if verbose:
        print(f"No data for close_date_n {close_date_n}, pick's portf value = None")
        print(f'No data for close_date_n {close_date_n}, SPY portf value =    None')

    else:
      # Calculate portfolio values when data is available. Only the date and the total
      # are kept from calc_portf_value, so the loop variables are no longer overwritten.
      p_ar_shares = calc_portf_shares(df_close, close_date_n, syms, portf_target)
      p_date, _, _, _, _, p_portf_value = calc_portf_value(df_close, close_date_n, syms, p_ar_shares, verbose)

      # Benchmark: put the same target amount into SPY on the same date
      SPY_shares = calc_portf_shares(df_close, close_date_n, str_SPY, portf_target)
      _, _, _, _, _, SPY_value = calc_portf_value(df_close, close_date_n, str_SPY, SPY_shares, verbose)

      if verbose:
        print(f"close_date_n pick's portf value = {p_portf_value}")
        print(f'close_date_n SPY portf value =    {SPY_value}')

    date_exec.append(p_date)
    shares_syms.append(p_ar_shares)
    value_portf.append(p_portf_value)
    shares_SPY.append(SPY_shares)
    value_SPY.append(SPY_value)

    print('='*20, '\n')

  return date_exec, shares_syms, value_portf, shares_SPY, value_SPY

In [329]:
def calc_portf_value_date_n(idx_date, str_symbols, df_close, my_portf_shares, my_SPY_shares, n, verbose=False):
  """
  Values, for every (date, symbols) pair, already-held portfolio and SPY shares at the close n trading
  days away from the date.

  Args:
  idx_date (iterable): Dates (str, YYYY-MM-DD) of the picks.
  str_symbols (iterable): For each date, the string representation of a list of symbols, e.g. "['GPS', 'SHV']".
  df_close (pandas.DataFrame): DataFrame with closing prices, indexed by date, one column per symbol.
  my_portf_shares (iterable): For each date, the shares held of each symbol, or None when nothing is held.
  my_SPY_shares (iterable): For each date, the SPY shares held, or None when nothing is held.
  n (int): Number of trading days to offset the valuation from each date in idx_date.
  verbose (bool, optional): If True, prints the calculated values. Defaults to False.

  Returns:
  tuple: (date_exec, shares_syms, value_portf, shares_SPY, value_SPY). Each is a list with one entry per
  input pair: the valuation date, the portfolio shares, the portfolio value, the SPY shares and the SPY
  value. All five entries of a pair are None when the offset date is not in df_close, when either share
  entry is None, or when df_close has no 'SPY' column.
  """

  # Extract trading dates from the DataFrame
  idx_close = df_close.index.strftime('%Y-%m-%d')  # Trading dates in format YYYY-MM-DD

  # SPY is valued for every pair; check once that its prices are available so a
  # df_close lacking SPY produces None rows instead of a KeyError.
  str_SPY = str(['SPY'])
  SPY_in_df_close = 'SPY' in df_close.columns

  # Initialize lists to store results
  date_exec = []  # Valuation date of portfolio
  shares_syms = []  # Lists of shares of each symbol valued on each date
  value_portf = []  # List of portfolio values on each date
  shares_SPY = []  # List of shares of SPY valued on each date
  value_SPY = []  # List of values of SPY shares on each date

  # Iterate through pairs of dates, symbol lists, and shares
  for date, symbols, portf_shares, SPY_shares in zip(idx_date, str_symbols, my_portf_shares, my_SPY_shares):

    # Determine the valuation date: n trading days away from the pick date
    next_date_n = get_trading_date_n(date, idx_close, n, verbose=False)
    close_date_n = is_date_in_close(next_date_n, df_close)

    # SPY_shares is checked as well: multiplying prices by None would raise TypeError
    if close_date_n is None or portf_shares is None or SPY_shares is None or not SPY_in_df_close:
      # Handle missing data or missing shares:
      p_date_exec = None
      p_ar_shares = None
      p_value_portf = None  # Set to None when data or shares are unavailable
      SPY_ar_shares = None
      SPY_value_portf = None  # Set to None when data or shares are unavailable

      if verbose:
        print(f"No data for close_date_n {close_date_n}, pick's portf value = None")

    else:
      # Calculate portfolio values when data and shares are available:
      p_date_exec, _, _, p_ar_shares, _, p_value_portf = calc_portf_value(df_close, close_date_n, symbols, portf_shares, verbose)

      # Value the SPY benchmark position separately
      _, _, _, SPY_ar_shares, _, SPY_value_portf = calc_portf_value(df_close, close_date_n, str_SPY, SPY_shares, verbose)

      if verbose:
        print(f"next_date_n pick's portf value = {p_value_portf}")
        print(f'next_date_n SPY portf value =    {SPY_value_portf}')

    # Append results to lists
    date_exec.append(p_date_exec)
    shares_syms.append(p_ar_shares)
    value_portf.append(p_value_portf)
    shares_SPY.append(SPY_ar_shares)
    value_SPY.append(SPY_value_portf)

    # NOTE(review): this separator is printed even when verbose is False.
    print('='*20, '\n')

  return date_exec, shares_syms, value_portf, shares_SPY, value_SPY

In [330]:
"""
Imports libraries, sets display options, and defines file paths for stock analysis.
"""

import pandas as pd  # Library for data manipulation and analysis
import numpy as np  # Library for numerical operations and array manipulation
from myUtils import pickle_load, pickle_dump  # Functions for loading and saving data

# Verbose mode: forwarded to the calculation functions so they print their details
verbose = True

# Widen the pandas display limits so large tables stay readable in the notebook
for _option, _value in (
  ('display.max_rows', 100),      # Display up to 100 rows
  ('display.max_columns', 30),    # Display up to 30 columns
  ('display.max_colwidth', 30),   # Display at most 30 characters per column
  ('display.width', 900),         # Overall display width of 900 characters
):
  pd.set_option(_option, _value)

# File names (without directory) of the pickled datasets
# fp_df_picks = f'df_picks'  # (Commented out) Stock picks by criteria (CAGR/UI, CAGR/rtn_std, rtd/UI)
fp_df_model_top_picks = 'df_model_top_picks'  # Top stock picks from the model developed by backtesting
fp_df_close_clean = 'df_close_clean'  # Historic closing prices for stocks

# Directories used for data storage and retrieval
path_dir = "C:/Users/ping/MyDrive/stocks/yfinance/"  # Base directory for stock data
path_data_dump = path_dir + "VSCode_dump/"  # Subdirectory for pickled data


In [331]:
"""
Loads pre-processed datasets from pickled files.
"""

# Load the cleaned closing-price table (shown further down: indexed by Date, one column per symbol).
# NOTE(review): pickle_load is imported from the project's myUtils module; presumably it unpickles
# the named file found in path_data_dump — only load pickle files that come from a trusted source.
df_close = pickle_load(path_data_dump, fp_df_close_clean)  # Closing prices used by all calculations below

# Load the model's top picks (shown further down: columns date, days_lookback, syms_freq, symbols).
df_picks_mp = pickle_load(path_data_dump, fp_df_model_top_picks)  # Picks to be evaluated against SPY


In [332]:
"""
Extracts and prints information about the date range and length of the df_picks_mp DataFrame.
"""

# Collect the pick dates in a list and order it in place, earliest first:
dates_sorted = df_picks_mp.date.tolist()
dates_sorted.sort()

# The two ends of the ordered list bound the period covered by the picks:
date_start_picks_mp, date_end_picks_mp = dates_sorted[0], dates_sorted[-1]

# Report the covered period and the number of rows in the DataFrame:
print(f'date_start_picks_mp: {date_start_picks_mp}')
print(f'date_end_picks_mp: {date_end_picks_mp}')
print(f'len(df_picks_mp): {len(df_picks_mp)}')  # Number of rows, not number of distinct dates


date_start_picks_mp: 2023-03-15
date_end_picks_mp: 2023-12-20
len(df_picks_mp): 293


In [333]:
"""
Filters the df_close DataFrame to match the date range of df_picks_mp and extracts information.
"""

# Build a boolean mask that is True for rows dated within the picks' date range (both ends inclusive):
# NOTE(review): the index is compared with the pick dates directly; this presumably relies on pandas
# comparing a datetime index with YYYY-MM-DD strings — confirm the types of both sides.
mask = (df_close.index >= date_start_picks_mp) & (df_close.index <= date_end_picks_mp)  # True where start <= row date <= end

# Keep only the masked rows. This REBINDS df_close, so re-running the cell filters the already-filtered frame:
df_close = df_close.loc[mask]  # Closing prices restricted to the picks' date range

# Trading dates of the filtered frame as YYYY-MM-DD strings:
idx_close = df_close.index.strftime('%Y-%m-%d')  # The calc_* functions rebuild this from df_close themselves

# Column labels of the filtered frame, i.e. the symbols with price data:
symbols_df_close = df_close.columns  # An Index of column labels (not a Python list)

# Bare expression as the last statement of the cell: the notebook renders the filtered DataFrame
df_close  # Shown as the cell output for a visual check

Unnamed: 0_level_0,A,AA,AAL,AAP,AAPL,AB,ABBV,ABR,ABT,ACGL,ACHC,ACIW,ACLS,ACRS,ADBE,...,XRX,XYL,YELP,YUM,YUMC,YY,ZBH,ZBRA,ZD,ZG,ZION,ZTO,ZTS,ZUMZ,ZWS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2023-03-15,133.288223,39.372566,13.86,119.255249,152.371918,32.977058,149.485580,10.526908,96.316025,62.740002,68.459999,22.700001,123.860001,7.990,333.609985,...,14.261086,95.604477,29.170000,125.356514,59.979797,26.028099,124.543419,287.739990,73.779999,39.919998,29.475050,25.606165,162.472305,19.500000,20.806545
2023-03-16,136.062805,40.342770,14.12,119.509888,155.220352,33.193268,150.688766,10.499517,97.527344,66.070000,70.230003,25.780001,129.960007,8.120,353.290009,...,14.451233,96.208504,29.780001,126.273529,60.515686,26.922304,124.712440,294.929993,74.510002,40.849998,30.818752,27.639652,165.193909,19.250000,20.796631
2023-03-17,132.442917,38.758762,13.98,116.934196,154.373779,32.883053,149.640823,10.216487,95.538010,63.150002,69.459999,26.940001,128.220001,7.890,358.140015,...,13.899804,93.802299,29.330000,125.080421,60.257668,27.720362,123.598854,288.709991,74.129997,39.810001,28.736017,28.083862,163.366241,18.540001,20.360270
2023-03-20,133.795410,39.877464,13.96,116.229073,156.764084,33.024059,151.484421,10.791677,96.384949,65.610001,70.449997,26.150000,131.509995,7.950,362.880005,...,14.118475,96.693703,29.690001,127.082115,60.485916,28.028042,125.607292,290.839996,74.650002,39.830002,28.966366,27.659395,164.707184,18.110001,20.657789
2023-03-21,136.251740,41.491180,14.37,117.316147,158.636505,33.334274,152.115112,10.873848,96.837975,67.209999,70.959999,26.200001,133.449997,8.240,374.220001,...,14.546308,97.555176,30.059999,126.756714,60.704235,27.489599,127.058952,294.429993,75.570000,41.939999,30.991514,27.866692,165.124374,18.549999,21.004894
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-14,137.960007,29.900000,14.59,63.970001,198.110001,31.870001,154.880005,15.700000,108.599998,74.669998,77.580002,29.639999,140.429993,1.060,584.640015,...,17.580000,110.930000,45.360001,131.110001,40.160000,38.799999,118.529999,272.160004,66.320000,54.500000,45.669998,20.309999,200.089996,19.930000,29.610001
2023-12-15,136.779999,31.100000,14.49,62.720001,197.570007,32.020000,154.039993,15.380000,107.290001,73.419998,75.430000,29.719999,136.089996,1.010,584.679993,...,18.290001,110.720001,44.790001,128.639999,40.029999,38.540001,118.690002,270.179993,66.550003,54.779999,43.840000,19.830000,196.289993,19.389999,29.350000
2023-12-18,137.529999,30.520000,14.24,60.660000,195.889999,32.209999,153.419998,15.360000,108.059998,74.650002,75.730003,29.730000,134.860001,0.938,599.130005,...,18.040001,110.709999,46.459999,130.470001,40.080002,38.799999,118.800003,266.679993,66.260002,55.330002,43.040001,20.200001,196.720001,19.059999,29.629999
2023-12-19,139.789993,32.419998,14.30,60.860001,196.940002,32.750000,153.600006,16.100000,108.419998,74.830002,76.519997,30.360001,136.179993,1.020,604.640015,...,18.469999,111.580002,46.820000,130.240005,41.330002,39.880001,119.970001,269.420013,67.510002,56.930000,43.930000,19.940001,198.080002,19.459999,29.770000


In [334]:
"""
Displays the first few rows of the df_picks_mp DataFrame for a quick overview.
"""

# Bare expression as the last statement of the cell: the notebook renders the returned rows
df_picks_mp.head()  # head() without an argument returns the first 5 rows

Unnamed: 0,date,days_lookback,syms_freq,symbols
0,2023-12-20,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']"
1,2023-12-19,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']"
2,2023-12-18,"[30, 60, 120]",sym_freq_8,['SHV']
3,2023-12-15,"[30, 60, 120]",sym_freq_8,['SHV']
4,2023-12-13,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']"


In [335]:
"""
Sets a target investment amount and calls a function to calculate portfolio values.
"""

# Set the total target investment amount for each day's picks:
portf_target = 1000  # $1000 is split equally among a day's picked symbols; SPY separately gets the same $1000

# Buy each day's picks (and SPY) one trading day after the pick date; rows without data yield None:
date_buy, shares_syms, value_portf, shares_SPY, value_SPY = \
    calc_portf_value_date_buy(df_picks_mp.date, df_picks_mp.symbols, df_close, portf_target, n=1, verbose=verbose)

# Breakdown of the function call:
# - calc_portf_value_date_buy: Function defined in an earlier cell of this notebook
# - df_picks_mp.date: The 'date' column of df_picks_mp (pick dates as YYYY-MM-DD strings)
# - df_picks_mp.symbols: The 'symbols' column of df_picks_mp (each entry is the string form of a list of symbols)
# - df_close: DataFrame containing historical closing prices
# - portf_target: Total target investment amount ($1000 in this case)
# - n=1: Execute the buy 1 trading day after the pick date
# - verbose=verbose: Print the calculated values if verbose mode is enabled
# Returned lists (one entry per row of df_picks_mp): buy date, shares per symbol, portfolio value,
# SPY shares, SPY value

++++++++++++++
date: 2023-12-20
n: 1
next_date_n: None
close_date_n: None
syms: ['GPS', 'SHV']
++++++++++++++
l_syms: ['GPS', 'SHV']
sym_not_in_df_close: False
No data for close_date_n None, pick's portf value = None
No data for close_date_n None, SPY portf value =    None

++++++++++++++
date: 2023-12-19
n: 1
next_date_n: 2023-12-20
close_date_n: 2023-12-20
syms: ['GPS', 'SHV']
++++++++++++++
date = 2023-12-20, l_syms = ['GPS', 'SHV'], ar_price = [ 20.68000031 109.91999817], ar_shares = [24.  4.], ar_value = [496.32000732 439.67999268], portf_value = 936.0
2023-12-20 portf_value = 936.0
date = 2023-12-20, l_syms = ['SPY'], ar_price = [468.26000977], ar_shares = [2.], ar_value = [936.52001953], portf_value = 936.52001953125
2023-12-20 portf_value = 936.52001953125
close_date_n pick's portf value = 936.0
close_date_n SPY portf value =    936.52001953125

++++++++++++++
date: 2023-12-18
n: 1
next_date_n: 2023-12-19
close_date_n: 2023-12-19
syms: ['SHV']
++++++++++++++
date = 2023-12-19, 

In [336]:
"""
Adds buy information, including dates, shares, and values, for both the portfolio and SPY to the df_picks_mp DataFrame.
"""

# Add columns for portfolio buy information (entries are None where no buy could be calculated):
df_picks_mp['date_buy'] = date_buy  # Date of portfolio purchase
df_picks_mp['sh_portf_buy'] = shares_syms  # Array of whole shares bought, one value per symbol
df_picks_mp['$_portf_buy'] = value_portf  # Total value of the portfolio at purchase

# Add columns for SPY buy information:
df_picks_mp['sh_SPY_buy'] = shares_SPY  # One-element array with the number of SPY shares bought
df_picks_mp['$_SPY_buy'] = value_SPY  # Total value of SPY at purchase

# Bare expression as the last statement of the cell: the notebook renders the DataFrame.
# Only the buy columns exist at this point; the sell columns are added by a later cell.
df_picks_mp  # Show the DataFrame with the added buy columns

Unnamed: 0,date,days_lookback,syms_freq,symbols,date_buy,sh_portf_buy,$_portf_buy,sh_SPY_buy,$_SPY_buy
0,2023-12-20,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",,,,,
1,2023-12-19,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-20,"[24.0, 4.0]",936.000000,[2.0],936.520020
2,2023-12-18,"[30, 60, 120]",sym_freq_8,['SHV'],2023-12-19,[9.0],989.009995,[2.0],949.679993
3,2023-12-15,"[30, 60, 120]",sym_freq_8,['SHV'],2023-12-18,[9.0],989.009995,[2.0],943.940002
4,2023-12-13,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-14,"[23.0, 4.0]",929.339985,[2.0],940.208008
...,...,...,...,...,...,...,...,...,...
288,2023-03-20,"[30, 60, 120]",sym_freq_9,"['FTSM', 'SHV']",2023-03-21,"[8.0, 4.0]",883.741394,[2.0],788.875000
289,2023-03-17,"[30, 60, 120]",sym_freq_9,"['FTSM', 'SHV']",2023-03-20,"[8.0, 4.0]",884.320007,[2.0],778.650940
290,2023-03-17,"[30, 60, 120]",sym_freq_8,['GE'],2023-03-20,[11.0],987.678345,[2.0],778.650940
291,2023-03-16,"[30, 60, 120]",sym_freq_9,"['FTSM', 'SHV']",2023-03-17,"[8.0, 4.0]",883.701630,[2.0],771.235046


In [337]:
"""
Calculates portfolio values for multiple dates and symbol combinations, using pre-determined shares and handling potential missing data.
"""

# Value the shares bought above at the close 4 trading days after each pick date.
# The names shares_syms, value_portf, shares_SPY and value_SPY from the buy step are rebound here.
date_exec, shares_syms, value_portf, shares_SPY, value_SPY = \
   calc_portf_value_date_n(df_picks_mp.date, df_picks_mp.symbols, df_close, df_picks_mp.sh_portf_buy, df_picks_mp.sh_SPY_buy, n=4, verbose=verbose)

# Breakdown of the function call:
# - calc_portf_value_date_n: Function name (defined in a previous code block)
# - df_picks_mp.date: The 'date' column of df_picks_mp (pick dates as YYYY-MM-DD strings)
# - df_picks_mp.symbols: The 'symbols' column of df_picks_mp (each entry is the string form of a list of symbols)
# - df_close: DataFrame containing historical closing prices
# - df_picks_mp.sh_portf_buy: Shares bought for each symbol (column added from the buy calculation)
# - df_picks_mp.sh_SPY_buy: SPY shares bought (column added from the buy calculation)
# - n=4: Value the holdings 4 trading days after the pick date (the buy used n=1)
# - verbose=verbose: Print the calculated values if verbose mode is enabled

# Note: rows whose offset date is outside df_close, or whose buy shares are None, yield None in every returned list.

No data for close_date_n None, pick's portf value = None

No data for close_date_n None, pick's portf value = None

No data for close_date_n None, pick's portf value = None

No data for close_date_n None, pick's portf value = None

date = 2023-12-19, l_syms = ['GPS', 'SHV'], ar_price = [ 21.60000038 109.88999939], ar_shares = [23.  4.], ar_value = [496.80000877 439.55999756], portf_value = 936.3600063323975
2023-12-19 portf_value = 936.3600063323975
date = 2023-12-19, l_syms = ['SPY'], ar_price = [474.83999634], ar_shares = [2.], ar_value = [949.67999268], portf_value = 949.6799926757812
2023-12-19 portf_value = 949.6799926757812
next_date_n pick's portf value = 936.3600063323975
next_date_n SPY portf value =    949.6799926757812

date = 2023-12-18, l_syms = ['GPS', 'SHV'], ar_price = [ 20.96999931 109.88999939], ar_shares = [23.  4.], ar_value = [482.30998421 439.55999756], portf_value = 921.8699817657471
2023-12-18 portf_value = 921.8699817657471
date = 2023-12-18, l_syms = ['SPY'], 

In [338]:
"""
Adds calculated portfolio values, performance metrics, and a comparison to SPY to the df_picks_mp DataFrame.
"""

# Add columns for portfolio sell information (entries are None where no sell value could be calculated):
df_picks_mp['date_sell'] = date_exec  # Date of portfolio sale
df_picks_mp['sh_portf_sell'] = shares_syms  # Shares sold for each symbol (the shares that were bought)
df_picks_mp['$_portf_sell'] = value_portf  # Total value of the portfolio at sale

# Percentage change of the portfolio from buy to sell: (sell / buy - 1) * 100.
# Rows with a missing buy or sell value stay empty, as the displayed table shows.
df_picks_mp['%_portf_chg'] = (df_picks_mp['$_portf_sell'] / df_picks_mp['$_portf_buy'] - 1) * 100

# Add columns for SPY sell information:
df_picks_mp['sh_SPY_sell'] = shares_SPY  # Number of SPY shares sold
df_picks_mp['$_SPY_sell'] = value_SPY  # Total value of SPY at sale

# Percentage change of SPY over the same buy and sell dates:
df_picks_mp['%_SPY_chg'] = (df_picks_mp['$_SPY_sell'] / df_picks_mp['$_SPY_buy'] - 1) * 100

# Excess return of the picks over SPY, in percentage points (positive = picks beat SPY):
df_picks_mp['dif_%_chg'] = df_picks_mp['%_portf_chg'] - df_picks_mp['%_SPY_chg']

In [339]:
"""
Adds a "win" column to indicate outperformance of SPY and displays the first 20 rows of the DataFrame.
"""

# Add a "win" column to indicate whether the portfolio outperformed SPY:
# NOTE: rows with a missing 'dif_%_chg' also get 0 (a missing value is not > 0), so in this
# column a 0 means "lost or tied against SPY, or no data" — see the first rows of the displayed table.
df_picks_mp['win'] = np.where(df_picks_mp['dif_%_chg'] > 0, 1, 0)  # 1 for outperformance, 0 otherwise

# Bare expression as the last statement of the cell: the notebook renders the first 20 rows
df_picks_mp.head(20)  # Show the initial rows with the added columns

Unnamed: 0,date,days_lookback,syms_freq,symbols,date_buy,sh_portf_buy,$_portf_buy,sh_SPY_buy,$_SPY_buy,date_sell,sh_portf_sell,$_portf_sell,%_portf_chg,sh_SPY_sell,$_SPY_sell,%_SPY_chg,dif_%_chg,win
0,2023-12-20,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",,,,,,,,,,,,,,0
1,2023-12-19,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-20,"[24.0, 4.0]",936.0,[2.0],936.52002,,,,,,,,,0
2,2023-12-18,"[30, 60, 120]",sym_freq_8,['SHV'],2023-12-19,[9.0],989.009995,[2.0],949.679993,,,,,,,,,0
3,2023-12-15,"[30, 60, 120]",sym_freq_8,['SHV'],2023-12-18,[9.0],989.009995,[2.0],943.940002,,,,,,,,,0
4,2023-12-13,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-14,"[23.0, 4.0]",929.339985,[2.0],940.208008,2023-12-19,"[23.0, 4.0]",936.360006,0.755377,[2.0],949.679993,1.007435,-0.252058,0
5,2023-12-12,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-13,"[23.0, 4.0]",927.513988,[2.0],937.200195,2023-12-18,"[23.0, 4.0]",921.869982,-0.608509,[2.0],943.940002,0.719143,-1.327652,0
6,2023-12-11,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-12,"[23.0, 4.0]",928.96471,[2.0],924.451904,2023-12-15,"[23.0, 4.0]",929.609999,0.069463,[2.0],938.659973,1.536918,-1.467455,0
7,2023-12-08,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-11,"[22.0, 4.0]",921.405289,[2.0],920.248901,2023-12-14,"[22.0, 4.0]",908.039986,-1.450535,[2.0],940.208008,2.168881,-3.619416,0
8,2023-12-07,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-08,"[23.0, 4.0]",937.585289,[2.0],916.683411,2023-12-13,"[23.0, 4.0]",927.513988,-1.074174,[2.0],937.200195,2.238154,-3.312328,0
9,2023-12-06,"[30, 60, 120]",sym_freq_8,"['GPS', 'SHV']",2023-12-07,"[23.0, 4.0]",930.455301,[2.0],912.759338,2023-12-12,"[23.0, 4.0]",928.96471,-0.1602,[2.0],924.451904,1.281013,-1.441213,0


#### Model Performance Not Good: the picks beat SPY in only about 42% of attempts (see the win rate below)

In [340]:
"""
Calculates win rate and overall performance relative to SPY.
"""

# Count the number of wins (outperformances of SPY):
wins = df_picks_mp['win'].sum()  # 'win' is 1 for a win and 0 otherwise, so the sum is the number of wins

# Count the number of valid attempts (excluding missing data):
attempts = len(df_picks_mp['dif_%_chg'].dropna())  # Rows that have both a portfolio and a SPY percentage change

# Calculate the win rate:
# NOTE(review): attempts is 0 when no row has a 'dif_%_chg' value; the division is not guarded against that.
win_rate = wins / attempts  # Fraction of valid attempts in which the picks beat SPY

# Print the win rate, wins, and attempts:
print(f'win_rate: {win_rate:0.6f}, wins: {wins}, attempts: {attempts}')

# Print the sum over all rows of the percentage-point difference between the portfolio and SPY:
# NOTE(review): the printed label 'sum(df_picksf_%_chg)' looks garbled; the value is the sum of df_picks_mp["dif_%_chg"].
print(f'sum(df_picksf_%_chg): {df_picks_mp["dif_%_chg"].sum():0.6f}')  # Positive = net outperformance, negative = net underperformance

win_rate: 0.419014, wins: 119, attempts: 284
sum(df_picksf_%_chg): -105.312848
