In [None]:
# import sys
# from pathlib import Path
# import pandas as pd
# import os
# from IPython.display import display, Markdown  # Assuming you use these for display

# # --- 1. PANDAS OPTIONS (No change) ---
# pd.set_option('display.max_columns', None)
# pd.set_option('display.width', 1500)

# # --- 2. IPYTHON AUTORELOAD (No change) ---
# %load_ext autoreload
# %autoreload 2

# # --- 3. ROBUST PATH CONFIGURATION (MODIFIED) ---

# # Get the current working directory of the notebook
# NOTEBOOK_DIR = Path.cwd()

# # Find the project ROOT directory by going up from the notebook's location
# # This is robust and works even if you move the notebook deeper.
# ROOT_DIR = NOTEBOOK_DIR.parent.parent

# # You could also define an output directory here if needed
# OUTPUT_DIR = ROOT_DIR / 'output'
# DATA_DIR = ROOT_DIR / 'data'

# # Define key project directories relative to the ROOT
# SELECTION_RESULTS = OUTPUT_DIR  / 'selection_results'
# BACKTEST_RESULTS = OUTPUT_DIR / 'backtest_results'
# SRC_DIR = ROOT_DIR / 'src'


# # Add the 'src' directory to the Python path so you can import 'utils'
# if str(SRC_DIR) not in sys.path:
#     sys.path.append(str(SRC_DIR))

# # --- 4. VERIFICATION (IMPROVED) ---
# print(f"✅ Project Root Directory: {ROOT_DIR}")
# print(f"✅ Source Directory (for utils): {SRC_DIR}")
# print(f"✅ Selection Results Directory (for input): {SELECTION_RESULTS}")

# # Verify that the key directories exist. This helps catch path errors early.
# assert ROOT_DIR.exists(), f"ROOT directory not found at: {ROOT_DIR}"
# assert SRC_DIR.exists(), f"Source directory not found at: {SRC_DIR}"
# assert SELECTION_RESULTS.exists(), f"Data directory not found at: {SELECTION_RESULTS}"

# # --- 5. IMPORT YOUR CUSTOM MODULE ---
# # This will now work correctly
# import utils
# from config import DATE_STR

# print("\n✅ Successfully imported 'utils' module and DATE_STR.")

In [51]:
import sys
from pathlib import Path
import pandas as pd
from IPython.display import display, Markdown

# --- 1. PANDAS OPTIONS (No change) ---
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1500)

# --- 2. IPYTHON AUTORELOAD (No change, assuming you're in a notebook) ---
# %load_ext autoreload
# %autoreload 2

# --- 3. ROBUST PATH CONFIGURATION (FIXED FOR YOUR STRUCTURE) ---

# Get the directory where your script is currently running
CURRENT_DIR = Path.cwd()  # This will be '.../notebooks/_working'

# The directory containing 'config.py' is the parent of the current directory
CONFIG_DIR = CURRENT_DIR.parent # This will be '.../notebooks'

# Add the directory containing config.py to the Python path
# so that the import statement `from config import ...` can find it.
if str(CONFIG_DIR) not in sys.path:
    sys.path.append(str(CONFIG_DIR))

# --- 4. VERIFICATION ---
print(f"✅ Current Script Directory: {CURRENT_DIR}")
print(f"✅ Directory containing config.py (added to path): {CONFIG_DIR}")

# Assert that the config file we want to import actually exists in that location.
# This gives a much clearer error if the path is wrong.
config_file_path = CONFIG_DIR / 'config.py'
assert config_file_path.exists(), f"Error: config.py not found at {config_file_path}"


# --- 5. IMPORT YOUR CUSTOM MODULE ---
# This will now work correctly because its parent directory ('.../notebooks/') is on the path.
from config import DATE_STR
# Note: The import for 'utils' is removed as its location is not specified
# and the old SRC_DIR path is no longer valid for this structure.
# import utils

print("\n✅ Successfully imported DATE_STR from config.py.")
print(f"✅ The value of DATE_STR is: '{DATE_STR}'")


# --- The following paths are commented out as they are based on a
# --- 'standard project structure' you plan to adopt later.

ROOT_DIR = CURRENT_DIR.parent.parent # This would point to the folder containing 'notebooks'
OUTPUT_DIR = ROOT_DIR / 'output'
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'
SELECTION_RESULTS = OUTPUT_DIR  / 'selection_results'
BACKTEST_RESULTS = OUTPUT_DIR / 'backtest_results'
# SRC_DIR = ROOT_DIR / 'src'
# ... and so on

✅ Current Script Directory: c:\Users\ping\Files_win10\python\py311\stocks\notebooks\_working
✅ Directory containing config.py (added to path): c:\Users\ping\Files_win10\python\py311\stocks\notebooks

✅ Successfully imported DATE_STR from config.py.
✅ The value of DATE_STR is: '2025-06-10'


In [65]:
# Get a list of all .parquet files in the directory
# Using .glob('*.parquet') is a safe way to get only the files you want
# Gets all files ending with .parquet that also contain 'df_finviz'
_file_list = [f.name for f in SELECTION_RESULTS.glob(f"*{DATE_STR}*")]
for i, _file in enumerate(_file_list):
    print(f"{i}. {_file}")

0. 2025-06-10_short_term_mean_reversion.csv
1. 2025-06-10_short_term_mean_reversion.parquet
2. 2025-06-10_short_term_mean_reversion_params.json


In [53]:
df_selections = pd.read_parquet(SELECTION_RESULTS / _file_list[1])
print(f'df_selections:\n{df_selections}')

df_selections:
          RSI  Change %  ATR/Price %  Avg Volume, M  Rel Volume  Debt/Eq  ROE %   Price     z_RSI  z_Change%  z_RelVolume  z_ATR/Price%  final_score  Weight_EW  Weight_IV  Weight_SW
Ticker                                                                                                                                                                               
DOCU    38.68     -1.96     4.945483           2.95        1.67     0.07  70.20   77.04 -1.959106  -1.490627     3.400199      2.190313     1.668415        0.1   0.063237   0.116645
BF-B    24.48      0.40     4.285200           3.43        1.51     0.68  23.14   27.77 -3.415263  -0.108550     2.825392      1.528253     1.645588        0.1   0.072981   0.115049
CPRT    22.82      0.26     2.466189           5.84        1.08     0.01  18.54   50.28 -3.585489  -0.190538     1.280600     -0.295652     1.607295        0.1   0.126810   0.112372
SE      50.45     -4.68     3.576210           5.09        1.05     0.46  1

In [73]:
df_selections.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, DOCU to CME
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   RSI            10 non-null     float64
 1   Change %       10 non-null     float64
 2   ATR/Price %    10 non-null     float64
 3   Avg Volume, M  10 non-null     float64
 4   Rel Volume     10 non-null     float64
 5   Debt/Eq        10 non-null     float64
 6   ROE %          10 non-null     float64
 7   Price          10 non-null     float64
 8   z_RSI          10 non-null     float64
 9   z_Change%      10 non-null     float64
 10  z_RelVolume    10 non-null     float64
 11  z_ATR/Price%   10 non-null     float64
 12  final_score    10 non-null     float64
 13  Weight_EW      10 non-null     float64
 14  Weight_IV      10 non-null     float64
 15  Weight_SW      10 non-null     float64
dtypes: float64(16)
memory usage: 1.3+ KB


In [54]:
tickers = df_selections.index.to_list()
tickers

['DOCU', 'BF-B', 'CPRT', 'SE', 'GE', 'PGR', 'CART', 'LULU', 'LTH', 'CME']

In [71]:
_file_list = [f.name for f in DATA_DIR.glob('*adj*.parquet')]
for i, _file in enumerate(_file_list):
    print(f"{i}. {_file}")

0. df_adj_close.parquet


In [None]:
df_adj = pd.read_parquet(DATA_DIR / _file_list[0])
start_index_pos = df_adj.index.get_loc(DATE_STR)
end_index_pos = start_index_pos + 6
df_adj = df_adj.iloc[start_index_pos:end_index_pos]
df_adj = df_adj.loc[:, tickers]
print(f"df_adj:\n{_df_adj}")

df_adj:
Ticker       DOCU   BF-B   CPRT      SE      GE     PGR   CART    LULU    LTH     CME
Date                                                                                 
2025-06-10  77.04  27.77  50.28  156.87  242.00  265.49  44.18  258.50  27.78  266.10
2025-06-11  76.24  27.50  50.50  154.44  245.52  263.22  44.17  252.28  27.49  269.65
2025-06-12  76.01  27.29  49.99  154.63  239.99  268.42  44.08  247.03  28.15  270.96
2025-06-13  74.06  26.44  48.59  154.38  236.60  267.85  43.37  239.11  27.14  269.50


In [74]:
df_adj.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4 entries, 2025-06-10 to 2025-06-13
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   DOCU    4 non-null      float64
 1   BF-B    4 non-null      float64
 2   CPRT    4 non-null      float64
 3   SE      4 non-null      float64
 4   GE      4 non-null      float64
 5   PGR     4 non-null      float64
 6   CART    4 non-null      float64
 7   LULU    4 non-null      float64
 8   LTH     4 non-null      float64
 9   CME     4 non-null      float64
dtypes: float64(10)
memory usage: 352.0 bytes


In [76]:
import pandas as pd
import numpy as np

# --- Recreate DataFrames based on your .info() ---

# Tickers from df_adj columns and df_selections index
tickers = ['DOCU', 'BF-B', 'CPRT', 'SE', 'GE', 'PGR', 'CART', 'LULU', 'LTH', 'CME']

# Create df_selections
# We'll generate random data but ensure the weights in each column sum to 1
data_selections = np.random.rand(10, 16)
df_selections = pd.DataFrame(data_selections, index=tickers, columns=[
    'RSI', 'Change %', 'ATR/Price %', 'Avg Volume, M', 'Rel Volume', 
    'Debt/Eq', 'ROE %', 'Price', 'z_RSI', 'z_Change%', 'z_RelVolume', 
    'z_ATR/Price%', 'final_score', 'Weight_EW', 'Weight_IV', 'Weight_SW'
])

# Normalize weight columns to sum to 1, as portfolio weights should
for col in ['Weight_EW', 'Weight_IV', 'Weight_SW']:
    df_selections[col] = df_selections[col] / df_selections[col].sum()

# Create df_adj
dates = pd.to_datetime(['2025-06-10', '2025-06-11', '2025-06-12', '2025-06-13'])
# Generate some plausible, slightly varying stock prices
price_data = 100 + np.random.randn(4, 10).cumsum(axis=0)
df_adj = pd.DataFrame(price_data, index=dates, columns=tickers)

print("--- Sample df_selections (weights normalized) ---")
print(df_selections[['Weight_EW', 'Weight_IV', 'Weight_SW']].head())
print("\n--- Sample df_adj (prices) ---")
print(df_adj)

--- Sample df_selections (weights normalized) ---
      Weight_EW  Weight_IV  Weight_SW
DOCU   0.184034   0.018893   0.135726
BF-B   0.110222   0.137112   0.138466
CPRT   0.023160   0.127488   0.110389
SE     0.028547   0.061405   0.085478
GE     0.162411   0.030382   0.114673

--- Sample df_adj (prices) ---
                 DOCU        BF-B       CPRT          SE         GE         PGR        CART        LULU         LTH         CME
2025-06-10  99.858234  100.451057  99.993610  101.420205  98.681946  101.215458  101.701900  101.071670   99.993163  100.688834
2025-06-11  98.715731  100.435652  99.004629  103.343145  98.242306   98.737278  101.557866  100.423784  100.971974   99.733332
2025-06-12  97.094794   98.959976  99.134988  102.503836  98.409268   98.185965  101.680125  100.020151  101.585004  100.533251
2025-06-13  97.495814   97.581920  99.192663  103.482191  99.470472   98.311628   99.245495  100.818473  101.371873  100.657869


In [None]:
import pandas as pd
import numpy as np

# # --- Recreate DataFrames (assuming this setup is correct) ---
# tickers = ['DOCU', 'BF-B', 'CPRT', 'SE', 'GE', 'PGR', 'CART', 'LULU', 'LTH', 'CME']
# data_selections = np.random.rand(10, 16)
# df_selections = pd.DataFrame(data_selections, index=tickers, columns=[
#     'RSI', 'Change %', 'ATR/Price %', 'Avg Volume, M', 'Rel Volume', 
#     'Debt/Eq', 'ROE %', 'Price', 'z_RSI', 'z_Change%', 'z_RelVolume', 
#     'z_ATR/Price%', 'final_score', 'Weight_EW', 'Weight_IV', 'Weight_SW'
# ])
# for col in ['Weight_EW', 'Weight_IV', 'Weight_SW']:
#     df_selections[col] = df_selections[col] / df_selections[col].sum()
# dates = pd.to_datetime(['2025-06-10', '2025-06-11', '2025-06-12', '2025-06-13'])
# price_data = 100 + np.random.randn(4, 10).cumsum(axis=0)
# df_adj = pd.DataFrame(price_data, index=dates, columns=tickers)


# --- Main Calculation Logic ---

# 1. Define Dates
DATE_STR = '2025-06-10'
buy_date = pd.to_datetime(DATE_STR) + pd.Timedelta(days=1)
sell_date = buy_date + pd.Timedelta(days=1)

print(f"Buy Date: {buy_date.date()}")
print(f"Sell Date: {sell_date.date()}")

# 2. Extract prices for buy and sell dates
buy_prices = df_adj.loc[buy_date]
sell_prices = df_adj.loc[sell_date]

# 3. Calculate individual stock returns
individual_returns = (sell_prices / buy_prices) - 1

# 4. Calculate portfolio performance
weights_df = df_selections[['Weight_EW', 'Weight_IV', 'Weight_SW']]
weighted_returns = weights_df.multiply(individual_returns, axis=0)
portfolio_performance = weighted_returns.sum()

# 5. Display the final results (CORRECTED aPPROACH)

print("\n--- Individual Stock Returns ---")
# Just print the Series directly. Pandas will format it nicely.
# To show as percentages, we can map a formatting function.
print((individual_returns * 100).map('{:.2f}%'.format))


print("\n--- Portfolio Performance ---")
# Rename for clarity and print the final Series
portfolio_performance.name = "Portfolio Return"
print((portfolio_performance * 100).map('{:.4f}%'.format))

Buy Date: 2025-06-11
Sell Date: 2025-06-12

--- Individual Stock Returns ---
DOCU    -0.53%
BF-B     0.15%
CPRT    -1.04%
SE       0.07%
GE       2.45%
PGR      0.15%
CART    -1.38%
LULU     0.13%
LTH     -0.42%
CME      0.92%
dtype: object

--- Portfolio Performance ---
Weight_EW    -0.5048%
Weight_IV     0.1800%
Weight_SW    -0.2296%
Name: Portfolio Return, dtype: object


In [77]:
# --- Main Calculation Logic ---

# 1. Define Dates
# DATE_STR = '2025-06-10'
buy_date = pd.to_datetime(DATE_STR) + pd.Timedelta(days=1)
sell_date = buy_date + pd.Timedelta(days=1)

print(f"\nBuy Date: {buy_date.date()}")
print(f"Sell Date: {sell_date.date()}\n")

# 2. Extract prices for buy and sell dates
# Using .loc to select rows by their index (the dates)
buy_prices = df_adj.loc[buy_date]
sell_prices = df_adj.loc[sell_date]

# 3. Calculate individual stock returns for the holding period
# Return = (Sell Price / Buy Price) - 1
individual_returns = (sell_prices / buy_prices) - 1

print("--- Individual Stock Returns ---")
print(individual_returns.to_frame('Return').style.format({'Return': '{:.2%}'}))

# 4. Calculate the performance for each portfolio
# Portfolio Return = sum(weight_of_stock * return_of_stock)

# Get the weight columns from df_selections
weights_df = df_selections[['Weight_EW', 'Weight_IV', 'Weight_SW']]

# Multiply the individual returns by the weights for each portfolio
# The DataFrames are aligned by their index (the stock tickers)
weighted_returns = weights_df.multiply(individual_returns, axis=0)

# Sum the weighted returns for each portfolio to get the final performance
portfolio_performance = weighted_returns.sum()

# 5. Display the final results
print("\n--- Portfolio Performance ---")
# Rename for clarity and format as percentage
portfolio_performance.name = "Portfolio Return"
print(portfolio_performance.to_frame().style.format({'Portfolio Return': '{:.4%}'}))


Buy Date: 2025-06-11
Sell Date: 2025-06-12

--- Individual Stock Returns ---
<pandas.io.formats.style.Styler object at 0x0000026A64D3D4D0>

--- Portfolio Performance ---
<pandas.io.formats.style.Styler object at 0x0000026A64933D90>


In [62]:
_file_list = [f.name for f in BACKTEST_RESULTS.glob('*')]
for i, _file in enumerate(_file_list):
    print(f"{i}. {_file}")

0. backtest_master_results.csv
1. backtest_master_results.parquet


In [63]:
df_backtest = pd.read_parquet(BACKTEST_RESULTS / _file_list[1])
print(f'df_backtest:\n{df_backtest.head(6)}')

df_backtest:
  actual_selection_date_used  average_return  filter_max_debt_eq  filter_min_avg_volume_m  filter_min_price  filter_min_roe_pct inv_vol_col_name                          log_file  n_select_actual  n_select_requested  num_attempted_trades  num_failed_or_skipped_trades  num_selected_tickers  num_successful_trades  portfolio_return  portfolio_return_normalized       run_timestamp scheme  score_weight_change  score_weight_rel_volume  score_weight_rsi  score_weight_volatility selection_date  sharpe_ratio_period  std_dev_return  total_weight_traded  win_rate
0                 2025-06-11       -0.017819                 1.5                      2.0              10.0                 5.0      ATR/Price %  backtest_run_20250616_162044.log               10                  10                    10                             0                    10                     10         -0.017819                    -0.017819 2025-06-16 16:20:44     EW                 0.35                     

In [64]:
# 1. Create the boolean mask
condition = df_backtest ['actual_selection_date_used'] == '2025-06-10'

# 2. Use the mask with .loc to select the rows
_df_backtest = df_backtest.loc[condition]

print(f'_df_backtest:\n{_df_backtest}')

_df_backtest:
  actual_selection_date_used  average_return  filter_max_debt_eq  filter_min_avg_volume_m  filter_min_price  filter_min_roe_pct inv_vol_col_name                          log_file  n_select_actual  n_select_requested  num_attempted_trades  num_failed_or_skipped_trades  num_selected_tickers  num_successful_trades  portfolio_return  portfolio_return_normalized       run_timestamp scheme  score_weight_change  score_weight_rel_volume  score_weight_rsi  score_weight_volatility selection_date  sharpe_ratio_period  std_dev_return  total_weight_traded  win_rate
3                 2025-06-10       -0.001627                 1.5                      2.0              10.0                 5.0      ATR/Price %  backtest_run_20250616_162044.log               10                  10                    10                             0                    10                     10         -0.001627                    -0.001627 2025-06-16 16:20:44     EW                 0.35                    