In [33]:
import sys
from pathlib import Path
import pandas as pd
from IPython.display import display, Markdown

# --- 1. PANDAS OPTIONS (No change) ---
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1500)

# --- 2. IPYTHON AUTORELOAD (No change, assuming you're in a notebook) ---
# %load_ext autoreload
# %autoreload 2

# --- 3. ROBUST PATH CONFIGURATION (FIXED FOR YOUR STRUCTURE) ---

# Get the directory where your script is currently running
CURRENT_DIR = Path.cwd()  # This will be '.../notebooks/_working'

# The directory containing 'config.py' is the parent of the current directory
CONFIG_DIR = CURRENT_DIR.parent # This will be '.../notebooks'

# Add the directory containing config.py to the Python path
# so that the import statement `from config import ...` can find it.
if str(CONFIG_DIR) not in sys.path:
    sys.path.append(str(CONFIG_DIR))

# --- 4. VERIFICATION ---
print(f"✅ Current Script Directory: {CURRENT_DIR}")
print(f"✅ Directory containing config.py (added to path): {CONFIG_DIR}")

# Assert that the config file we want to import actually exists in that location.
# This gives a much clearer error if the path is wrong.
config_file_path = CONFIG_DIR / 'config.py'
assert config_file_path.exists(), f"Error: config.py not found at {config_file_path}"


# --- 5. IMPORT YOUR CUSTOM MODULE ---
# This will now work correctly because its parent directory ('.../notebooks/') is on the path.
from config import DATE_STR, RISK_FREE_RATE_DAILY
# Note: The import for 'utils' is removed as its location is not specified
# and the old SRC_DIR path is no longer valid for this structure.
# import utils

print("\n✅ Successfully imported DATE_STR from config.py.")
print(f"✅ The value of DATE_STR is: '{DATE_STR}'")


# --- The following paths are commented out as they are based on a
# --- 'standard project structure' you plan to adopt later.

ROOT_DIR = CURRENT_DIR.parent.parent # This would point to the folder containing 'notebooks'
OUTPUT_DIR = ROOT_DIR / 'output'
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'
SELECTION_RESULTS = OUTPUT_DIR  / 'selection_results'
BACKTEST_RESULTS = OUTPUT_DIR / 'backtest_results'
# SRC_DIR = ROOT_DIR / 'src'
# ... and so on

✅ Current Script Directory: c:\Users\ping\Files_win10\python\py311\stocks\notebooks\_working
✅ Directory containing config.py (added to path): c:\Users\ping\Files_win10\python\py311\stocks\notebooks

✅ Successfully imported DATE_STR from config.py.
✅ The value of DATE_STR is: '2025-06-11'


In [16]:
# Get a list of all .parquet files in the directory
# Using .glob('*.parquet') is a safe way to get only the files you want
# Gets all files ending with .parquet that also contain 'df_finviz'
_file_list = [f.name for f in SELECTION_RESULTS.glob(f"*{DATE_STR}*")]
for i, _file in enumerate(_file_list):
    print(f"{i}. {_file}")

0. 2025-06-11_short_term_mean_reversion.csv
1. 2025-06-11_short_term_mean_reversion.parquet
2. 2025-06-11_short_term_mean_reversion_params.json


In [17]:
df_selections = pd.read_parquet(SELECTION_RESULTS / _file_list[1])
print(f'df_selections:\n{df_selections}')

df_selections:
        ROE %   Price    RSI  Change %  Rel Volume  Avg Volume, M  Debt/Eq  ATR/Price %     z_RSI  z_Change%  z_RelVolume  z_ATR/Price%  final_score  Weight_EW  Weight_IV  Weight_SW
Ticker                                                                                                                                                                               
CWAN    58.14   23.10  47.68     -6.69        2.91           3.06     0.06     3.376623 -0.906428  -3.692336     7.347584      0.634664     3.015618        0.1   0.094762   0.197792
NUE      6.54  116.66  49.38     -6.43        1.03           2.60     0.39     3.900223 -0.738860  -3.547220     1.219603      1.167119     1.627337        0.1   0.082041   0.106736
LULU    42.49  252.53  30.34     -2.31        1.42           2.77     0.40     5.084544 -2.615622  -1.247691     2.490833      2.371468     1.613179        0.1   0.062931   0.105807
BF-B    23.14   27.66  24.18     -0.41        1.48           3.44     0.68 

In [18]:
df_selections.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, CWAN to CPRT
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ROE %          10 non-null     float64
 1   Price          10 non-null     float64
 2   RSI            10 non-null     float64
 3   Change %       10 non-null     float64
 4   Rel Volume     10 non-null     float64
 5   Avg Volume, M  10 non-null     float64
 6   Debt/Eq        10 non-null     float64
 7   ATR/Price %    10 non-null     float64
 8   z_RSI          10 non-null     float64
 9   z_Change%      10 non-null     float64
 10  z_RelVolume    10 non-null     float64
 11  z_ATR/Price%   10 non-null     float64
 12  final_score    10 non-null     float64
 13  Weight_EW      10 non-null     float64
 14  Weight_IV      10 non-null     float64
 15  Weight_SW      10 non-null     float64
dtypes: float64(16)
memory usage: 1.3+ KB


In [19]:
tickers = df_selections.index.to_list()
tickers

['CWAN', 'NUE', 'LULU', 'BF-B', 'GME', 'AMH', 'ROST', 'CAVA', 'HBAN', 'CPRT']

In [20]:
_file_list = [f.name for f in DATA_DIR.glob('*adj*.parquet')]
for i, _file in enumerate(_file_list):
    print(f"{i}. {_file}")

0. df_adj_close.parquet


In [21]:
df_adj = pd.read_parquet(DATA_DIR / _file_list[0])
start_index_pos = df_adj.index.get_loc(DATE_STR)
end_index_pos = start_index_pos + 6
df_adj = df_adj.iloc[start_index_pos:end_index_pos]
df_adj = df_adj.loc[:, tickers]
print(f"df_adj:\n{df_adj}")

df_adj:
Ticker       CWAN     NUE    LULU   BF-B    GME      AMH    ROST   CAVA   HBAN   CPRT
Date                                                                                 
2025-06-11  23.10  117.13  252.28  27.50  28.55  36.0423  136.39  76.83  16.09  50.50
2025-06-12  22.82  118.45  247.03  27.29  22.14  36.3200  133.47  78.25  15.96  49.99
2025-06-13  22.11  121.92  239.11  26.44  22.14  36.4000  131.85  74.57  15.51  48.59


In [22]:
df_adj.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3 entries, 2025-06-11 to 2025-06-13
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   CWAN    3 non-null      float64
 1   NUE     3 non-null      float64
 2   LULU    3 non-null      float64
 3   BF-B    3 non-null      float64
 4   GME     3 non-null      float64
 5   AMH     3 non-null      float64
 6   ROST    3 non-null      float64
 7   CAVA    3 non-null      float64
 8   HBAN    3 non-null      float64
 9   CPRT    3 non-null      float64
dtypes: float64(10)
memory usage: 264.0 bytes


--- Portfolio Performance --- is the sum and calculated using risk-free-rate=0,    
--- Daily Sharpe Ratio (using cross-sectional std dev) --- ????

In [34]:
import pandas as pd
import numpy as np


# --- Main Calculation Logic ---

buy_date = pd.to_datetime(DATE_STR) + pd.Timedelta(days=1)
sell_date = buy_date + pd.Timedelta(days=1)

print(f"Buy Date: {buy_date.date()}")
print(f"Sell Date: {sell_date.date()}")
print(f"Daily Risk-Free Rate: {RISK_FREE_RATE_DAILY:.6f}")


# 2. Extract prices for buy and sell dates
buy_prices = df_adj.loc[buy_date]
sell_prices = df_adj.loc[sell_date]

# 3. Calculate individual stock returns
individual_returns = (sell_prices / buy_prices) - 1

# --- START OF MODIFICATIONS ---

# 3a. Calculate the standard deviation of the individual stock returns for the day
std_dev_daily_returns = individual_returns.std()

# --- END OF MODIFICATIONS ---

# 4. Calculate portfolio performance
weights_df = df_selections[['Weight_EW', 'Weight_IV', 'Weight_SW']]
weighted_returns = weights_df.multiply(individual_returns, axis=0)
portfolio_performance = weighted_returns.sum()

# --- START OF MODIFICATIONS ---

# 5. Calculate Daily Sharpe Ratio
# This uses the standard deviation of individual stock returns as the denominator,
# as we only have a single day of portfolio data.
# It measures portfolio excess return against the cross-sectional volatility of its components.
if std_dev_daily_returns > 0:
    daily_sharpe_ratio = (portfolio_performance - RISK_FREE_RATE_DAILY) / std_dev_daily_returns
else:
    # Handle case with no volatility to avoid division by zero
    daily_sharpe_ratio = pd.Series([np.nan] * len(portfolio_performance), index=portfolio_performance.index)
    
# --- END OF MODIFICATIONS ---


# 6. Display the final results

print("\n--- Individual Stock Returns ---")
print((individual_returns).map('{:.6f}'.format))


print("\n--- Portfolio Performance ---")
portfolio_performance.name = "Portfolio Return"
print((portfolio_performance).map('{:.4f}'.format))

# --- START OF MODIFICATIONS ---

print("\n--- Daily Risk Metrics ---")
print(f"Standard Deviation of Individual Stock Returns: {std_dev_daily_returns:.6f}")

print("\n--- Daily Sharpe Ratio (using cross-sectional std dev) ---")
daily_sharpe_ratio.name = "Daily Sharpe Ratio"
print(daily_sharpe_ratio.map('{:.4f}'.format))

# --- END OF MODIFICATIONS ---

Buy Date: 2025-06-12
Sell Date: 2025-06-13
Daily Risk-Free Rate: 0.000159

--- Individual Stock Returns ---
Ticker
CWAN    -0.031113
NUE      0.029295
LULU    -0.032061
BF-B    -0.031147
GME      0.000000
AMH      0.002203
ROST    -0.012138
CAVA    -0.047029
HBAN    -0.028195
CPRT    -0.028006
dtype: object

--- Portfolio Performance ---
Weight_EW    -0.0178
Weight_IV    -0.0172
Weight_SW    -0.0184
Name: Portfolio Return, dtype: object

--- Daily Risk Metrics ---
Standard Deviation of Individual Stock Returns: 0.022589

--- Daily Sharpe Ratio (using cross-sectional std dev) ---
Weight_EW    -0.7959
Weight_IV    -0.7694
Weight_SW    -0.8205
Name: Daily Sharpe Ratio, dtype: object


In [30]:
import pandas as pd
import numpy as np

# --- Main Calculation Logic ---

# 1. Define Dates
# DATE_STR = '2025-06-10'
buy_date = pd.to_datetime(DATE_STR) + pd.Timedelta(days=1)
sell_date = buy_date + pd.Timedelta(days=1)

print(f"Buy Date: {buy_date.date()}")
print(f"Sell Date: {sell_date.date()}")

# 2. Extract prices for buy and sell dates
buy_prices = df_adj.loc[buy_date]
sell_prices = df_adj.loc[sell_date]

# 3. Calculate individual stock returns
individual_returns = (sell_prices / buy_prices) - 1

# 4. Calculate portfolio performance
weights_df = df_selections[['Weight_EW', 'Weight_IV', 'Weight_SW']]
weighted_returns = weights_df.multiply(individual_returns, axis=0)
portfolio_performance = weighted_returns.sum()

# 5. Display the final results (CORRECTED aPPROACH)

print("\n--- Individual Stock Returns ---")
# Just print the Series directly. Pandas will format it nicely.
# To show as percentages, we can map a formatting function.
# print((individual_returns * 100).map('{:.2f}%'.format))
print((individual_returns).map('{:.6f}'.format))


print("\n--- Portfolio Performance ---")
# Rename for clarity and print the final Series
portfolio_performance.name = "Portfolio Return"
# print((portfolio_performance * 100).map('{:.4f}%'.format))
print((portfolio_performance).map('{:.4f}'.format))

Buy Date: 2025-06-12
Sell Date: 2025-06-13

--- Individual Stock Returns ---
Ticker
CWAN    -0.031113
NUE      0.029295
LULU    -0.032061
BF-B    -0.031147
GME      0.000000
AMH      0.002203
ROST    -0.012138
CAVA    -0.047029
HBAN    -0.028195
CPRT    -0.028006
dtype: object

--- Portfolio Performance ---
Weight_EW    -0.0178
Weight_IV    -0.0172
Weight_SW    -0.0184
Name: Portfolio Return, dtype: object


In [24]:
_file_list = [f.name for f in BACKTEST_RESULTS.glob('*')]
for i, _file in enumerate(_file_list):
    print(f"{i}. {_file}")

0. backtest_master_results.csv
1. backtest_master_results.parquet


In [25]:
df_backtest = pd.read_parquet(BACKTEST_RESULTS / _file_list[1])
print(f'df_backtest:\n{df_backtest}')

df_backtest:
   actual_selection_date_used  average_return  filter_max_debt_eq  filter_min_avg_volume_m  filter_min_price  filter_min_roe_pct inv_vol_col_name                          log_file  n_select_actual  n_select_requested  num_attempted_trades  num_failed_or_skipped_trades  num_selected_tickers  num_successful_trades  portfolio_return  portfolio_return_normalized       run_timestamp scheme  score_weight_change  score_weight_rel_volume  score_weight_rsi  score_weight_volatility selection_date  sharpe_ratio_period  std_dev_return  total_weight_traded  win_rate
0                  2025-06-11       -0.017819                 1.5                      2.0              10.0                 5.0      ATR/Price %  backtest_run_20250616_205128.log               10                  10                    10                             0                    10                     10         -0.017819                    -0.017819 2025-06-16 20:51:28     EW                 0.35                   

In [26]:
# The syntax is very similar to boolean indexing, but inside .loc
selected_df_loc = df_backtest.loc[df_backtest['actual_selection_date_used'] == DATE_STR]

print("Selected using .loc:")
print(selected_df_loc)

Selected using .loc:
  actual_selection_date_used  average_return  filter_max_debt_eq  filter_min_avg_volume_m  filter_min_price  filter_min_roe_pct inv_vol_col_name                          log_file  n_select_actual  n_select_requested  num_attempted_trades  num_failed_or_skipped_trades  num_selected_tickers  num_successful_trades  portfolio_return  portfolio_return_normalized       run_timestamp scheme  score_weight_change  score_weight_rel_volume  score_weight_rsi  score_weight_volatility selection_date  sharpe_ratio_period  std_dev_return  total_weight_traded  win_rate
0                 2025-06-11       -0.017819                 1.5                      2.0              10.0                 5.0      ATR/Price %  backtest_run_20250616_205128.log               10                  10                    10                             0                    10                     10         -0.017819                    -0.017819 2025-06-16 20:51:28     EW                 0.35             

In [None]:
####################################

In [28]:
# --- Parameters ---
NUM_TRADING_DAYS = 252
TRADING_DAYS_PER_YEAR = 252
RISK_FREE_RATE = 0.04 # Annual risk-free rate for Sharpe Ratio

In [29]:
# --- Simulate the Trading Strategy ---

# We will store the results of each part of the strategy here
trade_returns = []              # For Group 1: stores only the return of each active trade
daily_portfolio_returns = []    # For Group 2: stores return for ALL days (trade days and cash days)

# Loop through the trading days, taking steps of 2 for each cycle
# (Day i = Buy, Day i+1 = Sell)
for i in range(0, len(df_adj) - 1, 2):
    # --- Define trade dates ---
    buy_date = df_adj.index[i]
    sell_date = df_adj.index[i+1]
    
    # --- Simulate the "analyze and select" step ---
    # For each trade, we generate a new set of random weights.
    # In a real scenario, this would come from your df_selections for that day.
    # We'll use the 'EW' (Equal Weight) strategy for this simulation.
    weights = pd.Series(1/len(tickers), index=tickers) # Equal weights
    
    # --- Execute the trade ---
    buy_prices = df_adj.loc[buy_date]
    sell_prices = df_adj.loc[sell_date]
    
    # Calculate the return for this specific 1-day holding period
    individual_stock_returns = (sell_prices / buy_prices) - 1
    portfolio_trade_return = individual_stock_returns.dot(weights)
    
    # --- Log the results ---
    # For Group 1: Store the return of this active trade
    trade_returns.append(portfolio_trade_return)
    
    # For Group 2: Store the return for the invested day and the cash day
    daily_portfolio_returns.append(portfolio_trade_return) # Day 1: Invested
    daily_portfolio_returns.append(0.0)                     # Day 2: In Cash (0% return)

# Convert lists to Pandas Series for easier calculation
active_trades = pd.Series(trade_returns, name="Active Trade Returns")
total_portfolio_returns = pd.Series(daily_portfolio_returns, name="Total Portfolio Daily Returns")

# --- Calculate Group 1 Metrics: Evaluating the Core Signal ---
print("\n" + "="*50)
print("      Group 1: Core Signal Performance (Active Days Only)")
print("="*50)

avg_return_per_trade = active_trades.mean()
hit_rate = (active_trades > 0).sum() / len(active_trades) if len(active_trades) > 0 else 0

gross_profits = active_trades[active_trades > 0].sum()
gross_losses = abs(active_trades[active_trades < 0].sum())
profit_factor = gross_profits / gross_losses if gross_losses > 0 else np.inf

# Sharpe for active trades is annualized by sqrt of # of trades per year
num_trades_per_year = TRADING_DAYS_PER_YEAR / 2 
active_only_sharpe = (active_trades.mean() / active_trades.std()) * np.sqrt(num_trades_per_year) if active_trades.std() > 0 else 0

group1_stats = {
    'Average Return per Trade': f"{avg_return_per_trade:.4%}",
    'Hit Rate (Win %)': f"{hit_rate:.2%}",
    'Profit Factor': f"{profit_factor:.2f}",
    'Active-Only Sharpe Ratio': f"{active_only_sharpe:.2f}"
}
print(pd.Series(group1_stats))

# --- Calculate Group 2 Metrics: Evaluating the Total Portfolio ---
print("\n" + "="*50)
print("      Group 2: Total Portfolio Performance (Incl. Cash Days)")
print("="*50)

# Annualize the return and volatility from the total daily series
total_annual_return = total_portfolio_returns.mean() * TRADING_DAYS_PER_YEAR
total_annual_volatility = total_portfolio_returns.std() * np.sqrt(TRADING_DAYS_PER_YEAR)

# Calculate Sharpe on the total portfolio's annualized metrics
total_sharpe_ratio = (total_annual_return - RISK_FREE_RATE) / total_annual_volatility if total_annual_volatility > 0 else 0

# Calculate Drawdown
equity_curve = (1 + total_portfolio_returns).cumprod()
running_max = equity_curve.cummax()
drawdown = (equity_curve - running_max) / running_max
max_drawdown = drawdown.min()

group2_stats = {
    'Total Annualized Return': f"{total_annual_return:.2%}",
    'Total Annualized Volatility': f"{total_annual_volatility:.2%}",
    'Total Portfolio Sharpe Ratio': f"{total_sharpe_ratio:.2f}",
    'Maximum Drawdown (MDD)': f"{max_drawdown:.2%}"
}
print(pd.Series(group2_stats))


      Group 1: Core Signal Performance (Active Days Only)
Average Return per Trade    -2.6722%
Hit Rate (Win %)               0.00%
Profit Factor                   0.00
Active-Only Sharpe Ratio        0.00
dtype: object

      Group 2: Total Portfolio Performance (Incl. Cash Days)
Total Annualized Return         -336.69%
Total Annualized Volatility       30.00%
Total Portfolio Sharpe Ratio      -11.36
Maximum Drawdown (MDD)             0.00%
dtype: object


In [None]:
import pandas as pd
import numpy as np

# # --- Recreate DataFrames (Setup remains the same) ---
# tickers = ['DOCU', 'BF-B', 'CPRT', 'SE', 'GE', 'PGR', 'CART', 'LULU', 'LTH', 'CME']
# data_selections = np.random.rand(10, 16)
# df_selections = pd.DataFrame(data_selections, index=tickers, columns=[
#     'RSI', 'Change %', 'ATR/Price %', 'Avg Volume, M', 'Rel Volume',
#     'Debt/Eq', 'ROE %', 'Price', 'z_RSI', 'z_Change%', 'z_RelVolume',
#     'z_ATR/Price%', 'final_score', 'Weight_EW', 'Weight_IV', 'Weight_SW'
# ])
# for col in ['Weight_EW', 'Weight_IV', 'Weight_SW']:
#     df_selections[col] = df_selections[col] / df_selections[col].sum()

# dates = pd.to_datetime(['2025-06-10', '2025-06-11', '2025-06-12', '2025-06-13'])
# price_data = 100 + np.random.randn(4, 10).cumsum(axis=0)
# df_adj = pd.DataFrame(price_data, index=dates, columns=tickers)

# print("--- Sample df_adj (prices) ---")
# print(df_adj.round(2)) # Rounding for cleaner display

# --- Main Calculation Logic (CORRECTED PRINTING) ---

# 1. Calculate Daily Stock Returns
daily_stock_returns = df_adj.pct_change().dropna()

print("\n--- Daily Stock Returns ---")
# Format each column to a percentage string for printing
formatted_daily_returns = daily_stock_returns.applymap('{:.2%}'.format)
print(formatted_daily_returns)


# --- Parameters for Annualization ---
trading_days = 252
risk_free_rate = 0.04 # Annual risk-free rate (e.g., 2%)

# --- Store results in a dictionary ---
portfolio_stats = {}

# 2. Iterate through each portfolio to calculate its metrics
portfolio_types = ['Weight_EW', 'Weight_IV', 'Weight_SW']
for p_type in portfolio_types:
    weights = df_selections[p_type]
    daily_portfolio_returns = daily_stock_returns.dot(weights)
    
    # Calculate metrics
    volatility = daily_portfolio_returns.std()
    annualized_volatility = volatility * np.sqrt(trading_days)
    avg_daily_return = daily_portfolio_returns.mean()
    annualized_return = avg_daily_return * trading_days
    sharpe_ratio = (annualized_return - risk_free_rate) / annualized_volatility
    
    # Store the raw numeric results
    portfolio_name = p_type.replace('Weight_', '')
    portfolio_stats[portfolio_name] = {
        'Annualized Return': annualized_return,
        'Annualized Volatility': annualized_volatility,
        'Sharpe Ratio': sharpe_ratio
    }

# 5. Convert results to a DataFrame
results_df = pd.DataFrame.from_dict(portfolio_stats, orient='index')

print("\n--- Portfolio Performance & Risk Metrics ---")

# Create a copy to format for printing, keeping the original with numbers
results_to_print = results_df.copy()
results_to_print['Annualized Return'] = results_to_print['Annualized Return'].map('{:.2%}'.format)
results_to_print['Annualized Volatility'] = results_to_print['Annualized Volatility'].map('{:.2%}'.format)
results_to_print['Sharpe Ratio'] = results_to_print['Sharpe Ratio'].map('{:.2f}'.format)

# Print the formatted DataFrame
print(results_to_print)