In [None]:
# Cell 1: Imports and Setup for backtest.ipynb

import sys
from pathlib import Path
# import os # No longer strictly needed here

# Put the parent of the current working directory on the import path
# (per the original note this is the 'notebooks' directory; it depends on
# where the kernel was started, so launch the notebook from its own folder).
notebooks_dir = Path.cwd().parent
notebooks_dir_entry = str(notebooks_dir)
if notebooks_dir_entry not in sys.path:
    # Prepend rather than append so this directory is searched first.
    sys.path.insert(0, notebooks_dir_entry)

import pandas as pd
# import numpy as np # Not directly used in this cell
import logging
import datetime as dt # For Cell 4
import matplotlib.pyplot as plt # For Cell 4

from backtest.backtest_engine import run_backtest 
import backtest.backtest_utils as utils
import backtest.backtest_config as config # To access config variables

# --- Logging Setup for the Notebook ---
# Named logger shared by every cell below. The level is (re)applied on every
# run of this cell; the stream handler is attached only the first time so that
# re-running the cell does not produce duplicated log lines.
notebook_logger = logging.getLogger("backtest_notebook_per_minute")
notebook_logger.setLevel(logging.INFO)
if len(notebook_logger.handlers) == 0:
    ch = logging.StreamHandler()
    formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
    ch.setFormatter(formatter)
    notebook_logger.addHandler(ch)

# notebook_logger.info("Backtesting Notebook Setup Complete (for Per-Minute Model).")
# notebook_logger.info(f"Backtest on markets resolving: {config.BACKTEST_START_DATE_STR} to {config.BACKTEST_END_DATE_STR}")
# # notebook_logger.info(f"Decision Time: EACH MINUTE (driven by feature data rows)") # Old DECISION_TIME... not used
# notebook_logger.info(f"Model to be used from: {config.MODEL_DIR}") # This now points to .../logreg_per_minute

In [None]:
# Cell 2: Load Model and Data

# Load the model artefacts and the feature table used by the backtest.
#
# Failures are logged rather than re-raised (best-effort), and the next cell
# decides whether to run by checking that `model` / `all_features_df` exist.
# To keep that check honest when this cell is re-run, first drop anything a
# previous run left behind: otherwise a failed load would leave stale objects
# in the namespace and the backtest would silently run on old inputs.
for _stale_name in ("model", "scaler", "model_feature_names", "all_features_df"):
    globals().pop(_stale_name, None)

try:
    notebook_logger.info("Loading PER-MINUTE model, scaler, and feature names...")
    model, scaler, model_feature_names = utils.load_model_and_dependencies() # Uses updated paths from config
    notebook_logger.info("Model components loaded successfully.")

    notebook_logger.info("Loading PER-MINUTE features data...")
    all_features_df = utils.load_features_for_backtest() # Uses updated pattern from config
    notebook_logger.info(f"Per-minute features data loaded successfully with {len(all_features_df)} decision points.")

    if all_features_df.empty:
        notebook_logger.critical("Features DataFrame is empty. Aborting backtest.")
        # SystemExit is not an Exception subclass, so it is not swallowed by
        # the handlers below and stops the cell.
        raise SystemExit("Features DataFrame empty.")

except FileNotFoundError:
    # exc_info=True keeps the traceback (and therefore the missing path) in the log.
    notebook_logger.critical(
        "Essential file not found (model, scaler, features, or data). Aborting backtest.",
        exc_info=True,
    )
except Exception as e:
    notebook_logger.critical(f"An unexpected error occurred during loading: {e}", exc_info=True)

In [None]:
# Cell 3: Run the Backtest

# Run the backtest only when the loading cell produced both the model and a
# non-empty feature table; otherwise report why nothing was run.
backtest_inputs_missing = (
    'model' not in locals()
    or 'all_features_df' not in locals()
    or all_features_df.empty
)

if backtest_inputs_missing:
    notebook_logger.error("Model or features data not loaded. Cannot run backtest.")
else:
    notebook_logger.info("Proceeding to run the backtest engine with per-minute decision model...")
    try:
        # run_backtest is unpacked into (total P&L in cents, trade count).
        total_pnl_cents, total_trades = run_backtest(
            all_features_df,
            model,
            scaler,
            model_feature_names,
        )

        # Summary of the run; P&L is converted from cents to USD for display.
        notebook_logger.info("--- Main Backtest Execution Finished (Per-Minute Model) ---")
        notebook_logger.info(f"Overall P&L from backtest engine: {total_pnl_cents / 100.0 :.2f} USD")
        notebook_logger.info(f"Total trades considered/made by engine: {total_trades}")
        notebook_logger.info(f"Daily trade logs are in: {config.LOG_DIR}")

    except Exception as e:
        # Any failure in the engine (or in the summary above) is logged with
        # its traceback instead of aborting the notebook.
        notebook_logger.error(f"An error occurred during run_backtest: {e}", exc_info=True)

In [None]:
# Cell 4: (Optional) Basic Analysis of Trade Logs

# This cell can be expanded to load the daily CSV logs and perform more detailed analysis,
# create equity curves, calculate Sharpe ratio, etc.
# Ensure matplotlib is imported if not already done in Cell 1
import matplotlib.pyplot as plt
import pandas as pd # ensure imported if not already
from pathlib import Path # ensure imported for config.LOG_DIR

# Post-run analysis: gather every per-day trade log found in config.LOG_DIR,
# report the total P&L, and plot the cumulative P&L ordered by execution time.
if config.LOG_DIR.exists():
    # Sort once so the listing and the load order agree.
    log_files = sorted(config.LOG_DIR.glob("*_trades.csv"))
    if log_files:
        notebook_logger.info(f"Found {len(log_files)} trade log files:")
        for lf in log_files:
            notebook_logger.info(f" - {lf.name}")

        # Collect the per-file frames and concatenate once at the end;
        # growing a DataFrame with concat inside the loop is quadratic.
        daily_frames = []
        for lf in log_files:
            try:
                daily_df = pd.read_csv(lf)
                if not daily_df.empty:
                    # Filename prefix before '_trades' (the date, per the file naming).
                    daily_df['trade_date_file'] = lf.stem.split('_trades')[0]
                    daily_frames.append(daily_df)
            except pd.errors.EmptyDataError:
                notebook_logger.warning(f"Log file {lf.name} is empty.")
            except Exception as e:
                # One unreadable file should not prevent analysing the rest.
                notebook_logger.error(f"Error reading log file {lf.name}: {e}")

        if daily_frames:
            all_daily_logs_df = pd.concat(daily_frames, ignore_index=True)
        else:
            all_daily_logs_df = pd.DataFrame()

        if not all_daily_logs_df.empty:
            notebook_logger.info(f"\nLoaded a total of {len(all_daily_logs_df)} trades from daily logs.")
            display(all_daily_logs_df.head())

            if 'pnl_cents' not in all_daily_logs_df.columns:
                # Without this guard the cell would die with a bare KeyError.
                notebook_logger.error("Column 'pnl_cents' not found in logs, cannot compute P&L.")
            else:
                pnl_from_logs = all_daily_logs_df['pnl_cents'].sum()
                notebook_logger.info(f"Total P&L calculated from concatenated log files: {pnl_from_logs / 100.0:.2f} USD")

                if 'trade_execution_time_utc' in all_daily_logs_df.columns and not all_daily_logs_df['trade_execution_time_utc'].isnull().all():
                    try:
                        all_daily_logs_df['trade_datetime_utc'] = pd.to_datetime(all_daily_logs_df['trade_execution_time_utc'])
                        # Stable sort: trades sharing a timestamp keep their log
                        # order, so the cumulative curve is reproducible.
                        all_daily_logs_df = all_daily_logs_df.sort_values(by='trade_datetime_utc', kind='stable')
                        all_daily_logs_df['cumulative_pnl_cents'] = all_daily_logs_df['pnl_cents'].cumsum()

                        fig, ax = plt.subplots(figsize=(12, 6))
                        all_daily_logs_df.set_index('trade_datetime_utc')['cumulative_pnl_cents'].plot(ax=ax)
                        ax.set_title('Cumulative P&L Over Backtest Period (Per-Minute Decisions)')
                        ax.set_xlabel('Trade Execution Time (UTC)')
                        ax.set_ylabel('Cumulative P&L (cents)')
                        ax.grid(True)
                        plt.show()
                    except Exception as e:
                        # Covers unparseable timestamps as well as plotting failures.
                        notebook_logger.error(f"Error plotting P&L curve: {e}")
                else:
                    notebook_logger.warning("Column 'trade_execution_time_utc' not found or all null in logs, cannot plot P&L curve by time.")
        else:
            notebook_logger.info("No trade data loaded from log files for analysis.")
    else:
        notebook_logger.info("No trade log files found in the log directory.")
else:
    notebook_logger.warning(f"Log directory {config.LOG_DIR} does not exist.")