In [1]:
import pandas as pd
from datetime import datetime 

# --- Configuration ---
FEATURE_FILE = "s3://ai-trading-copilot-curated/features/fe-yearly-run-2025-10-17-23-39-01/AAPL_features.csv" 

# --- Load and Prepare ---
try:
    print(f"Loading data from: {FEATURE_FILE}")
    df = pd.read_csv(FEATURE_FILE)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values(by='date')

    # --- Replicate the NaN drop logic from feature_builder.py ---
    # Define features that might have NaNs at the beginning due to rolling/lag
    features_to_check_for_nans = [
        'ret_1d', 'mom_5d', 'rsi_14', 'abn_volume',
        'ret_lag_1d', 'volatility_20d', 'ma_50d', 'ma_200d',
        'return_x_volume'
     ]

    print(f"Shape before dropping NaNs: {df.shape}")
    df_cleaned = df.dropna(subset=features_to_check_for_nans)
    print(f"Shape after dropping NaNs: {df_cleaned.shape}")

    # Perform the 80/20 split on the cleaned data
    split_index = int(len(df_cleaned) * 0.8)
    test_df = df_cleaned[split_index:]

    # --- Print the Test Period ---
    if not test_df.empty:
        start_test_date = test_df['date'].min().date()
        end_test_date = test_df['date'].max().date()

        print(f"\n Your test period starts on: {start_test_date}")
        print(f" Your test period ends on:   {end_test_date}")
        print("\nChoose any trading date within this range for your TEST_DATE_STR.")
    else:
        print(" ERROR: Test dataframe is empty after splitting. Check NaN handling or data.")

except FileNotFoundError:
    print(f" ERROR: Feature file not found at '{FEATURE_FILE}'. Please update the path.")
except KeyError as e:
    print(f" ERROR: A required column is missing from the CSV file: {e}")
except Exception as e:
     print(f" ERROR: Could not process file: {e}")

  from pandas.core.computation.check import NUMEXPR_INSTALLED


Loading data from: s3://ai-trading-copilot-curated/features/fe-yearly-run-2025-10-17-23-39-01/AAPL_features.csv


severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.



Shape before dropping NaNs: (752, 25)
Shape after dropping NaNs: (752, 25)

✅ Your test period starts on: 2025-03-12
✅ Your test period ends on:   2025-10-15

Choose any trading date within this range for your TEST_DATE_STR.


In [5]:
import pandas as pd
import joblib
import os
from datetime import datetime
from sagemaker.xgboost.estimator import XGBoost

# --- Configuration ---
MODEL_PATH = "model.pkl"
FEATURE_FILE = "s3://ai-trading-copilot-curated/features/fe-yearly-run-2025-10-17-23-39-01/AAPL_features.csv"
TICKER_SYMBOL = 'AAPL'
TEST_DATE_STR = "2025-09-15" 

def test_model_on_date(ticker_symbol, feature_file_path, model_path, test_date_str):
    """Loads model, gets features for a specific date, predicts, and compares to actual."""

    # --- 1. Load Model ---
    print(f"Loading model from: {model_path}")
    try:
        model = joblib.load(model_path)
    except Exception as e:
        print(f" ERROR: Could not load model: {e}")
        return

    # --- 2. Load Full Feature Data ---
    print(f"Loading features from: {feature_file_path}")
    try:
        df = pd.read_csv(feature_file_path)
        df['date'] = pd.to_datetime(df['date']) 
    except Exception as e:
        print(f" ERROR: Could not read feature file: {e}")
        return

    # --- 3. Create Target  ---
    df['target'] = (df.groupby('symbol')['close'].shift(-1) > df['close']).astype(int)

    # --- 4. Select the Specific Test Day's Data ---
    try:
        test_date = datetime.strptime(test_date_str, "%Y-%m-%d").date()
        # Get the row corresponding to the chosen date
        day_data = df[df['date'].dt.date == test_date].copy()
    except ValueError:
        print(f" ERROR: Invalid date format '{test_date_str}'. Use YYYY-MM-DD.")
        return

    if day_data.empty:
        print(f" ERROR: No data found for {ticker_symbol} on {test_date_str}. Was it a trading day?")
        return

    # --- 5. Get Features and Known Outcome ---
    known_non_features = ['date', 'open', 'high', 'low', 'close', 'volume', 'symbol', 'asof', 'target']
    features = [col for col in df.columns if col not in known_non_features]

    # Check for NaNs in features for this specific day (might happen with lags/rolling)
    if day_data[features].isnull().values.any():
        print(f" WARNING: The data for {test_date_str} contains NaN values in features. Prediction might be unreliable or fail.")
        print(day_data[features].isnull().sum())
        # Decide if you want to proceed or stop
        # return

    X_test_day = day_data[features]
    # Get the pre-calculated target value for this day (outcome of the NEXT day)
    known_outcome = day_data['target'].iloc[0] # .iloc[0] gets the value from the single row

    # --- 6. Make Prediction ---
    try:
        prediction = model.predict(X_test_day)[0]
        prediction_proba = model.predict_proba(X_test_day)[0]
    except Exception as e:
        print(f" ERROR: Prediction failed for {test_date_str}: {e}")
        return

    # --- 7. Compare and Display Results ---
    predicted_direction = "Up" if prediction == 1 else "Down"
    actual_direction = "Up" if known_outcome == 1 else "Down"
    confidence = prediction_proba[1] if predicted_direction == "Up" else prediction_proba[0]

    print(f"\n--- Historical Test Complete for {ticker_symbol} on {test_date_str} ---")
    print(f"  Features used from: {test_date_str}")
    print(f"  Model Predicted:    {predicted_direction} (Confidence: {confidence:.2%})")
    print(f"  Actual Outcome was: {actual_direction} (for day {test_date_str}'s close vs next day's close)")

    if predicted_direction == actual_direction:
        print(" Prediction was CORRECT!")
    else:
        print(" Prediction was INCORRECT.")

# --- Run the test ---
if __name__ == '__main__':
    if not os.path.exists(MODEL_PATH):
         print(f"Please ensure 'model.pkl' is in the correct path: '{MODEL_PATH}'.")
    else:
        test_model_on_date(TICKER_SYMBOL, FEATURE_FILE, MODEL_PATH, TEST_DATE_STR)

Loading model from: model.pkl
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading features from: s3://ai-trading-copilot-curated/features/fe-yearly-run-2025-10-17-23-39-01/AAPL_features.csv

--- ✅ Historical Test Complete for AAPL on 2025-09-15 ---
  Features used from: 2025-09-15
  Model Predicted:    Up (Confidence: 52.55%)
  Actual Outcome was: Up (for day 2025-09-15's close vs next day's close)
  ✅ Prediction was CORRECT!


In [7]:
import pandas as pd
import joblib
import os
import sagemaker # Need sagemaker session to easily list S3 files
import boto3
from sklearn.metrics import accuracy_score, classification_report
from urllib.parse import urlparse

# --- Configuration ---
MODEL_PATH = "model.pkl" 
S3_FEATURE_PREFIX = "s3://ai-trading-copilot-curated/features/fe-yearly-run-2025-10-17-23-39-01/" 

def calculate_overall_accuracy(model_path, s3_feature_prefix):
    """Loads the model and calculates accuracy over the entire test set."""

    # --- 1. Load Local Model ---
    print(f"Loading model from: {model_path}")
    try:
        model = joblib.load(model_path)
    except Exception as e:
        print(f" ERROR: Could not load model: {e}")
        return

    # --- 2. Load ALL Feature Data from S3 ---
    print(f"Loading all feature files from: {s3_feature_prefix}")
    all_features_df = pd.DataFrame()
    try:
        # Use sagemaker session or boto3 to list files in the S3 prefix
        s3_uri_parts = urlparse(s3_feature_prefix)
        bucket = s3_uri_parts.netloc
        prefix = s3_uri_parts.path.lstrip('/')

        s3_client = boto3.client('s3')
        paginator = s3_client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=bucket, Prefix=prefix)

        feature_files_s3 = []
        for page in pages:
            if "Contents" in page:
                for obj in page['Contents']:
                    key = obj['Key']
                    if key.endswith('_features.csv'):
                         feature_files_s3.append(f"s3://{bucket}/{key}")

        if not feature_files_s3:
            print(f" ERROR: No feature files found at {s3_feature_prefix}")
            return

        # Read and concatenate all files
        all_features_df = pd.concat((pd.read_csv(f) for f in feature_files_s3), ignore_index=True)
        all_features_df['date'] = pd.to_datetime(all_features_df['date'])
        all_features_df = all_features_df.sort_values(by=['date', 'symbol']).reset_index(drop=True)
        print(f"Loaded combined data shape: {all_features_df.shape}")

    except Exception as e:
        print(f" ERROR: Could not read feature files from S3: {e}")
        return

    # --- 3. Training Preprocessing ---
    # Create target
    all_features_df['target'] = (all_features_df.groupby('symbol')['close'].shift(-1) > all_features_df['close']).astype(int)

    # Define features 
    known_non_features = ['date', 'open', 'high', 'low', 'close', 'volume', 'symbol', 'asof', 'target']
    features = [col for col in all_features_df.columns if col not in known_non_features]

    # Define features used for dropping NaNs
    features_to_check_for_nans = [
        'ret_1d', 'mom_5d', 'rsi_14', 'abn_volume',
        'ret_lag_1d', 'volatility_20d', 'ma_50d', 'ma_200d',
        'return_x_volume'
     ]
    # Also include the target for dropping the last row of each symbol
    final_df = all_features_df.dropna(subset=features_to_check_for_nans + ['target'])
    print(f"Shape after NaN drop: {final_df.shape}")


    # --- 4. Perform 80/20 Chronological Split ---
    X = final_df[features]
    y = final_df['target']
    split_index = int(len(final_df) * 0.8)
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    if X_test.empty:
        print(" ERROR: Test set is empty after splitting. Check data or split logic.")
        return

    print(f"Test set size: {len(X_test)} rows")

    # --- 5. Make Predictions on Test Set ---
    print("Making predictions on the test set...")
    try:
        predictions = model.predict(X_test)
    except Exception as e:
        print(f" ERROR: Prediction failed on test set: {e}")
        return

    # --- 6. Calculate and Display Metrics ---
    accuracy = accuracy_score(y_test, predictions)

    print("\n--- Overall Test Set Evaluation ---")
    print(f"  Overall Accuracy: {accuracy:.4f} ({accuracy:.2%})")
    print("\nClassification Report:")
    print(classification_report(y_test, predictions, target_names=['Down (0)', 'Up (1)']))

# --- Run the calculation ---
if __name__ == '__main__':
    if not os.path.exists(MODEL_PATH):
        print(f"Please ensure 'model.pkl' is at '{MODEL_PATH}'.")
    else:
        calculate_overall_accuracy(MODEL_PATH, S3_FEATURE_PREFIX)

Loading model from: model.pkl
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading all feature files from: s3://ai-trading-copilot-curated/features/fe-yearly-run-2025-10-17-23-39-01/
Loaded combined data shape: (2256, 25)
Shape after NaN drop: (2256, 26)
Test set size: 452 rows
Making predictions on the test set...

--- ✅ Overall Test Set Evaluation ---
  Overall Accuracy: 0.6305 (63.05%)

Classification Report:
              precision    recall  f1-score   support

    Down (0)       0.67      0.38      0.48       207
      Up (1)       0.62      0.84      0.71       245

    accuracy                           0.63       452
   macro avg       0.64      0