In [10]:
# qusa/notebooks/predict_model.ipynb

import joblib 
import os 
import pandas as pd
import sys

from pathlib import Path

# Resolve the repository root from the notebook's working directory.
# This relies on the kernel's cwd being one level below the root (the header
# comment places this notebook in qusa/notebooks); if Jupyter was started
# somewhere else, the project imports that follow will fail.
PROJECT_ROOT = Path.cwd().parent

# Register the root only once, so re-running this cell does not keep
# appending duplicate entries to sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

print(PROJECT_ROOT)  # sanity check: should be the qusa repository root


from scripts.most_recent_day import MostRecentDayFetcher
from qusa.features.pipeline import FeaturePipeline


/Users/sidrana/Projects/qusa


## Load historical data

In [11]:
# Build the CSV location from PROJECT_ROOT (defined in the setup cell) instead
# of a user-specific absolute path, so the notebook runs from any checkout.
csv_path = PROJECT_ROOT / "data" / "raw" / "AMZN_2023-12-01_2025-12-01.csv"

# Parse the "date" column as datetimes on load and order rows chronologically.
df_hist = pd.read_csv(csv_path, parse_dates=["date"]).sort_values("date")


## Load model 

In [12]:
# Load the model bundle from the repository's models/ directory.
# The path is derived from PROJECT_ROOT (defined in the setup cell): the old
# "~/projects/QUSA" spelling differed in case from the real project directory
# and therefore only resolved on a case-insensitive filesystem.
MODEL_PATH = PROJECT_ROOT / "models" / "logic_model.pkl"

# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load bundles that were produced by this project.
bundle = joblib.load(MODEL_PATH)
model = bundle["model"]             # DecisionTreeClassifier
safe_features = bundle["features"]  # list of columns

## Fetch the most recent trading day and merge it with the historical data

In [None]:
# The Polygon API key must be supplied through the environment. Writing a
# literal into os.environ here would overwrite a real key and risks committing
# a secret with the notebook.
# NOTE(review): presumably MostRecentDayFetcher reads POLYGON_API_KEY itself — confirm.
if not os.environ.get("POLYGON_API_KEY"):
    raise RuntimeError(
        "POLYGON_API_KEY is not set; export it before starting Jupyter."
    )

fetcher = MostRecentDayFetcher()

df_latest = fetcher.fetch_daily_bar("AMZN")
df_latest["date"] = pd.to_datetime(df_latest["date"])

# Append only rows whose date is not already present in the history. Using
# Series.isin compares datetimes consistently, checks every fetched row (not
# just the first) and does not raise when the fetcher returns no rows.
is_new = ~df_latest["date"].isin(df_hist["date"])

if df_latest.empty:
    print("Fetcher returned no rows; historical data left unchanged")
elif is_new.any():
    df_hist = pd.concat([df_hist, df_latest[is_new]], ignore_index=True)
else:
    print("Latest day already exists in historical data")

# Restore chronological order and a clean 0..n-1 index after the merge.
df_hist = df_hist.sort_values("date").reset_index(drop=True)


## Engineer features for merged data

In [17]:
# Settings handed to the feature pipeline.
config = dict(
    rsi_period=14,
    atr_period=14,
    volume_ratio_window=5,
    use_52_week_high_low=True,
    ma_windows=[5, 10, 20],
    calendar_features=True,
)

# Run the pipeline over the merged history.
feature_pipeline = FeaturePipeline(config=config)
df_features = feature_pipeline.run(df_hist)

# Restrict the engineered frame to the columns listed in the model bundle.
X = df_features[safe_features]

# The final row of X is the one that gets scored.
X_latest = X.iloc[-1:]

## Predict on most recent day 

In [18]:
# Score the latest row: predicted class, class probabilities, and tree leaf.
y_pred = model.predict(X_latest)
class_probabilities = model.predict_proba(X_latest)
y_prob = class_probabilities[:, 1]  # second probability column, treated as the "up" class
leaf_index = model.apply(X_latest)  # leaf node reached by the sample

# Report the first (and only) entry of each result.
for label, value in (
    ("Prediction (class):", y_pred[0]),
    ("Prediction probability (up):", y_prob[0]),
    ("Leaf node activated:", leaf_index[0]),
):
    print(label, value)

Prediction (class): 1
Prediction probability (up): 0.7694349047767715
Leaf node activated: 7
