In [None]:
import joblib
import os
import pandas as pd

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## Load the model and data, and prepare for inference

In [None]:
### 1) load model
path = os.path.expanduser("~/projects/QUSA/models/logic_model.pkl")

# SECURITY: joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only load model bundles produced by a trusted training run.
bundle = joblib.load(path)
model = bundle["model"]
safe_features = bundle["features"]

### 2) load data and define the target
# Columns removed from the frame before feature selection: the raw overnight
# delta the target is derived from, plus the other columns this notebook
# treats as confounds.
confounds = [
    'overnight_delta',
    'overnight_delta_pct',
    'date',
    'z_score',
    'intraday_return_strong_positive',
    'intraday_return_strong_negative',
]

data = (
    pd.read_csv(os.path.expanduser('~/projects/QUSA/data/processed/AMZN_processed.csv'))
    # Drop unknown overnight deltas BEFORE labelling: `NaN > 0` evaluates to
    # False, so labelling first would silently tag unknown rows as class 0.
    .dropna(subset=['overnight_delta'])
    # Target variable: 1 when the overnight delta is strictly positive, else 0.
    .assign(overnight_delta_positive=lambda frame: (frame['overnight_delta'] > 0).astype(int))
    # errors='ignore' tolerates confound columns that are absent from the CSV.
    .drop(columns=confounds, errors='ignore')
)

### 3) select safe (trained) features
# Fail with an explicit message when the processed data lacks a column the
# model expects (this also fires if a trained feature is listed in `confounds`).
missing_features = [col for col in safe_features if col not in data.columns]
if missing_features:
    raise KeyError(
        f"Processed data is missing features the model was trained on: {missing_features}"
    )

# handle missing values
# NOTE(review): assumes the training pipeline also imputed missing values with 0 -- confirm.
X = data[safe_features].fillna(0)
# The label comes from a comparison on non-null rows, so it cannot contain NaN
# and needs no imputation.
y = data['overnight_delta_positive'].astype(int)

### 4) make predictions
pred_direction = model.predict(X)

## Present test metrics 

In [None]:
# Headline classification metrics for the predicted overnight direction.
accuracy = accuracy_score(y, pred_direction)
print("Accuracy:", accuracy)
print(confusion_matrix(y, pred_direction))
print(classification_report(y, pred_direction))

# Pair each importance score with its feature name, largest first.
importances = pd.Series(model.feature_importances_, index=X.columns)
importances = importances.sort_values(ascending=False)

# Keep only the features with a strictly positive importance score.
nonzero_importances = importances[importances > 0.0]

print("\nFeature Importance:")
print(nonzero_importances)
print(f"Total features used: {len(nonzero_importances)}")