In [None]:
import warnings
from datetime import datetime
from pathlib import Path

import joblib
import numpy as np
import pandas as pd

In [None]:
# --- Paths (edit these before running) ---
# Directory searched for "*.joblib" files when MODEL_FILE is None.
MODEL_DIR = Path("models")
# Explicit model path; leave as None to auto-pick a model from MODEL_DIR.
MODEL_FILE = None
# CSV holding the samples to score (first column = sample ID).
NEW_DATA_CSV = r"data/data.csv"

# --- Feature columns fed to the model, in this exact order ---
SELECTED_COLS = [
    "wavelet-LL_firstorder_RootMeanSquared",
    "wavelet-LL_firstorder_Mean",
    "original_glcm_JointAverage",
    "wavelet-LL_firstorder_Median",
    "original_firstorder_10Percentile",
    "original_firstorder_RootMeanSquared",
    "original_firstorder_Mean",
    "wavelet-LL_firstorder_10Percentile",
    "original_firstorder_Median",
]

In [None]:
def load_model(model_file=None, model_dir=MODEL_DIR):
    """Locate a serialized model on disk and load it with joblib.

    Parameters
    ----------
    model_file : str or path-like, optional
        Explicit path to the model. When given, ``model_dir`` is ignored.
    model_dir : str or path-like, optional
        Directory searched for ``*.joblib`` files when ``model_file`` is None.

    Returns
    -------
    tuple
        ``(model_path, model)`` - the resolved ``Path`` and the loaded object.

    Raises
    ------
    FileNotFoundError
        If ``model_file`` does not exist, or ``model_dir`` contains no
        ``*.joblib`` file.
    """
    if model_file is None:
        search_dir = Path(model_dir)
        # The candidate whose path sorts last wins. This is ordering by name,
        # not by modification time.
        model_path = max(search_dir.glob("*.joblib"), default=None)
        if model_path is None:
            raise FileNotFoundError(f"No .joblib models found under {search_dir}")
    else:
        model_path = Path(model_file)
        if not model_path.exists():
            raise FileNotFoundError(f"Model file not found: {model_path}")

    print(model_path)
    # NOTE(review): joblib.load is pickle-based; only load model files that
    # come from a trusted source.
    return model_path, joblib.load(model_path)

def read_and_prepare(csv_path, selected_cols=None, n_feature_cols=754):
    """Load a feature CSV and return the sample IDs plus numeric model inputs.

    Parameters
    ----------
    csv_path : str, path-like or file-like
        Anything ``pandas.read_csv`` accepts. The first column is treated as
        the sample identifier regardless of its header.
    selected_cols : sequence of str, optional
        Feature columns to extract, in the order they should appear in ``X``.
        Defaults to the module-level ``SELECTED_COLS``.
    n_feature_cols : int or None, optional
        Number of columns directly after the ID column that are searched for
        features. The default of 754 reproduces the original fixed window
        (column positions 1..754). ``None`` searches every remaining column.
        NOTE(review): 754 is presumably the width of the feature table the
        model was trained on - confirm.

    Returns
    -------
    ids : pandas.Series
        First column of the CSV, named ``"ID"``.
    X : pandas.DataFrame
        The selected feature columns converted with ``pd.to_numeric``;
        values that cannot be parsed become NaN.

    Raises
    ------
    ValueError
        If the CSV has fewer than two columns, or any selected feature is not
        found inside the searched column window.

    Warns
    -----
    UserWarning
        If any non-missing cell could not be parsed as a number and was
        therefore replaced by NaN.
    """
    df = pd.read_csv(csv_path)
    if df.shape[1] < 2:
        raise ValueError("CSV must have at least 2 columns (ID + features).")

    if selected_cols is None:
        selected_cols = SELECTED_COLS
    selected_cols = list(selected_cols)

    # Take the IDs by position instead of renaming the first column to "ID":
    # a rename would create two "ID" columns when a later column already
    # carries that name, and df["ID"] would then return a DataFrame.
    ids = df.iloc[:, 0].rename("ID")

    stop = None if n_feature_cols is None else 1 + n_feature_cols
    feature_block = df.iloc[:, 1:stop]
    missing = [c for c in selected_cols if c not in feature_block.columns]
    if missing:
        raise ValueError("Missing required features: " + ", ".join(missing))

    raw = feature_block[selected_cols]
    X = pd.DataFrame(
        {c: pd.to_numeric(raw[c], errors="coerce") for c in selected_cols},
        index=raw.index,
    )

    # Surface cells that held a value but failed numeric parsing; without this
    # they would reach the model as NaN with no trace.
    newly_nan = (X.isna() & raw.notna()).sum()
    newly_nan = newly_nan[newly_nan > 0]
    if not newly_nan.empty:
        detail = ", ".join(f"{col}={int(n)}" for col, n in newly_nan.items())
        warnings.warn(
            "Non-numeric values were coerced to NaN (column=count): " + detail,
            stacklevel=2,
        )

    return ids, X

def predict_dr_score(model, X):
    """Return one score per row of ``X`` using the best method ``model`` offers.

    Order of preference:

    1. ``predict_proba`` - column 1 is returned when the result is 2-D with
       at least two columns (presumably the positive class; verify against
       the model's class ordering).
    2. ``decision_function`` - flattened to 1-D.
    3. ``predict`` - flattened to 1-D.

    Parameters
    ----------
    model : object
        Estimator exposing at least one of the methods above.
    X : array-like or DataFrame
        Feature matrix passed to the model unchanged.

    Returns
    -------
    numpy.ndarray
        1-D array of scores.
    """
    if hasattr(model, "predict_proba"):
        class_probs = model.predict_proba(X)
        has_second_column = class_probs.ndim == 2 and class_probs.shape[1] >= 2
        if has_second_column:
            return class_probs[:, 1]

    # Reached when there is no predict_proba or its output has < 2 columns.
    raw_scores = (
        model.decision_function(X)
        if hasattr(model, "decision_function")
        else model.predict(X)
    )
    return np.asarray(raw_scores).ravel()

def run(new_data_csv=NEW_DATA_CSV, model_file=MODEL_FILE, model_dir=MODEL_DIR, out_csv=None):
    """Score every row of a CSV with a saved model and write the result to disk.

    Parameters
    ----------
    new_data_csv : str or path-like
        Input CSV handed to ``read_and_prepare``.
    model_file, model_dir
        Forwarded to ``load_model`` to locate the model.
    out_csv : str or path-like, optional
        Destination file. When None, the output is written next to the input
        as ``<input stem>_drscore_<YYYYmmdd-HHMMSS>.csv``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``ID`` and ``DR_Score`` - the same table that is written
        to ``out_csv``.
    """
    model_path, model = load_model(model_file, model_dir)
    ids, X = read_and_prepare(new_data_csv)
    predictions = pd.DataFrame({"ID": ids, "DR_Score": predict_dr_score(model, X)})

    if out_csv is None:
        # Timestamped name so repeated runs never overwrite each other.
        source = Path(new_data_csv)
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        out_csv = source.with_name(f"{source.stem}_drscore_{timestamp}.csv")

    predictions.to_csv(out_csv, index=False)

    summary_lines = (
        f"✅ Loaded model: {model_path}",
        f"✅ Input rows: {len(predictions)}",
        f"✅ Wrote predictions: {out_csv}",
    )
    for line in summary_lines:
        print(line)
    return predictions

In [None]:
# Example: score the configured CSV with the configured model, then preview
# the first rows of the ID / DR_Score table.
results = run(
    new_data_csv=NEW_DATA_CSV,
    model_file=MODEL_FILE,
    model_dir=MODEL_DIR,
)
results.head()