In [1]:
# ==============================================
# NEXT STEP: native .keras save + ROC/PR plots + user-input prediction UI
# ==============================================
import os, json, pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score, classification_report
import tensorflow as tf

# Every artifact written by this cell (model file, plots, metrics) goes into this folder.
OUTPUT_DIR = r"C:\Users\sagni\Downloads\Dynamic Curriculum Designer"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ---------- 1) Save in native Keras format ----------
# Destination of the native-format save; also the first location a saved model is looked for.
keras_path = os.path.join(OUTPUT_DIR, "model.keras")

try:
    # Prefer a `model` object already present in the session.
    mdl = model  # noqa: F821
except NameError:
    # No in-memory model: reload from disk, trying model.keras first and model.h5 second.
    try:
        mdl = tf.keras.models.load_model(keras_path)
    except Exception:
        mdl = tf.keras.models.load_model(os.path.join(OUTPUT_DIR, "model.h5"))

mdl.save(keras_path)
print(f"[INFO] Saved native Keras model -> {keras_path}")

# ---------- 2) ROC + PR curves (need y_test and y_pred_prob from the training cell) ----------
# y_test and y_pred_prob are expected to be left in the session by the training cell.
# When either name is missing, this section prints a warning and writes no files.
try:
    # Only the name lookups live inside the try: a NameError raised by a genuine bug
    # further down must not be reported as "training cell not run".
    # ravel() hands the sklearn metrics the 1-D arrays they are documented to take;
    # presumably y_pred_prob arrives as an (n, 1) column from model.predict -- confirm.
    y_true_1d = np.asarray(y_test).ravel()        # noqa: F821
    y_score_1d = np.asarray(y_pred_prob).ravel()  # noqa: F821
except NameError:
    print("[WARN] y_test / y_pred_prob not found in this session. Re-run the training cell, then re-run this cell to create ROC/PR plots and metrics files.")
else:
    from sklearn.metrics import accuracy_score

    # Hard class predictions at the 0.5 threshold, computed once and reused below.
    y_pred_1d = (y_score_1d >= 0.5).astype(int)

    # ROC
    fpr, tpr, _ = roc_curve(y_true_1d, y_score_1d)
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(6, 4))
    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")
    plt.plot([0, 1], [0, 1], linestyle="--")  # chance-level diagonal for reference
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve")
    plt.legend(loc="lower right")
    roc_path = os.path.join(OUTPUT_DIR, "roc_curve.png")
    plt.tight_layout()
    plt.savefig(roc_path, dpi=150)  # save before show(): show() may clear the figure
    plt.show()
    print(f"[INFO] Saved ROC curve -> {roc_path}")

    # Precision–Recall
    precision, recall, _ = precision_recall_curve(y_true_1d, y_score_1d)
    ap = average_precision_score(y_true_1d, y_score_1d)

    plt.figure(figsize=(6, 4))
    plt.plot(recall, precision, label=f"AP = {ap:.3f}")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Precision–Recall Curve")
    plt.legend(loc="lower left")
    pr_path = os.path.join(OUTPUT_DIR, "pr_curve.png")
    plt.tight_layout()
    plt.savefig(pr_path, dpi=150)
    plt.show()
    print(f"[INFO] Saved PR curve -> {pr_path}")

    # Save the classification report as text and the scalar metrics as JSON.
    acc_val = float(accuracy_score(y_true_1d, y_pred_1d))
    report_txt = classification_report(y_true_1d, y_pred_1d)
    with open(os.path.join(OUTPUT_DIR, "classification_report.txt"), "w", encoding="utf-8") as f:
        f.write(report_txt)
    metrics_path = os.path.join(OUTPUT_DIR, "metrics.json")
    with open(metrics_path, "w", encoding="utf-8") as f:
        json.dump({"accuracy": acc_val, "roc_auc": float(roc_auc), "average_precision": float(ap)}, f, indent=2)
    print(f"[INFO] Saved classification_report.txt and metrics.json in {OUTPUT_DIR}")

# ---------- 3) Pure-Python helper for user-input prediction ----------
# Uses saved preprocessor + model from disk; works even in a fresh session.
def predict_from_dict(feature_dict,
                      preproc_path=os.path.join(OUTPUT_DIR, "preprocessor.pkl"),
                      label_path=os.path.join(OUTPUT_DIR, "label_encoder.pkl"),
                      model_path_keras=os.path.join(OUTPUT_DIR, "model.keras"),
                      model_path_h5=os.path.join(OUTPUT_DIR, "model.h5")):
    """Predict the class of a single sample given as a ``{column: value}`` dict.

    The pickled preprocessor, the saved model and the pickled label encoder are
    all loaded from disk on every call.

    Args:
        feature_dict: Mapping from raw input column name to value. Columns the
            preprocessor expects but that are absent, or present with value
            ``None``, are filled with NaN. Keys the preprocessor does not know
            are ignored.
        preproc_path: Path of the pickled preprocessor.
        label_path: Path of the pickled label encoder.
        model_path_keras: Model file tried first.
        model_path_h5: Model file tried when loading ``model_path_keras`` fails.

    Returns:
        dict with keys ``"probability_pass"`` (first value of the model output
        as a float), ``"predicted_class"`` (1 when that value is >= 0.5, else 0)
        and ``"label_decoded"`` (the class run through the label encoder's
        ``inverse_transform``, as ``str``).

    Raises:
        Whatever ``open``, ``pickle.load``, the preprocessor or Keras raise,
        e.g. when a file is missing.
    """
    # Function-scope imports: the helper is meant to work even in a fresh session.
    import pickle
    import numpy as np
    import pandas as pd
    import tensorflow as tf

    # NOTE(security): pickle.load can execute arbitrary code. Only point
    # preproc_path / label_path at files you created yourself.
    with open(preproc_path, "rb") as f:
        preproc = pickle.load(f)

    # Recover the raw input column lists from the fitted transformer.
    # Assumes transformers_[0] is the categorical branch and transformers_[1]
    # the numeric one -- verify against the training cell.
    cat_cols = preproc.transformers_[0][2]
    num_cols = preproc.transformers_[1][2]
    expected_cols = list(cat_cols) + list(num_cols)

    # Build a single-row DataFrame. An explicit None (presumably what a blank UI
    # field delivers -- confirm) is treated exactly like an absent key.
    row = {}
    for col in expected_cols:
        value = feature_dict.get(col)
        row[col] = np.nan if value is None else value
    X_new = pd.DataFrame([row], columns=expected_cols)

    # Transform; densify if the preprocessor returned a scipy sparse matrix so
    # the model always receives a plain array (cheap for one row).
    X_new_proc = preproc.transform(X_new)
    if hasattr(X_new_proc, "toarray"):
        X_new_proc = X_new_proc.toarray()

    # Load model (prefer .keras, fallback to .h5). The broad except is a
    # deliberate best-effort: any failure on the first file triggers the
    # fallback, and a failure there propagates to the caller.
    try:
        mdl = tf.keras.models.load_model(model_path_keras)
    except Exception:
        mdl = tf.keras.models.load_model(model_path_h5)

    prob = float(mdl.predict(X_new_proc).ravel()[0])
    pred = int(prob >= 0.5)

    # Decode the integer class back to its original label.
    with open(label_path, "rb") as f:
        lab = pickle.load(f)
    label = lab.inverse_transform([pred])[0]
    return {"probability_pass": prob, "predicted_class": pred, "label_decoded": str(label)}

# Example usage (edit values to your columns, then run):
# example = {
#     "school":"GP","sex":"F","address":"U","famsize":"GT3","Pstatus":"T",
#     "schoolsup":"no","famsup":"yes","paid":"no","activities":"yes","nursery":"yes","higher":"yes","internet":"yes","romantic":"no",
#     "age":17,"Medu":3,"Fedu":2,"traveltime":1,"studytime":2,"failures":0,"famrel":4,"freetime":3,"goout":3,"Dalc":1,"Walc":1,"health":5,"absences":4
# }
# print(predict_from_dict(example))

# ---------- 4) OPTIONAL: Gradio UI for user input ----------
# Creates dropdowns for categorical columns (using learned categories) and number inputs for numeric columns.
try:
    import gradio as gr
except Exception as e:
    # gradio is optional: a missing (or broken) install only disables the UI.
    print("[INFO] Gradio UI not available (optional). Install with `pip install gradio`. Error:", e)
else:
    # gradio imported fine, so any failure below is a build problem (missing
    # preprocessor.pkl, unexpected transformer layout, ...) and is reported as
    # such instead of as "gradio not installed". Still best-effort: never
    # crash the cell over the optional UI.
    try:
        # NOTE(security): pickle.load can execute arbitrary code; only load files you created.
        with open(os.path.join(OUTPUT_DIR, "preprocessor.pkl"), "rb") as f:
            preproc = pickle.load(f)
        # Assumes transformers_[0] is the categorical branch and transformers_[1]
        # the numeric one -- verify against the training cell.
        cat_cols = list(preproc.transformers_[0][2])
        num_cols = list(preproc.transformers_[1][2])
        ohe = preproc.transformers_[0][1]
        if not hasattr(ohe, "categories_"):
            # The categorical branch may be a Pipeline (e.g. imputer + encoder):
            # use its first step that exposes fitted categories, if any.
            ohe = next(
                (step for _name, step in getattr(ohe, "steps", []) if hasattr(step, "categories_")),
                ohe,
            )
        cat_choices = {col: list(ohe.categories_[i]) for i, col in enumerate(cat_cols)}

        def _predict_gradio(*vals):
            """Map positional UI values back to column names and run the prediction."""
            # vals: cat values in order of cat_cols, then numeric values in order of num_cols
            cat_vals = vals[:len(cat_cols)]
            num_vals = vals[len(cat_cols):]
            payload = dict(zip(cat_cols, cat_vals))
            payload.update(zip(num_cols, num_vals))
            out = predict_from_dict(payload)
            return f"Predicted Label: {out['label_decoded']}  |  Probability(pass): {out['probability_pass']:.3f}"

        # Dropdowns (pre-selecting the first learned category, if any) for the
        # categorical columns, followed by number inputs for the numeric columns.
        inputs = [
            gr.Dropdown(choices=cat_choices[c], label=c, value=cat_choices[c][0] if cat_choices[c] else None)
            for c in cat_cols
        ]
        inputs += [gr.Number(label=c) for c in num_cols]

        demo = gr.Interface(
            fn=_predict_gradio,
            inputs=inputs,
            outputs=gr.Textbox(label="Prediction"),
            title="Dynamic Curriculum Designer — User Prediction",
            description="Select categorical features and enter numeric features to predict Pass/Fail."
        )
        # The UI is only built here, not launched, so the message says so.
        print("[INFO] Gradio UI built. Call demo.launch() to open it (if running locally, it opens in a new tab)")
        # Uncomment the next line to launch the UI:
        # demo.launch()
    except Exception as e:
        print("[WARN] Gradio is installed but the UI could not be built. Error:", e)




[INFO] Saved native Keras model -> C:\Users\sagni\Downloads\Dynamic Curriculum Designer\model.keras
[WARN] y_test / y_pred_prob not found in this session. Re-run the training cell, then re-run this cell to create ROC/PR plots and metrics files.
[INFO] Gradio UI not available (optional). Install with `pip install gradio`. Error: No module named 'gradio'
