# 📌 Next‑Steps Patch: Evaluation & Live Prediction Cells

Copy **each code cell** into your main access‑ticket notebook *after Step 6*.

1. **6 a. Evaluate the WHO model** – PR AUC, ROC AUC, Top‑10 Recall.  
2. **6 b. Evaluate the WHEN model** – MAE, C‑index.  
3. **7.  Generate live predictions** – top N users and expected hours to ticket.

> **Requirement:** the variables created in the earlier notebook (e.g., `df`, `feature_cols`, `model_who`, `cph`, `test`, `X_test`, `y_test`, `test_u`) must already be in memory.


In [None]:
### ==== 6 a  WHO model – full test‑set metrics ====
# Scores the WHO classifier on the held-out test set (PR AUC, ROC AUC) and
# reports Top-k recall: the share of positive test rows (y_who == 1) whose
# requester is among the k users with the highest score on their latest row.
from sklearn.metrics import average_precision_score, roc_auc_score, precision_recall_curve

# Probabilities for test set (column 1 of predict_proba)
probs_test = model_who.predict_proba(X_test)[:, 1]

# Classic metrics
pr_auc  = average_precision_score(y_test, probs_test)
roc_auc = roc_auc_score(y_test, probs_test)
print(f"WHO  – PR AUC = {pr_auc:.3f}   ROC AUC = {roc_auc:.3f}")

# ---- Top‑k recall ----
k = 10
next_tickets = test[test['y_who'] == 1]

# probs_test holds one score per row of X_test, so it must be attached to the
# full `test` frame BEFORE sorting / keeping one row per user. Attaching it
# afterwards fails with a length mismatch whenever a user has several test
# rows, and otherwise pairs scores with the wrong (re-sorted) rows.
# NOTE(review): assumes X_test rows are in the same order as `test` rows
# (e.g. X_test = test[feature_cols]) — confirm against the earlier notebook.
if len(probs_test) != len(test):
    raise ValueError(
        f"X_test has {len(probs_test)} rows but test has {len(test)}; "
        "they must be row-aligned to attach probabilities."
    )

# Latest record per user in test, carrying that record's own probability
latest_rows = (
    test.assign(prob=probs_test)
        .sort_values('open_datetime')
        .groupby('requester_id')
        .tail(1)
)

topk_users = (latest_rows.sort_values('prob', ascending=False)
                           .head(k)['requester_id']
                           .tolist())

# Fraction of positive rows whose requester was ranked in the top k
# (NaN when the test set contains no positive rows).
recall_at_k = next_tickets['requester_id'].isin(topk_users).mean()
print(f"WHO  – Top‑{k} recall (7‑day horizon): {recall_at_k:.2%}")

In [None]:
### ==== 6 b  WHEN model – test MAE & C‑index ====
# Evaluates the Cox model `cph` on `test_u`: mean absolute error of the
# predicted median time-to-event, and the concordance index.
from lifelines.utils import concordance_index

# Median predicted days to event
# NOTE(review): "days" assumes `duration` was recorded in days — confirm.
median_pred_days = cph.predict_median(test_u).rename('pred_days')
actual_days      = test_u['duration']

# MAE is only meaningful where the event was actually observed: for censored
# rows `duration` is a lower bound, not the true time to the next ticket.
# predict_median also yields inf when a subject's survival curve never drops
# to 0.5; a single inf would turn the whole mean into inf, so skip those too.
has_event  = test_u['event'].astype(bool)
has_median = median_pred_days != float('inf')
abs_error  = (median_pred_days - actual_days).abs()

mae = abs_error[has_event & has_median].mean()
print(f"WHEN – MAE on users with next ticket: {mae:.1f} days")

# Higher partial hazard means a shorter expected time to event, so the score is
# negated to make larger values correspond to longer durations. Censored rows
# are handled by concordance_index through the event indicator.
cindex = concordance_index(actual_days, -cph.predict_partial_hazard(test_u), test_u['event'])
print(f"WHEN – C‑index: {cindex:.3f}")

In [None]:
### ==== 7  Predict who & when *today* ====
# Ranks users by the WHO model's probability on their most recent record and,
# for the TOP_N highest, adds the WHEN model's expected time to next ticket.
H_PRED = 7   # horizon in days (not referenced below; kept for other cells)
TOP_N  = 10

# "Today" is taken as the newest timestamp present in the data
today_dt = df['open_datetime'].max()

# Latest row per user. Since today_dt is the column maximum, the comparison
# only filters out rows whose open_datetime is missing.
latest_rows_now = (
    df[df['open_datetime'] <= today_dt]
      .sort_values('open_datetime')
      .groupby('requester_id')
      .tail(1)
)

latest_rows_now = latest_rows_now.assign(
    prob_who = model_who.predict_proba(latest_rows_now[feature_cols])[:, 1]
)

candidates = (latest_rows_now.sort_values('prob_who', ascending=False)
              .head(TOP_N)
              .copy())

# Aggregate features per candidate for WHEN model
# (candidates already holds one row per user, so the mean leaves values as-is)
user_feats_now = candidates.groupby('requester_id')[feature_cols].mean().reset_index()

# lifelines exposes the expected lifetime as `predict_expectation`; there is no
# `predict_expected` method on the fitted Cox model.
# NOTE(review): the *24 conversion assumes the model's duration unit is days.
user_feats_now['pred_hours_to_ticket'] = cph.predict_expectation(user_feats_now) * 24  # days→hours

# Merge and display
final_pred = (candidates[['requester_id', 'prob_who']]
              .merge(user_feats_now[['requester_id', 'pred_hours_to_ticket']],
                     on='requester_id')
              .sort_values('prob_who', ascending=False)
              .reset_index(drop=True))
final_pred