# Imports and Libraries

In [None]:
#uncomment the following to install required packages
#!pip install pandas numpy matplotlib plotly scikit-learn

Collecting scikit-learn
  Obtaining dependency information for scikit-learn from https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl.metadata
  Downloading scikit_learn-1.7.2-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Obtaining dependency information for scipy>=1.8.0 from https://files.pythonhosted.org/packages/d6/73/c449a7d56ba6e6f874183759f8483cde21f900a8be117d67ffbb670c2958/scipy-1.16.2-cp311-cp311-win_amd64.whl.metadata
  Downloading scipy-1.16.2-cp311-cp311-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 0.0/60.8 kB ? eta -:--:--
     --------------------------------- ------ 51.2/60.8 kB 2.7 MB/s eta 0:00:01
     ---------------------------------------- 60.8/60.8 kB 3.4 MB/s eta 0:00:00
Collecting joblib>=1.2.0 (from scikit-learn)
  Obtaining dependency information for joblib>=1.2.0 from https://files.pythonh


[notice] A new release of pip is available: 23.2.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

DATA_DIR = Path("./Data")
FLIGHTS_PATH = DATA_DIR / "Flight_on_time_HIX_CA.csv"
WEATHER_PATH = DATA_DIR / "weather_cleaned_HIX.csv"


Load flights and weather CSVs, normalize column names, parse dep_dt and wx_dt with pd.to_datetime(errors="coerce"), drop rows with invalid datetimes, sort/reset index. Creates f, w and counts bad_f/bad_w. Note: datetimes are naive (utc=False) and later cells perform an asof merge using int64 views, so dep_dt/wx_dt must be valid and sorted.


In [None]:
# flights dataset import and clean
f = pd.read_csv(FLIGHTS_PATH)
f.columns = [c.lower().strip().replace(" ", "_") for c in f.columns]
if not {"flightdate","scheduled_departure_time"}.issubset(f.columns):
    raise ValueError("Expected 'FlightDate' and 'scheduled_departure_time' in flights.")

f["dep_dt"] = pd.to_datetime(
    f["flightdate"].astype(str).str.strip() + " " + f["scheduled_departure_time"].astype(str).str.strip(),
    errors="coerce",
    utc=False
)
bad_f = f["dep_dt"].isna().sum()
if bad_f:
    print(f"Dropping {bad_f} flights with invalid dep_dt")
f = f.dropna(subset=["dep_dt"]).sort_values("dep_dt").reset_index(drop=True)

# weather dataset import and clean  
w = pd.read_csv(WEATHER_PATH)
w.columns = [c.lower().strip().replace(" ", "_") for c in w.columns]
if "datetime" not in w.columns:
    raise ValueError("Expected 'datetime' column in weather.")
w["wx_dt"] = pd.to_datetime(w["datetime"], errors="coerce", utc=False)
bad_w = w["wx_dt"].isna().sum()
if bad_w:
    print(f"Dropping {bad_w} weather rows with invalid wx_dt")
w = w.dropna(subset=["wx_dt"]).sort_values("wx_dt").reset_index(drop=True)


Dropping 66 flights with invalid dep_dt


In [3]:
print("FLIGHTS dep_dt range:", f["dep_dt"].min(), "→", f["dep_dt"].max())
print("WEATHER wx_dt range:", w["wx_dt"].min(), "→", w["wx_dt"].max())


FLIGHTS dep_dt range: 2019-12-01 00:15:00 → 2019-12-31 21:27:00
WEATHER wx_dt range: 2019-11-30 00:00:00 → 2019-12-31 23:00:00


# Merging Datasets

pd.merge_asof is an “as-of” left join that for each left row finds the last right row with right_key <= left_key (direction="backward") within an optional tolerance. Here it attaches the most recent prior weather record to each flight (3‑hour tolerance) to avoid using future weather; using .view("int64") nanosecond keys makes the join/tolerance precise and fast.


In [34]:
f["dep_key"] = f["dep_dt"].view("int64")
w["wx_key"]  = w["wx_dt"].view("int64")

f = f.sort_values("dep_key").reset_index(drop=True)
w = w.sort_values("wx_key").reset_index(drop=True)

TOL_NS = pd.Timedelta("3H").value

merged = pd.merge_asof(
    f, w,
    left_on="dep_key",
    right_on="wx_key",
    direction="backward",
    tolerance=TOL_NS
)

merged["wx_missing"] = merged["wx_dt"].isna()
merged["wx_staleness_min"] = (merged["dep_dt"] - merged["wx_dt"]).dt.total_seconds() / 60

merged[["flightdate","scheduled_departure_time","dep_dt","wx_dt","wx_staleness_min","wx_missing"]].head(10)


  f["dep_key"] = f["dep_dt"].view("int64")
  w["wx_key"]  = w["wx_dt"].view("int64")
  TOL_NS = pd.Timedelta("3H").value


Unnamed: 0,flightdate,scheduled_departure_time,dep_dt,wx_dt,wx_staleness_min,wx_missing
0,2019-12-01,00:15:00,2019-12-01 00:15:00,2019-12-01 00:00:00,15.0,False
1,2019-12-01,00:15:00,2019-12-01 00:15:00,2019-12-01 00:00:00,15.0,False
2,2019-12-01,00:15:00,2019-12-01 00:15:00,2019-12-01 00:00:00,15.0,False
3,2019-12-01,00:15:00,2019-12-01 00:15:00,2019-12-01 00:00:00,15.0,False
4,2019-12-01,06:00:00,2019-12-01 06:00:00,2019-12-01 06:00:00,0.0,False
5,2019-12-01,06:00:00,2019-12-01 06:00:00,2019-12-01 06:00:00,0.0,False
6,2019-12-01,06:15:00,2019-12-01 06:15:00,2019-12-01 06:00:00,15.0,False
7,2019-12-01,06:20:00,2019-12-01 06:20:00,2019-12-01 06:00:00,20.0,False
8,2019-12-01,06:55:00,2019-12-01 06:55:00,2019-12-01 06:00:00,55.0,False
9,2019-12-01,07:00:00,2019-12-01 07:00:00,2019-12-01 07:00:00,0.0,False


In [5]:
n_total = len(merged)
n_match = (~merged["wx_missing"]).sum()
print(f"Rows: {n_total:,}")
print(f"Matched within tolerance: {n_match:,} ({(n_match/n_total if n_total else np.nan):.1%})")

print("\nStaleness (minutes) — matched only:")
print(merged.loc[~merged["wx_missing"], "wx_staleness_min"].describe())


Rows: 5,190
Matched within tolerance: 5,190 (100.0%)

Staleness (minutes) — matched only:
count    5190.000000
mean       27.764162
std        18.491144
min         0.000000
25%        10.000000
50%        30.000000
75%        45.000000
max        59.000000
Name: wx_staleness_min, dtype: float64


In [9]:
merged.columns.tolist

<bound method IndexOpsMixin.tolist of Index(['airline', 'flight_number', 'plane_id', 'flightdate', 'origin_airport',
       'destination_airport', 'flight_distance', 'scheduled_departure_time',
       'actual_departure_time', 'departure_delay_minutes', 'departure_taxi',
       'departure_wheelsoff', 'scheduled_arrival_time', 'actual_arrival_time',
       'arrival_delay_minutes', 'arrival_taxi', 'arrival_wheelson',
       'delay_reason', 'dayofweek', 'month', 'quarter', 'isweekend',
       'dep_hour', 'arr_hour', 'is_redeye', 'departure_delay_recomputed',
       'arrival_delay_recomputed', 'scheduled_blocktime_min',
       'actual_blocktime_min', 'total_taxi_time_min', 'airborne_time_min',
       'is_departure_delayed_15', 'is_arrival_delayed_15', 'dep_dt', 'dep_key',
       'datetime', 'airport', 'summary', 'precipintensity',
       'precipprobability', 'preciptype', 'precipaccumulation', 'temperature',
       'apparenttemperature', 'dewpoint', 'humidity', 'pressure', 'windspeed',
    

# Feature engineering

In [16]:
df = merged.copy()

# Label
df["is_delayed_15"] = df["is_arrival_delayed_15"].astype(int)

# Drop leakage columns (actuals and arrival-side outcomes)
leak_cols = [
    "actual_departure_time","departure_delay_minutes","departure_taxi","departure_wheelsoff",
    "actual_arrival_time","arrival_delay_minutes","arrival_taxi","arrival_wheelson",
    "arrival_delay_recomputed","is_departure_delayed_15","is_arrival_delayed_15"
]
df = df.drop(columns=[c for c in leak_cols if c in df.columns], errors="ignore")


In [17]:
base_num = [
    "dep_hour","dayofweek","month","quarter","isweekend",
    "flight_distance","scheduled_blocktime_min","total_taxi_time_min","airborne_time_min",
    "wx_staleness_min"
]
wx_num = [
    "precipintensity","precipprobability","precipaccumulation","temperature",
    "apparenttemperature","dewpoint","humidity","pressure","windspeed","windgust",
    "windbearing","cloudcover","uvindex","visibility","ozone","temp_dew_diff_f"
]
wx_flags = ["is_low_vis","is_precip"]
num_feats = [c for c in base_num + wx_num + wx_flags if c in df.columns]

cat_feats = [c for c in ["airline","destination_airport"] if c in df.columns]

print("Numeric:", len(num_feats), "| Categorical:", cat_feats)


Numeric: 28 | Categorical: ['airline', 'destination_airport']


In [32]:
df = df.sort_values("dep_dt").reset_index(drop=True)
n = len(df)
i_tr, i_va = int(0.70*n), int(0.85*n)

X_train = df.iloc[:i_tr][num_feats + cat_feats]
X_val   = df.iloc[i_tr:i_va][num_feats + cat_feats]
X_test  = df.iloc[i_va:][num_feats + cat_feats]

y_train = df.iloc[:i_tr]["is_delayed_15"].values
y_val   = df.iloc[i_tr:i_va]["is_delayed_15"].values
y_test  = df.iloc[i_va:]["is_delayed_15"].values

print(f"train={len(X_train)}, val={len(X_val)}, test={len(X_test)}")
print("ranges:", df.iloc[:i_tr]["dep_dt"].min(), "→", df.iloc[:i_tr]["dep_dt"].max(),
      "|", df.iloc[i_va:]["dep_dt"].min(), "→", df.iloc[i_va:]["dep_dt"].max())


train=3632, val=779, test=779
ranges: 2019-12-01 00:15:00 → 2019-12-22 07:06:00 | 2019-12-27 08:10:00 → 2019-12-31 21:27:00


The cell below builds the preprocessing (median imputer for numeric; most-frequent + OHE for categorical), ColumnTransformer + Pipeline with HistGradientBoostingClassifier, computes balanced sample weights and fits the model.


In [21]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.utils.class_weight import compute_class_weight

num_t = Pipeline([("imp", SimpleImputer(strategy="median"))])
cat_t = Pipeline([
    ("imp", SimpleImputer(strategy="most_frequent")),
    ("oh", OneHotEncoder(handle_unknown="ignore", sparse_output=False))
]) if cat_feats else "drop"

pre = ColumnTransformer(
    [("num", num_t, [c for c in num_feats if c in X_train.columns]),
     ("cat", cat_t, [c for c in cat_feats if c in X_train.columns])],
    remainder="drop"
)

clf = HistGradientBoostingClassifier(
    learning_rate=0.06, max_leaf_nodes=31, min_samples_leaf=50, random_state=42
)

classes = np.array([0,1])
cw = compute_class_weight("balanced", classes=classes, y=y_train)
w_map = {cls:w for cls,w in zip(classes, cw)}
w_train = np.vectorize(w_map.get)(y_train)

pipe = make_pipeline(pre, clf)
pipe.fit(X_train, y_train, histgradientboostingclassifier__sample_weight=w_train)


0,1,2
,steps,"[('columntransformer', ...), ('histgradientboostingclassifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('cat', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'median'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,missing_values,
,strategy,'most_frequent'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,loss,'log_loss'
,learning_rate,0.06
,max_iter,100
,max_leaf_nodes,31
,max_depth,
,min_samples_leaf,50
,l2_regularization,0.0
,max_features,1.0
,max_bins,255
,categorical_features,'from_dtype'


In [22]:
from sklearn.metrics import average_precision_score, roc_auc_score, f1_score, confusion_matrix

def eval_split(name, X, y):
    proba = pipe.predict_proba(X)[:,1]
    ap = average_precision_score(y, proba)
    try:
        roc = roc_auc_score(y, proba)
    except ValueError:
        roc = np.nan
    f1_05 = f1_score(y, (proba>=0.5).astype(int), zero_division=0)
    k = max(1, int(0.10*len(proba)))
    thr_k = np.partition(proba, -k)[-k]
    f1_k = f1_score(y, (proba>=thr_k).astype(int), zero_division=0)
    print(f"[{name}] PR-AUC={ap:.3f} | ROC-AUC={roc:.3f} | F1@0.5={f1_05:.3f} | F1@Top10%={f1_k:.3f}")
    return proba, thr_k

_ = eval_split("VAL", X_val, y_val)


[VAL] PR-AUC=0.229 | ROC-AUC=0.511 | F1@0.5=0.144 | F1@Top10%=0.144


In [23]:
proba_test, thr_k_test = eval_split("TEST", X_test, y_test)
pred_k = (proba_test >= thr_k_test).astype(int)
cm = confusion_matrix(y_test, pred_k, labels=[0,1])
print("\nConfusion matrix @Top10% (rows=true, cols=pred):\n", cm)


[TEST] PR-AUC=0.747 | ROC-AUC=0.775 | F1@0.5=0.670 | F1@Top10%=0.411

Confusion matrix @Top10% (rows=true, cols=pred):
 [[493   3]
 [209  74]]


The code below the pipeline's preprocessor and estimator, transforms the validation features, and builds feature names (using get_feature_names_out or a fallback that expands OHE categories). It asserts name/column alignment, computes permutation importances of the estimator on the processed validation set (scoring PR-AUC), creates a DataFrame of features with mean importances, and shows the top 15.

In [None]:
from sklearn.inspection import permutation_importance

pre = pipe.named_steps["columntransformer"]
est = pipe.named_steps["histgradientboostingclassifier"]
Xv_proc = pre.transform(X_val)
yv = y_val

feature_names = None
try:
    feature_names = pre.get_feature_names_out()
except Exception:
    names = []
    if "num" in dict(pre.transformers_):
        num_cols = dict(pre.transformers_)["num"][2]
        names.extend(num_cols)
    if "cat" in dict(pre.transformers_) and cat_feats:
        cat_cols = dict(pre.transformers_)["cat"][2]
        ohe = pre.named_transformers_["cat"].named_steps["oh"]
        for col_name, cats in zip(cat_cols, ohe.categories_):
            names.extend([f"{col_name}={c}" for c in cats])
    feature_names = np.array(names, dtype=object)

assert Xv_proc.shape[1] == len(feature_names), \
    f"Shape mismatch: X has {Xv_proc.shape[1]} cols, but {len(feature_names)} names."

perm = permutation_importance(
    est, Xv_proc, yv, n_repeats=5, random_state=42, scoring="average_precision"
)

imp = (pd.DataFrame({
        "feature": feature_names,
        "importance": perm.importances_mean
     })
     .sort_values("importance", ascending=False))

imp.head(15)


Unnamed: 0,feature,importance
7,num__total_taxi_time_min,0.039491
23,num__visibility,0.008244
8,num__airborne_time_min,0.005166
15,num__dewpoint,0.003715
0,num__dep_hour,0.003193
25,num__temp_dew_diff_f,0.002796
14,num__apparenttemperature,0.002629
9,num__wx_staleness_min,0.002231
76,cat__destination_airport_LPL,0.001689
17,num__pressure,0.000577


In [26]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import average_precision_score
import numpy as np
import pandas as pd

def make_pipe(num_feats, cat_feats, cfg):
    num_t = Pipeline([("imp", SimpleImputer(strategy="median"))])
    cat_t = Pipeline([
        ("imp", SimpleImputer(strategy="most_frequent")),
        ("oh",  OneHotEncoder(handle_unknown="ignore", sparse_output=False))
    ]) if cat_feats else "drop"
    pre = ColumnTransformer(
        [("num", num_t, [c for c in num_feats if c in X_train.columns]),
         ("cat", cat_t, [c for c in cat_feats if c in X_train.columns])],
        remainder="drop"
    )
    clf = HistGradientBoostingClassifier(
        learning_rate=cfg["lr"],
        max_leaf_nodes=cfg["leaf"],
        min_samples_leaf=cfg["minleaf"],
        random_state=42
    )
    return Pipeline([("pre", pre), ("clf", clf)])

configs = [
    {"lr":0.03,"leaf":31,"minleaf":50},
    {"lr":0.06,"leaf":31,"minleaf":50},
    {"lr":0.10,"leaf":31,"minleaf":50},
    {"lr":0.06,"leaf":63,"minleaf":50},
    {"lr":0.06,"leaf":31,"minleaf":100},
    {"lr":0.03,"leaf":63,"minleaf":100},
]

def class_weights(y):
    cls = np.array([0,1])
    w  = compute_class_weight("balanced", classes=cls, y=y)
    m  = {c:w for c,w in zip(cls,w)}
    return np.vectorize(m.get)(y)

rows = []
best = None
for i,cfg in enumerate(configs,1):
    pipe_i = make_pipe(num_feats, cat_feats, cfg)
    w_train = class_weights(y_train)
    pipe_i.fit(X_train, y_train, clf__sample_weight=w_train)
    proba_val = pipe_i.predict_proba(X_val)[:,1]
    ap = average_precision_score(y_val, proba_val)
    rows.append({"cfg":cfg,"val_pr_auc":ap})
    if best is None or ap>best[0]:
        best = (ap, cfg, pipe_i)

cv_tbl = pd.DataFrame(rows).sort_values("val_pr_auc", ascending=False).reset_index(drop=True)
print(cv_tbl)
best_ap, best_cfg, best_pipe = best
print("\nSelected:", best_cfg, "VAL PR-AUC:", round(best_ap,3))


                                        cfg  val_pr_auc
0   {'lr': 0.03, 'leaf': 31, 'minleaf': 50}    0.236428
1    {'lr': 0.1, 'leaf': 31, 'minleaf': 50}    0.231970
2   {'lr': 0.06, 'leaf': 31, 'minleaf': 50}    0.228860
3   {'lr': 0.06, 'leaf': 63, 'minleaf': 50}    0.225753
4  {'lr': 0.03, 'leaf': 63, 'minleaf': 100}    0.223554
5  {'lr': 0.06, 'leaf': 31, 'minleaf': 100}    0.217448

Selected: {'lr': 0.03, 'leaf': 31, 'minleaf': 50} VAL PR-AUC: 0.236


In [27]:
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import average_precision_score, roc_auc_score, f1_score, confusion_matrix, brier_score_loss

X_trv = pd.concat([X_train, X_val], axis=0)
y_trv = np.concatenate([y_train, y_val])

w_trv = class_weights(y_trv)
best_pipe.fit(X_trv, y_trv, clf__sample_weight=w_trv)

cal = CalibratedClassifierCV(best_pipe, cv="prefit", method="isotonic")
cal.fit(X_val, y_val)

proba_test = cal.predict_proba(X_test)[:,1]

ap = average_precision_score(y_test, proba_test)
try:
    roc = roc_auc_score(y_test, proba_test)
except ValueError:
    roc = np.nan
f1_05 = f1_score(y_test, (proba_test>=0.5).astype(int), zero_division=0)
print(f"[TEST] PR-AUC={ap:.3f} | ROC-AUC={roc:.3f} | F1@0.5={f1_05:.3f} | Brier={brier_score_loss(y_test, proba_test):.4f}")


[TEST] PR-AUC=0.707 | ROC-AUC=0.783 | F1@0.5=0.629 | Brier=0.1793




In [28]:
def eval_at_threshold(y_true, proba, thr, name):
    pred = (proba>=thr).astype(int)
    cm = confusion_matrix(y_true, pred, labels=[0,1])
    f1 = f1_score(y_true, pred, zero_division=0)
    print(f"{name}: thr={thr:.4f} | F1={f1:.3f}\nCM (rows=true, cols=pred)\n{cm}\n")
    return pred, cm, f1

# Top-K (10%)
k = max(1, int(0.10*len(proba_test)))
thr_topk = np.partition(proba_test, -k)[-k]
_ = eval_at_threshold(y_test, proba_test, thr_topk, "TEST @Top10%")

# Cost-based (optional): set business costs here
C_FN = 5.0  # missed delay
C_FP = 1.0  # false alarm
qs = np.linspace(0.50, 0.99, 25)
best_cost, best_thr = None, None
for q in qs:
    thr = np.quantile(proba_test, q)
    pred = (proba_test>=thr).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_test, pred, labels=[0,1]).ravel()
    cost = C_FN*fn + C_FP*fp
    if best_cost is None or cost<best_cost:
        best_cost, best_thr = cost, thr
print(f"Cost-opt threshold: thr={best_thr:.4f} | Cost={best_cost:.1f}")
_ = eval_at_threshold(y_test, proba_test, best_thr, "TEST @CostOpt")


TEST @Top10%: thr=1.0000 | F1=0.589
CM (rows=true, cols=pred)
[[477  19]
 [157 126]]

Cost-opt threshold: thr=0.1010 | Cost=504.0
TEST @CostOpt: thr=0.1010 | F1=0.603
CM (rows=true, cols=pred)
[[237 259]
 [ 49 234]]



In [29]:
def slice_report(df_all, proba, y, col, topn=8):
    s = df_all[col].value_counts().head(topn).index.tolist()
    print(f"\nSlice: {col} (top {len(s)})")
    for v in s:
        m = df_all[col]==v
        if m.sum()<25: continue
        ap = average_precision_score(y[m], proba[m])
        try:
            roc = roc_auc_score(y[m], proba[m])
        except:
            roc = np.nan
        print(f"  {v:<12} n={m.sum():>4} | PR-AUC={ap:.3f} | ROC-AUC={roc:.3f}")

df_test_slice = df.iloc[i_va:].copy()
slice_report(df_test_slice, proba_test, y_test, "destination_airport") if "destination_airport" in df_test_slice.columns else None
slice_report(df_test_slice, proba_test, y_test, "dep_hour")
if "is_low_vis" in df_test_slice.columns:  slice_report(df_test_slice, proba_test, y_test, "is_low_vis", topn=2)
if "is_precip" in df_test_slice.columns:   slice_report(df_test_slice, proba_test, y_test, "is_precip", topn=2)



Slice: destination_airport (top 8)
  UEU          n=  36 | PR-AUC=0.542 | ROC-AUC=0.820
  BEX          n=  27 | PR-AUC=0.929 | ROC-AUC=0.915

Slice: dep_hour (top 8)
  20           n=  67 | PR-AUC=0.764 | ROC-AUC=0.804
  18           n=  67 | PR-AUC=0.633 | ROC-AUC=0.740
  12           n=  63 | PR-AUC=0.734 | ROC-AUC=0.762
  15           n=  58 | PR-AUC=0.780 | ROC-AUC=0.883
  19           n=  57 | PR-AUC=0.619 | ROC-AUC=0.621
  8            n=  53 | PR-AUC=0.624 | ROC-AUC=0.759
  17           n=  50 | PR-AUC=0.591 | ROC-AUC=0.678
  10           n=  50 | PR-AUC=0.597 | ROC-AUC=0.698

Slice: is_low_vis (top 2)
  0            n= 774 | PR-AUC=0.707 | ROC-AUC=0.782

Slice: is_precip (top 2)
  0            n= 676 | PR-AUC=0.680 | ROC-AUC=0.770
  1            n= 103 | PR-AUC=0.792 | ROC-AUC=0.756


In [30]:
deploy_model = {
    "model": cal,
    "features_num": num_feats,
    "features_cat": cat_feats,
    "threshold_topk": thr_topk,
    "threshold_costopt": best_thr,
    "config": best_cfg
}

print("Deploy artifact ready (in memory). Keys:", list(deploy_model.keys()))


Deploy artifact ready (in memory). Keys: ['model', 'features_num', 'features_cat', 'threshold_topk', 'threshold_costopt', 'config']
