In [2]:
# Environment prep for WIT on Apple Silicon
# Pin protobuf to avoid descriptor errors in witwidget vendored protos
%pip install -q "protobuf<3.21"
import os
os.environ.setdefault("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", "python")
print("Environment prepped for WIT")

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.20.0 requires protobuf>=5.28.0, but you have protobuf 3.20.3 which is incompatible.[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.
Environment prepped for WIT


# Google What-If / Fairness Indicators demo
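Note: The What-If Tool (`witwidget`) runs in Jupyter, but it may require additional setup (for example, a compatible `protobuf` version and a kernel restart after installing packages). This notebook provides setup guidance and a minimal example.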

In [3]:
# What-If Tool fairness exploration on Adult dataset
import json
import numpy as np
import pandas as pd
from pathlib import Path

# Import WIT
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

# Load data and model from our scripts
from scripts.load_adult import load_adult
import joblib

# Load the Adult frame and separate the binary income target from the features.
df = load_adult()
y = df["income_binary"].astype(int)
X = df.drop(columns=["income_binary"])

# Prefer the model one directory up; otherwise use the copy under the current directory.
model_file = "../models/logreg_adult.joblib"
if not Path(model_file).exists():
    model_file = "models/logreg_adult.joblib"
model = joblib.load(model_file)

# Column 1 of predict_proba is the score; scores at or above 0.5 become label 1.
preds = model.predict_proba(X)[:, 1]
pred_labels = (preds >= 0.5).astype(int)

# Build examples for WIT: one dict per row holding the features, the true label,
# the model score and the thresholded prediction.
def _as_builtin(value):
    """Convert a NumPy/Python scalar to the matching built-in type; pass anything else through."""
    # bool is checked first because Python's bool is a subclass of int.
    if isinstance(value, (np.bool_, bool)):
        return bool(value)
    if isinstance(value, (np.floating, float)):
        return float(value)
    if isinstance(value, (np.integer, int)):
        return int(value)
    return value


examples = []
for i in range(len(X)):
    record = {k: _as_builtin(v) for k, v in X.iloc[i].items()}
    record.update(
        label=int(y.iloc[i]),
        pred_score=float(preds[i]),
        pred_label=int(pred_labels[i]),
    )
    examples.append(record)

# Configure the What-If Tool: "label" is the ground-truth column and the task is
# classification with the label vocabulary "0"/"1".
feature_names = list(X.columns)
config_builder = WitConfigBuilder(examples).set_target_feature("label").set_model_type("classification")
config_builder = config_builder.set_label_vocab(["0","1"])
# NOTE(review): confirm that `set_predict_output_column` exists in the installed
# witwidget version; this cell has not been observed to run past the import.
config_builder = config_builder.set_predict_output_column("pred_score")

# Display WIT
# A bare expression is only rendered when it is the last statement of a cell, and
# this cell continues below, so render the widget explicitly. `display` is provided
# as a builtin by the IPython kernel.
display(WitWidget(config_builder, height=600))

# Compute and persist a simple by-group report for reference (Female vs Male)
# Write next to an existing ../outputs directory if there is one, else ./outputs.
out = Path("../outputs") if Path("../outputs").exists() else Path("outputs")
out.mkdir(exist_ok=True)

# Build `sens`, a per-row group name aligned with X.index.
# The one-hot column is cast to int before mapping: True == 1 and False == 0 in
# Python, so a dict listing both bool and int keys collapses to the int keys only,
# and a bool-typed column may then fail to match them.
if "sex_Male" in X.columns:
    sens = X["sex_Male"].astype(int).map({1: "Male", 0: "Female"})
else:
    # fallback to any one-hot sex_* column
    sex_cols = [c for c in X.columns if c.startswith("sex_")]
    if sex_cols:
        category = sex_cols[0].split("sex_")[1]
        sens = X[sex_cols[0]].astype(int).map({1: category, 0: f"not_{category}"})
    else:
        # No sex column at all: one "unknown" group, indexed like X so that
        # label-based lookups against X / y stay valid.
        sens = pd.Series("unknown", index=X.index)

def rates(y_true, y_pred):
    """Return ``(selection_rate, tpr, fpr)`` for binary labels and predictions.

    Args:
        y_true: Array-like of ground-truth labels; entries equal to 1 / 0 are counted.
        y_pred: Array-like of predicted labels; entries equal to 1 / 0 are counted.

    Returns:
        A 3-tuple: the fraction of predictions equal to 1 (0.0 when ``y_true`` is
        empty), the true-positive rate and the false-positive rate. Both rates add
        1e-9 to the denominator so an empty class yields 0 rather than a division
        by zero.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    flagged = predicted == 1
    cleared = predicted == 0
    is_positive = actual == 1
    is_negative = actual == 0

    # Confusion-matrix cells.
    true_pos = np.sum(is_positive & flagged)
    false_pos = np.sum(is_negative & flagged)
    false_neg = np.sum(is_positive & cleared)
    true_neg = np.sum(is_negative & cleared)

    if len(actual):
        selection = np.mean(flagged)
    else:
        selection = 0.0

    true_pos_rate = true_pos / (true_pos + false_neg + 1e-9)
    false_pos_rate = false_pos / (false_pos + true_neg + 1e-9)
    return selection, true_pos_rate, false_pos_rate

# Per-group selection rate / TPR / FPR, written to CSV plus a small JSON run marker.
# `pred_labels` is a positional NumPy array, so labels and group membership are
# converted to positional form as well; mixing label-based `.loc` with positional
# indexing is only correct when the frame happens to have a default 0..n-1 index.
sens_by_position = sens.reset_index(drop=True)
y_by_position = np.asarray(y)

rows = []
for g, idx in sens_by_position.groupby(sens_by_position).groups.items():
    sel, tpr, fpr = rates(y_by_position[idx], pred_labels[idx])
    rows.append({"group": g, "selection_rate": sel, "tpr": tpr, "fpr": fpr})

# Explicit columns keep the CSV header stable even when no group was found.
pd.DataFrame(rows, columns=["group", "selection_rate", "tpr", "fpr"]).to_csv(out / "google_wit_by_group.csv", index=False)

# Context manager so the file handle is flushed and closed deterministically.
with open(out / "google_wit_run.json", "w") as run_file:
    json.dump({"note": "WIT run completed; by-group metrics saved for reproducibility."}, run_file)
print("Saved google_wit_by_group.csv and google_wit_run.json")

TypeError: Descriptors cannot be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates