In [5]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# ---------- (A) Confidence interval for test accuracy via Hoeffding ----------

def hoeffding_ci_for_accuracy(y_true, y_pred, delta=0.05):
    """
    Distribution-free CI for true accuracy using Hoeffding.
    Assumes i.i.d. test samples; 0/1 correctness is bounded in [0,1].
    """
    n = len(y_true)
    acc = accuracy_score(y_true, y_pred)
    rad = np.sqrt(np.log(2.0/delta)/(2.0*n))
    lower = max(0.0, acc - rad)
    upper = min(1.0, acc + rad)
    return acc, (lower, upper), rad

# Synthetic binary classification task
rng = np.random.default_rng(0)
n = 4000
d = 5
X = rng.normal(size=(n, d))
w = rng.normal(size=(d,))
logits = X @ w
probs = 1/(1+np.exp(-logits))
y = (rng.uniform(size=n) < probs).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1000, random_state=0, stratify=y)

clf = LogisticRegression(max_iter=2000).fit(X_train, y_train)
y_hat = clf.predict(X_test)

acc, (lo, hi), rad = hoeffding_ci_for_accuracy(y_test, y_hat, delta=0.05)
print(f"Test accuracy = {acc:.3f}")
print(f"Hoeffding 95% CI = [{lo:.3f}, {hi:.3f}]  (± {rad:.3f})")

# Optional: compare to binomial proportion CIs (not distribution-free but often tighter):
# from statsmodels.stats.proportion import proportion_confint
# k = (y_test == y_hat).sum()
# n = len(y_test)
# wilson_lo, wilson_hi = proportion_confint(k, n, alpha=0.05, method='wilson')
# print(f"Wilson 95% CI   = [{wilson_lo:.3f}, {wilson_hi:.3f}]")

# ---------- (B) Small UCB1 demo (Hoeffding-based optimism) ----------

def ucb1(K=3, horizon=1000, means=(0.2, 0.5, 0.6), seed=1):
    rng = np.random.default_rng(seed)
    n_pulls = np.zeros(K, dtype=int)
    sum_rewards = np.zeros(K, dtype=float)
    rewards = []

    # Initialize: pull each arm once
    for a in range(K):
        r = float(rng.random() < means[a])
        n_pulls[a] += 1
        sum_rewards[a] += r
        rewards.append(r)

    for t in range(K+1, horizon+1):
        ucb = np.zeros(K)
        for a in range(K):
            mu_hat = sum_rewards[a] / n_pulls[a]
            bonus = np.sqrt(2*np.log(t) / n_pulls[a])  # from Hoeffding
            ucb[a] = mu_hat + bonus
        a_star = int(np.argmax(ucb))
        r = float(rng.random() < means[a])
        n_pulls[a_star] += 1
        sum_rewards[a_star] += r
        rewards.append(r)

    cum_reward = np.cumsum(rewards)
    regret = np.arange(1, horizon+1)*max(means) - cum_reward
    return cum_reward, regret, n_pulls, sum_rewards / np.maximum(1, n_pulls)

cum_reward, regret, counts, est_means = ucb1()
print(f"Pull counts per arm: {counts}")
print(f"Estimated means:      {np.round(est_means,3)}")
print(f"Cumulative regret at horizon: {regret[-1]:.1f}")


Test accuracy = 0.704
Hoeffding 95% CI = [0.661, 0.747]  (± 0.043)
Pull counts per arm: [355 365 280]
Estimated means:      [0.603 0.603 0.575]
Cumulative regret at horizon: 5.0


In [26]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# --------------------------
# 1) Data + 90/10 split
# --------------------------
data = load_breast_cancer()
X = data['data']
y = data['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.10, random_state=0, stratify=y
)

# --------------------------
# 2) Train model
# --------------------------
clf = LogisticRegression(solver='liblinear', max_iter=2000)
clf.fit(X_train, y_train)

train_acc = accuracy_score(y_train, clf.predict(X_train))
test_acc  = accuracy_score(y_test,  clf.predict(X_test))

# --------------------------
# 3) Hoeffding CI on TEST accuracy
# --------------------------
def hoeffding_ci_for_accuracy(y_true, y_pred, delta=0.05):
    """
    Distribution-free CI for true accuracy using Hoeffding.
    Assumes i.i.d. test samples; 0/1 correctness is bounded in [0,1].
    """
    n = len(y_true)
    acc = accuracy_score(y_true, y_pred)
    rad = np.sqrt(np.log(2.0/delta) / (2.0 * n))  # <-- key formula
    lower = max(0.0, acc - rad)
    upper = min(1.0, acc + rad)
    return acc, (lower, upper), rad

acc, (lo, hi), rad = hoeffding_ci_for_accuracy(y_test, clf.predict(X_test), delta=0.05)

print(f"Train accuracy: {train_acc:.3f}")
print(f"Test  accuracy: {test_acc:.3f}")
print(f"Hoeffding 95% CI for TRUE accuracy: [{lo:.3f}, {hi:.3f}]  (± {rad:.3f})")

# --------------------------
# 4) Generalization bound (statement)
# --------------------------
# With probability at least 1 - delta over the draw of the TEST set:
# | true_accuracy - test_accuracy | <= sqrt( ln(2/delta) / (2 * n_test) )
# Equivalently, a conservative lower-bound guarantee on true accuracy is:
true_acc_lower_bound = max(0.0, test_acc - rad)
print(f"95% lower-bound guarantee on true accuracy: {true_acc_lower_bound:.3f}")


Train accuracy: 0.957
Test  accuracy: 0.947
Hoeffding 95% CI for TRUE accuracy: [0.767, 1.000]  (± 0.180)
95% lower-bound guarantee on true accuracy: 0.767


In [13]:
print(data.keys())

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
