In [11]:
!pip install abstention

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: C:\Program Files\Python312\python.exe -m pip install --upgrade pip


In [12]:
import gzip
import numpy as np
from collections import defaultdict
from scipy.special import softmax
from sklearn.metrics import accuracy_score, roc_auc_score
from abstention.calibration import TempScaling
from abstention.label_shift import EMImbalanceAdapter
from tabpfn import TabPFNClassifier

def read_labels(file_handle, num_classes=10):
    """Read one integer class label per line and one-hot encode the labels.

    Args:
        file_handle: Iterable of lines (``str`` or ``bytes``), each holding a
            single integer class index. Trailing whitespace/newlines are
            stripped before parsing.
        num_classes: Width of the one-hot encoding. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        A float NumPy array of shape ``(n_lines, num_classes)`` with exactly
        one 1 per row. Empty input yields an array of shape
        ``(0, num_classes)``.

    Raises:
        ValueError: If a line cannot be parsed as an integer.
        IndexError: If a label lies outside ``[0, num_classes)``.
    """
    label_ids = np.array([int(line.rstrip()) for line in file_handle], dtype=int)

    # Reject negatives explicitly: plain indexing would silently wrap a label
    # of -1 around to the last class instead of failing.
    out_of_range = (label_ids < 0) | (label_ids >= num_classes)
    if out_of_range.any():
        bad_label = int(label_ids[out_of_range][0])
        raise IndexError(
            f"label {bad_label} is out of range for {num_classes} classes"
        )

    one_hot = np.zeros((label_ids.size, num_classes))
    one_hot[np.arange(label_ids.size), label_ids] = 1
    return one_hot

def read_predictions(file_handle):
    """Parse tab-separated float rows from a binary file handle.

    Every line is decoded as UTF-8, stripped of trailing whitespace, split on
    tab characters, and each field converted to ``float``. The collected rows
    are returned as a single NumPy array.
    """
    rows = [
        [float(field) for field in raw_line.decode("utf-8").rstrip().split("\t")]
        for raw_line in file_handle
    ]
    return np.array(rows)

def sample_data(X, y, sample_size, seed=None):
    """Randomly samples a subset of X and y, maintaining correspondence.

    Rows are drawn without replacement, and the same row indices are applied
    to both arrays so that ``X[i]`` and ``y[i]`` stay paired.

    Args:
        X: Input features; must support integer-array indexing
            (e.g. a NumPy array).
        y: Target labels, same length as X and indexable the same way.
        sample_size: Number of samples to take; must not exceed ``len(X)``.
        seed: Optional seed (or ``numpy.random.Generator``) used to make the
            draw reproducible. When ``None`` (the default) NumPy's global
            random state is used, exactly as before, so ``np.random.seed``
            still controls the result.

    Returns:
        A tuple containing the sampled X and y.

    Raises:
        AssertionError: If X and y differ in length.
        ValueError: If ``sample_size`` is larger than ``len(X)``.
    """
    assert len(X) == len(y), "X and y must have the same length"
    if seed is None:
        # Legacy path: keeps behaviour identical for existing callers.
        indices = np.random.choice(len(X), size=sample_size, replace=False)
    else:
        rng = np.random.default_rng(seed)
        indices = rng.choice(len(X), size=sample_size, replace=False)
    return X[indices], y[indices]

# Load data
# From paper "Maximum Likelihood With Bias-Corrected Calibration is Hard-To-Beat at Label Shift Adaptation"
# Labels are parsed into one-hot rows and predictions into float rows by the
# two reader helpers defined above.
with gzip.open("demo_valid_labels.txt.gz", "rb") as valid_labels_file:
    valid_labels = read_labels(valid_labels_file)
with gzip.open("demo_valid_preds.txt.gz", "rb") as valid_preds_file:
    valid_preds = read_predictions(valid_preds_file)
with gzip.open("demo_shifted_test_preds.txt.gz", "rb") as test_preds_file:
    shifted_test_preds = read_predictions(test_preds_file)
with gzip.open("demo_shifted_test_labels.txt.gz", "rb") as test_labels_file:
    shifted_test_labels = read_labels(test_labels_file)

# The model's predictions act as the features (X); the labels are the targets (y).
X_train, y_train = valid_preds, valid_labels
X_test, y_test = shifted_test_preds, shifted_test_labels

# Sample data
SAMPLE_SIZE = 500  # Use a constant
SAMPLING_SEED = 0  # Fixed so the subsample, and every accuracy derived from it, is reproducible
# sample_data draws from NumPy's global random state, so seed it right before the draw.
np.random.seed(SAMPLING_SEED)
X_train_sampled, y_train_sampled = sample_data(X_train, y_train, SAMPLE_SIZE)

# BCTS Method
# A TempScaling calibrator (bias_positions='all') is handed to the EM-based
# imbalance adapter as its calibrator factory.
bcts_calibrator_factory = TempScaling(bias_positions='all', verbose=False)
imbalance_adapter = EMImbalanceAdapter(
    calibrator_factory=bcts_calibrator_factory,
)

# Calling the adapter with the sampled validation data and the test
# predictions returns a function; that function is then applied to the same
# test predictions.
imbalance_adapter_func = imbalance_adapter(
    tofit_initial_posterior_probs=X_test,
    valid_posterior_probs=X_train_sampled,
    valid_labels=y_train_sampled,
)
adapted_shifted_test_preds = imbalance_adapter_func(X_test)

# TabPFN Method
# Collapse the one-hot label rows to integer class indices before fitting.
y_train_sampled_tabpfn = np.argmax(y_train_sampled, axis=1)
y_test_tabpfn = np.argmax(y_test, axis=1)

tabpfn_model = TabPFNClassifier()
tabpfn_model.fit(X_train_sampled, y_train_sampled_tabpfn)

tabpfn_predictions = tabpfn_model.predict(X_test)
print("TabPFN Accuracy:", accuracy_score(y_test_tabpfn, tabpfn_predictions))

# All three accuracies are scored against the same class-index vector with the
# same metric function, so the printed numbers are directly comparable.

# No adjustment accuracy
unadapted_test_accuracy = accuracy_score(y_test_tabpfn, np.argmax(X_test, axis=-1))
print("Unadapted Test Accuracy:", unadapted_test_accuracy)

# Adapted test accuracy (BCTS)
adapted_test_accuracy = accuracy_score(
    y_test_tabpfn, np.argmax(adapted_shifted_test_preds, axis=-1)
)
print("Adapted Test Accuracy:", adapted_test_accuracy)



TabPFN Accuracy: 0.714
Unadapted Test Accuracy: 0.707
Adapted Test Accuracy: 0.988


In [13]:
import numpy as np
import scipy.stats as stats
from tabpfn import TabPFNClassifier
import pandas as pd
from abstention.calibration import TempScaling
from abstention.label_shift import EMImbalanceAdapter
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

def gaussian_mixture(n_samples=1000, proportion=0.5, mean1=(0, 0), cov1=((1, 0.5), (0.5, 1)),
                     mean2=(1, 1), cov2=((1, -0.3), (-0.3, 1)), seed=None):
    """
    Generates a mixture of two Gaussian distributions.

    Parameters:
    - n_samples: Total number of samples.
    - proportion: Proportion of samples from the first Gaussian (between 0 and 1).
      The first Gaussian receives int(n_samples * proportion) samples and the
      second receives the remainder.
    - mean1, cov1: Mean and covariance of the first Gaussian.
    - mean2, cov2: Mean and covariance of the second Gaussian.
    - seed: Optional seed (or numpy.random.Generator) for a reproducible draw.
      When None (the default) NumPy's global random state is used, so
      np.random.seed still controls the result.

    Returns:
    - X: (n_samples, d) array of generated data points.
    - y: (n_samples,) float array of labels (0 for first Gaussian, 1 for second Gaussian).

    Raises:
    - ValueError: if proportion is not within [0, 1].
    """
    if not 0 <= proportion <= 1:
        raise ValueError(f"proportion must be between 0 and 1, got {proportion!r}")

    n1 = int(n_samples * proportion)
    n2 = n_samples - n1

    # Pick the random source once; the order of the draws below is unchanged.
    if seed is None:
        draw_normal = np.random.multivariate_normal
        permute = np.random.permutation
    else:
        rng = np.random.default_rng(seed)
        draw_normal = rng.multivariate_normal
        permute = rng.permutation

    X1 = draw_normal(mean1, cov1, n1)
    X2 = draw_normal(mean2, cov2, n2)

    y1 = np.zeros(n1)
    y2 = np.ones(n2)

    # Combine and shuffle so the two components are interleaved
    X = np.vstack((X1, X2))
    y = np.hstack((y1, y2))
    indices = permute(n_samples)

    return X[indices], y[indices]

In [14]:
# gaussian_mixture draws from NumPy's global random state; seed it so the three
# splits, and every accuracy computed from them, are reproducible on re-run.
MIXTURE_SEED = 0
np.random.seed(MIXTURE_SEED)

# Train and validation use the same component proportion (0.3 from the first
# Gaussian); the test split uses 0.7, i.e. the label proportions are shifted.
X_train, y_train = gaussian_mixture(n_samples=800, proportion=0.3)
X_val, y_val = gaussian_mixture(n_samples=800, proportion=0.3)
X_test, y_test = gaussian_mixture(n_samples=800, proportion=0.7)

In [15]:
# Baseline with no label-shift correction: fit TabPFN on the training split
# and score its hard class predictions on the test split.
tabpfn_model = TabPFNClassifier()
tabpfn_model.fit(X_train, y_train)
tabpfn_predictions = tabpfn_model.predict(X_test)
print("TabPFN Accuracy:", accuracy_score(y_test, tabpfn_predictions))



TabPFN Accuracy: 0.6625


In [16]:
# Fit a plain logistic regression on the training split.
lr_classifier = LogisticRegression()
lr_classifier.fit(X_train, y_train)

# Score on the test split. This used to be printed as "Validation Accuracy"
# and stored in accuracy_val, which the real validation score then overwrote.
y_pred_test = lr_classifier.predict(X_test)
accuracy_test = accuracy_score(y_test, y_pred_test)
print(f"Test Accuracy: {accuracy_test}")
print(classification_report(y_test, y_pred_test))

# Score on the validation split for comparison.
y_pred_val = lr_classifier.predict(X_val)
accuracy_val = accuracy_score(y_val, y_pred_val)
print(f"Validation Accuracy: {accuracy_val}")
print(classification_report(y_val, y_pred_val))

Validation Accuracy: 0.67375
              precision    recall  f1-score   support

         0.0       0.95      0.57      0.71       560
         1.0       0.48      0.93      0.63       240

    accuracy                           0.67       800
   macro avg       0.71      0.75      0.67       800
weighted avg       0.81      0.67      0.68       800

Validation Accuracy: 0.8175
              precision    recall  f1-score   support

         0.0       0.75      0.59      0.66       240
         1.0       0.84      0.92      0.88       560

    accuracy                           0.82       800
   macro avg       0.79      0.75      0.77       800
weighted avg       0.81      0.82      0.81       800



In [17]:
# BCTS Method
# Class-probability outputs of the logistic regression on both splits; these
# are what the adapter is fitted on and applied to.
test_prob = lr_classifier.predict_proba(X_test)
val_prob = lr_classifier.predict_proba(X_val)

# TempScaling (bias_positions='all') is passed to the EM imbalance adapter as
# its calibrator factory.
bcts_calibrator_factory = TempScaling(bias_positions='all', verbose=False)
imbalance_adapter = EMImbalanceAdapter(
    calibrator_factory=bcts_calibrator_factory,
)

# Calling the adapter returns a function, which is then applied to the test
# probabilities.
imbalance_adapter_func = imbalance_adapter(
    tofit_initial_posterior_probs=test_prob,
    valid_posterior_probs=val_prob,
    valid_labels=y_val,
)
adapted_shifted_test_preds = imbalance_adapter_func(test_prob)

# Adapted test accuracy (BCTS)
adapted_test_accuracy = np.mean(
    np.argmax(adapted_shifted_test_preds, axis=-1) == y_test
)
print("Adapted Test Accuracy:", adapted_test_accuracy)

Adapted Test Accuracy: 0.745
