In [119]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import KBinsDiscretizer, LabelEncoder
import pandas as pd
import seaborn as sns

In [65]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.gridspec import GridSpec
from itertools import chain, combinations

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, CategoricalNB
from sklearn.preprocessing import KBinsDiscretizer, LabelEncoder, StandardScaler
from sklearn.datasets import make_blobs, load_wine

from data_functions import load_data
from error_probs_model import ErrorProbsModel
from parzen_window_classifier import PWC

from functools import partial

from calibration import get_calibration_errors, get_ece, get_mce
from numpy.random import default_rng
# Shared, seeded NumPy generator used by the sampling code in this notebook
# (rng.choice / rng.integers). Every draw advances its state, so re-running a
# sampling cell without re-creating `rng` produces different draws.
rng = default_rng(12345)

In [3]:
# Data set names, read from the `name` column of the id table.
data_set_names = pd.read_csv('./data/data_set_ids.csv')['name'].values

In [4]:
# Display the loaded data set names.
data_set_names

array(['breast-cancer-wisconsin', 'blood-transfusion',
       'pima-indians-diabetes', 'ionosphere', 'sonar', 'biodegradation',
       'vehicle', 'ecoli', 'glass', 'vertebra-column', 'user-knowledge',
       'kc2', 'parkinsons', 'banknote', 'seeds', 'prnn-craps',
       'chscase-vine', 'wine', 'iris', 'segment', 'balance-scale',
       'seismic-bumps', 'steel-plates-fault', 'phoneme', 'satimage',
       'wind'], dtype=object)

In [5]:
def eval_classifier(classifier, X_test, y_test, results):
	"""Score `classifier` on the test data and append the metrics to `results`.

	Appends one value (rounded to three decimals) to each of
	`results['accuracies']`, `results['mce']` and `results['ece']`.
	Nothing is returned; `results` is modified in place.

	NOTE(review): the calibration helpers are given the hard predictions from
	`classifier.predict`, not probabilities -- confirm that this is what
	`get_mce` / `get_ece` expect.
	"""
	predictions = classifier.predict(X_test)

	n_correct = np.sum(predictions == y_test)
	results['accuracies'].append(np.round(n_correct / len(y_test), 3))

	# Same order as the result keys: maximum, then expected calibration error.
	for key, metric in (('mce', get_mce), ('ece', get_ece)):
		results[key].append(np.round(metric(y_test, predictions), 3))

In [6]:
# Load one data set.
# presumably X = features, y_true = ground-truth labels, y = per-annotator labels -- confirm in load_data
X, y_true, y = load_data('sonar-simulated-o')

# Split row indices instead of labels so that the same partition can index
# both label arrays below.
X_train, X_test, y_train_idx, y_test_idx = train_test_split(
	X,
	np.arange(len(y_true)),
	test_size=0.4,
	random_state=42,
)

# Training labels come from `y`, test labels from `y_true`.
y_train, y_test = y[y_train_idx], y_true[y_test_idx]

# Standardize: the scaler is fitted on the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Problem dimensions, derived from the loaded data rather than hard-coded.
# Classes are counted on the ground-truth labels, as run_experiment does.
n_classes = len(np.unique(y_true))

# Annotations acquired per round: 5 per class.
budget = 5*n_classes

n_iterations = len(X_test) // budget

# One column of the annotation matrix per annotator.
n_annotators = y_train.shape[1]


In [25]:
# Acquired annotations: same shape as y_train, NaN where nothing was queried yet.
y_DL = np.full(y_train.shape, np.nan, dtype=float)

# initial choice: for every label value, draw 5 row indices (with replacement)
# among the rows in which that value occurs
idx = list(chain.from_iterable(
	rng.choice(np.where(y_train == c)[0], 5) for c in np.unique(y_train)
))

# one randomly drawn annotator per selected row
annotators = rng.integers(0, n_annotators, budget)

mask = np.zeros(y_train.shape, dtype=bool)
mask[idx, annotators] = True

# reveal the selected (instance, annotator) labels
y_DL[mask] = y_train[mask]

In [26]:
# Classifier from parzen_window_classifier with metric='rbf'; it is fitted in a
# separate cell.
pwc = PWC(
	n_classes,
	metric='rbf',
	combine_labels=False,
	probabilistic=False,
)

In [42]:
# Fit the classifier on the standardized training features and the partially
# filled annotation matrix y_DL (NaN = label not acquired).
pwc.fit(X_train, y_DL)

In [37]:
# Shape of the standardized test split.
X_test.shape

(84, 60)

In [38]:
# Fraction of test instances whose prediction equals the label in y_test.
np.mean(pwc.predict(X_test) == y_test)

0.5238095238095238

In [39]:
# Fit an ErrorProbsModel on the complete annotation matrix y_train.
# NOTE(review): select_annotator_epm reads this module-level `epm`, so it keeps
# the n_classes value of the data set loaded above even when run_experiment is
# called for another data set.
epm = ErrorProbsModel(n_classes=n_classes)
epm.fit(X_train, y_train)

In [40]:
# Per training instance: column index of the smallest epm prediction.
# NOTE(review): select_annotator_epm takes argmax of epm.predict instead --
# confirm whether the model outputs error probabilities or accuracies.
np.argmin(epm.predict(X_train), axis=1)

array([4, 3, 1, 1, 4, 1, 4, 4, 3, 4, 4, 4, 4, 3, 4, 3, 4, 4, 3, 1, 4, 3,
       4, 2, 4, 3, 4, 4, 3, 4, 3, 4, 2, 4, 4, 2, 1, 4, 3, 4, 1, 2, 3, 3,
       1, 3, 4, 3, 4, 2, 1, 3, 3, 3, 1, 2, 4, 4, 2, 1, 3, 1, 3, 4, 3, 4,
       3, 3, 1, 3, 4, 2, 4, 4, 3, 4, 4, 3, 4, 1, 3, 3, 1, 1, 1, 1, 3, 4,
       0, 4, 4, 3, 2, 4, 1, 3, 2, 4, 3, 4, 1, 4, 4, 2, 3, 3, 3, 3, 2, 3,
       1, 4, 3, 2, 4, 3, 4, 3, 4, 3, 4, 1, 4, 0])

In [41]:
# First ten rows of the training annotation matrix (one column per annotator).
y_train[:10]

array([[1, 0, 0, 0, 0],
       [1, 0, 0, 0, 1],
       [0, 1, 1, 1, 1],
       [0, 0, 0, 1, 1],
       [1, 0, 0, 1, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 1, 0, 1, 1],
       [0, 0, 1, 1, 1],
       [0, 1, 0, 0, 0]])

In [90]:
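# y_train: complete annotation matrix (all annotator labels for the training instances)
# y_DL: annotations selected so far (NaN where no label has been acquired yet)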
def select_random_annotator(n, X_train, y_DL, X_selected):
	"""Pick one annotator uniformly at random for every selected instance.

	The parameter order follows the positional call made by `selection`
	(`annotator_selection(n, X_train, y_DL, X_selected)`) and matches
	`select_annotator_epm`; the parameter names are unchanged.

	Parameters
	----------
	n : requested batch size. Unused: the number of draws follows
		`len(X_selected)` so it always matches the instances actually queried.
	X_train : training features (unused; kept for the common call signature).
	y_DL : annotation matrix acquired so far; its second dimension gives the
		number of annotators.
	X_selected : features of the queried instances, one row per instance.

	Returns
	-------
	Integer array of length `len(X_selected)` with annotator (column) indices.
	An annotator whose label was already acquired for an instance may be drawn
	again.
	"""
	n_annotators = np.shape(y_DL)[1]
	# Draw from the notebook's shared seeded generator to keep runs repeatable.
	return rng.integers(0, n_annotators, len(X_selected))

def select_annotator_epm(n, X_train, y_DL, X_selected):
	"""Choose, per selected instance, the annotator with the largest `epm` output.

	The module-level `epm` model is re-fitted on the annotations acquired so
	far (`y_DL`) and then queried for `X_selected`. The returned array holds,
	for each row, the column index of the largest predicted value. `n` is not
	used; it is accepted to match the annotator-selection call signature.

	NOTE(review): an earlier notebook cell applies argmin to `epm.predict`
	output -- confirm whether the model returns accuracies or error
	probabilities, i.e. whether argmax is the intended choice here.
	"""
	epm.fit(X_train, y_DL)
	return np.argmax(epm.predict(X_selected), axis=1)

def selection(n, classifier, query_strategy, annotator_selection, X_train, y_train, y_DL):
	"""Acquire one batch of annotations and record it in `y_DL`.

	`query_strategy(n, classifier, X_train, y_DL)` supplies the instance
	indices; `annotator_selection(n, X_train, y_DL, X_train[instances])`
	supplies one annotator index per instance. The matching entries of the
	complete annotation matrix `y_train` are copied into `y_DL`.

	`y_DL` is modified in place and also returned.
	"""
	rows = query_strategy(n, classifier, X_train, y_DL)
	cols = annotator_selection(n, X_train, y_DL, X_train[rows])

	# Reveal the chosen (instance, annotator) labels.
	picked = (rows, cols)
	y_DL[picked] = y_train[picked]
	return y_DL

def margin_qs(n, classifier, X, y):
	"""Margin query strategy: indices of the `n` rows with the smallest margin.

	The margin of a row is the difference between its two largest values in
	`classifier.predict_proba(X)`. Rows are ordered by ascending margin and
	the first `n` indices are returned. `y` is not used; it is accepted to
	match the query-strategy call signature.
	"""
	ranked = np.sort(classifier.predict_proba(X), axis=1)
	margins = ranked[:, -1] - ranked[:, -2]
	return np.argsort(margins)[:n]

def initial_choice(y_train, budget, n_annotators):
	"""Build the initial annotation matrix by revealing a few labels per label value.

	For every distinct value in `y_train`, `budget // n_labels` row indices
	are drawn (with replacement) among the rows in which that value occurs,
	and one random annotator is drawn per selected row. The corresponding
	entries of `y_train` are copied into the result.

	The number of draws per label follows `budget`; it used to be fixed at 5,
	which only worked when `budget == 5 * n_labels`. For such budgets the
	sequence of random draws is unchanged.

	Parameters
	----------
	y_train : complete annotation matrix, indexed as (instance, annotator).
	budget : maximum number of annotations to reveal.
	n_annotators : number of annotators (columns) to draw from.

	Returns
	-------
	Float array with the shape of `y_train`: NaN everywhere except at the
	revealed entries. Because rows are drawn with replacement, fewer than
	`budget` distinct entries may be revealed.
	"""
	y_DL = np.full(np.shape(y_train), np.nan, dtype=float)

	labels = np.unique(y_train)
	per_label = budget // labels.size if labels.size else 0
	if per_label <= 0:
		# Empty input or a budget below one draw per label: nothing to reveal.
		return y_DL

	# initial choice
	idx = np.array(
		[i for c in labels for i in rng.choice(np.where(y_train == c)[0], per_label)],
		dtype=int,
	)
	# One annotator per drawn row, so both index arrays always have equal length.
	annotators = rng.integers(0, n_annotators, len(idx))

	y_DL[idx, annotators] = y_train[idx, annotators]

	return y_DL

In [94]:
def eval_classifier(classifier, X_test, y_test, results):
	"""Evaluate `classifier` on the test data and log the metrics in `results`.

	This redefines the function of the same name from an earlier cell with
	the same behavior. One value, rounded to three decimals, is appended to
	each of `results['accuracies']`, `results['mce']` and `results['ece']`;
	the function returns nothing.

	NOTE(review): `get_mce` / `get_ece` are called with the hard predictions
	of `classifier.predict` -- confirm they do not expect probabilities.
	"""
	y_hat = classifier.predict(X_test)

	# accuracy = share of matching predictions
	hit_rate = np.sum(y_hat == y_test) / len(y_test)
	results['accuracies'].append(np.round(hit_rate, 3))

	max_cal_error = get_mce(y_test, y_hat)
	results['mce'].append(np.round(max_cal_error, 3))

	exp_cal_error = get_ece(y_test, y_hat)
	results['ece'].append(np.round(exp_cal_error, 3))



In [115]:
def run_experiment(data_set_name, seed, classifier, nbudget=5, query_strategy=margin_qs,
				   annnotator_selection=select_random_annotator, verbose=False):
	"""Run one multi-annotator active-learning experiment and return its metrics.

	Parameters
	----------
	data_set_name : passed to `load_data`, which yields `(X, y_true, y)`;
		`y` is indexed as (instance, annotator).
	seed : `random_state` of the 60/40 train/test split. The initial label
		choice draws from the module-level `rng` and is not controlled by it.
	classifier : zero-argument factory (a class or a `functools.partial`).
		A partial wrapping `PWC` is additionally given `n_classes`.
	nbudget : annotations acquired per class and iteration
		(`budget = nbudget * n_classes`).
	query_strategy : callable `(n, classifier, X_train, y_DL)` returning
		instance indices.
	annnotator_selection : callable `(n, X_train, y_DL, X_selected)` returning
		one annotator index per selected instance. The spelling with three
		n's is kept because callers pass it by keyword.
	verbose : print the metrics of every iteration.

	Returns
	-------
	dict with the per-iteration lists 'accuracies', 'mce', 'ece', 'api'
	(mean annotations per instance) and 'max_annotations', plus the scalars
	'classes', 'instances' and 'annotators'. 'api' and 'max_annotations' are
	measured after the acquisition step of the same iteration.
	"""
	X, y_true, y = load_data(data_set_name)

	n_classes = len(np.unique(y_true))
	n_instances, n_annotators = y.shape

	budget = nbudget * n_classes

	# Split row indices so the same partition can index both label arrays.
	X_train, X_test, y_train_idx, y_test_idx = train_test_split(
		X, np.arange(len(y_true)), test_size=0.4, random_state=seed)

	y_train = y[y_train_idx]
	y_test = y_true[y_test_idx]

	y_DL = initial_choice(y_train, budget, n_annotators)

	# Standardize with statistics of the training split only.
	scaler = StandardScaler().fit(X_train)
	X_train = scaler.transform(X_train)
	X_test = scaler.transform(X_test)

	if isinstance(classifier, partial) and classifier.func is PWC:
		classifier = classifier(n_classes=n_classes)
	else:
		classifier = classifier()

	# At most 50 rounds, fewer for small test splits.
	n_iterations = np.minimum(len(X_test) // (2*budget), 50)

	results = {'accuracies': [], 'mce': [], 'ece': [], 'api': [], 'classes' : n_classes,
			 'instances' : n_instances, 'annotators': n_annotators, 'max_annotations': []}

	# AL loop: fit on what is labelled so far, evaluate, then acquire a new batch.
	for i in range(n_iterations):

		classifier.fit(X_train, y_DL)

		eval_classifier(classifier, X_test, y_test, results)

		y_DL = selection(budget, classifier, query_strategy, annnotator_selection, X_train, y_train, y_DL)

		n_annotations = np.sum(~np.isnan(y_DL), axis=1)
		results['max_annotations'].append(np.max(n_annotations))
		results['api'].append(np.mean(n_annotations).round(2))

		if verbose:
			# Double-quoted f-strings keep this valid on Python versions before 3.12.
			print(f"Iteration: {i}. Accuracy: {results['accuracies'][-1]}. MCE: {results['mce'][-1]}. ECE: {results['ece'][-1]}")
			print(f"Annotations per instance: {results['api'][-1]}. Max annotations: {results['max_annotations'][-1]}")

	# Callers index the returned dict; without this return they received None.
	return results

In [116]:
# Largest number of non-NaN annotations on any single training instance.
np.max(np.sum(~np.isnan(y_train), axis=1))

5

In [117]:
# Single verbose run: PWC classifier, margin query strategy and EPM-based
# annotator selection. `res` holds whatever run_experiment returns.
res = run_experiment(data_set_name='breast-cancer-wisconsin-simulated-o', seed=1, classifier=partial(PWC, metric='rbf'), 
					 nbudget=5, query_strategy=margin_qs, annnotator_selection=select_annotator_epm, verbose=True)

Iteration: 0. Accuracy: 0.772. MCE: 0.0. ECE: 0.0
Annotations per instance: 0.06. Max annotations: 1
Iteration: 1. Accuracy: 0.807. MCE: 0.309. ECE: 0.127
Annotations per instance: 0.09. Max annotations: 1
Iteration: 2. Accuracy: 0.895. MCE: 0.196. ECE: 0.079
Annotations per instance: 0.11. Max annotations: 1
Iteration: 3. Accuracy: 0.943. MCE: 0.076. ECE: 0.026
Annotations per instance: 0.13. Max annotations: 1
Iteration: 4. Accuracy: 0.908. MCE: 0.183. ECE: 0.075
Annotations per instance: 0.15. Max annotations: 1
Iteration: 5. Accuracy: 0.89. MCE: 0.204. ECE: 0.083
Annotations per instance: 0.18. Max annotations: 1
Iteration: 6. Accuracy: 0.877. MCE: 0.0. ECE: 0.0
Annotations per instance: 0.21. Max annotations: 1
Iteration: 7. Accuracy: 0.829. MCE: 0.319. ECE: 0.158
Annotations per instance: 0.22. Max annotations: 1
Iteration: 8. Accuracy: 0.864. MCE: 0.262. ECE: 0.118
Annotations per instance: 0.23. Max annotations: 2
Iteration: 9. Accuracy: 0.882. MCE: 0.232. ECE: 0.101
Annotation

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [118]:
def plot_mean_std(ax, data, x, label, color, linestyle='-'):
	"""Draw the mean of `data` over axis 0 with a shaded one-standard-deviation band.

	Parameters
	----------
	ax : object providing `plot` and `fill_between` (e.g. a matplotlib Axes).
	data : array-like; mean and standard deviation are taken along axis 0.
	x : x coordinates shared by the line and the band.
	label : legend label of the line; the band is labelled "±1 Std Dev <label>".
	color : color of line and band.
	linestyle : line style of the mean curve.
	"""
	center = np.mean(data, axis=0)
	spread = np.std(data, axis=0)

	ax.plot(x, center, label=label, color=color, linewidth=2, linestyle=linestyle)

	# The lower edge of the band is clipped at zero.
	band_low = np.maximum(center - spread, 0)
	band_high = center + spread
	ax.fill_between(x, band_low, band_high, color=color, alpha=0.3, label=f"±1 Std Dev {label}")

In [None]:
skip = ['ecoli', 'glass']
simulation_type = 'o' # e {o, x, y}

# Experiment details (identical for every data set, so defined once)
seeds = np.arange(20)
nbudget = 5

# classifier = GaussianNB
classifier = partial(PWC, metric='rbf')

sns.set_theme(style='whitegrid')  # Use modern Seaborn styling

for data_set_name in data_set_names:

	if data_set_name in skip:
		continue

	# NOTE(review): earlier cells call load_data without a '.csv' suffix
	# (e.g. 'sonar-simulated-o') -- confirm which form load_data expects.
	data_set_name = f'{data_set_name}-simulated-{simulation_type}.csv'
	print(data_set_name)

	# One run per seed. The loop variable is passed on (it used to be the
	# constant 1, which made all runs identical and the std band meaningless).
	results = []
	for seed in seeds:
		res = run_experiment(data_set_name=data_set_name, seed=int(seed), classifier=classifier,
							 nbudget=nbudget, query_strategy=margin_qs,
							 annnotator_selection=select_annotator_epm, verbose=False)
		results.append(res)

	# Extract accuracy, ECE, and MCE data
	acc = [x['accuracies'] for x in results]
	ece = [x['ece'] for x in results]
	mce = [x['mce'] for x in results]
	api = [x['api'] for x in results]
	max_annotations = [x['max_annotations'] for x in results]

	x = np.arange(len(acc[0])) * nbudget  # Budget values

	# Create subplots
	fig, axes = plt.subplots(1, 2, figsize=(20, 6))

	# Plot accuracy on the left
	plot_mean_std(axes[0], acc, x, label="Accuracy", color="royalblue")
	axes[0].set_title("Accuracy", fontsize=14)
	axes[0].set_xlabel("Budget (*n_classes)", fontsize=12)
	axes[0].set_ylabel("Accuracy", fontsize=12)
	axes[0].legend(fontsize=10)

	# Plot ECE and MCE on the right
	plot_mean_std(axes[1], ece, x, label="ECE", color="orange")
	plot_mean_std(axes[1], mce, x, label="MCE", color="green", linestyle='--')
	axes[1].set_title("Calibration", fontsize=14)
	axes[1].set_xlabel("Budget (*n_classes)", fontsize=12)
	axes[1].set_ylabel("Calibration Metrics", fontsize=12)
	axes[1].legend(fontsize=10)

	# Set the overall title
	fig.suptitle(f"Results data set {data_set_name}", fontsize=16)

	# Adjust layout
	plt.tight_layout(rect=[0, 0, 1, 0.95])  # Leave space for the overall title

	# Show the plot, then release the figure so memory does not grow per data set
	plt.show()
	plt.close(fig)