In [None]:
import polars as pl
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, roc_auc_score

In [None]:
# Input locations. The values look like placeholders — point them at the real
# parquet files before running.
colo2d_path = "path_to_feature_set_1.parquet"
colo3d_path = "path_to_feature_set_2.parquet"
meta_ex = "path_to_ground_truth.parquet"

# Read both feature tables and convert each one to a NumPy matrix.
features_1, features_2 = (
    pl.read_parquet(feature_path).to_numpy()
    for feature_path in (colo2d_path, colo3d_path)
)

# flatten() collapses the label table into a 1-D vector.
# NOTE(review): assumes the ground-truth file holds a single label column whose
# rows line up with the feature tables — confirm.
ground_truth = pl.read_parquet(meta_ex).to_numpy().flatten()

In [None]:
def benchmark_knn(features, labels, test_size=0.3, k=5, random_state=42):
    """
    Benchmark a feature set with KNN classification against ground truth.

    The data is split once into train and test partitions, a k-nearest-neighbors
    classifier is fitted on the training partition, and its predictions on the
    held-out partition are scored.

    Args:
        features (numpy.ndarray): Feature matrix.
        labels (numpy.ndarray): Ground truth labels.
        test_size (float): Proportion of data for testing.
        k (int): Number of neighbors for KNN.
        random_state (int): Seed passed to the train/test split. Defaults to
            42, the value that was previously hard-coded.

    Returns:
        dict: ``"classification_report"`` is the dict returned by
        ``classification_report(..., output_dict=True)``. ``"auc"`` is the ROC
        AUC, or ``None`` when it cannot be computed: the labels are not binary,
        or the split left the training or the test partition with one class.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # NOTE(review): features are used as-is; KNN distances depend on feature
    # scale, so consider standardizing upstream if columns differ in range.
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)

    y_pred = knn.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)

    # AUC needs a positive-class probability column and both classes present in
    # the evaluation labels. The split is not stratified, so either partition
    # can end up with a single class even when the full label set is binary;
    # in that case predict_proba has one column (indexing [:, 1] would fail)
    # or roc_auc_score would reject y_test. Report None instead of crashing.
    auc = None
    if (
        len(set(labels)) == 2
        and len(knn.classes_) == 2
        and len(set(y_test)) == 2
    ):
        # Column 1 of predict_proba corresponds to knn.classes_[1].
        auc = roc_auc_score(y_test, knn.predict_proba(X_test)[:, 1])

    return {
        "classification_report": report,
        "auc": auc,
    }

In [None]:
# Score both feature sets against the same ground truth, in order.
results_1, results_2 = (
    benchmark_knn(feature_matrix, ground_truth)
    for feature_matrix in (features_1, features_2)
)

# Report the metrics for the first feature set (labelled colo2D).
print("colo2D Results:")
print("Classification Report:", results_1["classification_report"])
colo2d_auc = results_1["auc"]
# "auc" is None when benchmark_knn did not compute one; skip the line then.
if colo2d_auc is not None:
    print("AUC:", colo2d_auc)



In [None]:
# Report the metrics for the second feature set (labelled colo3D).
print("colo3D Results:")
print("Classification Report:", results_2["classification_report"])
colo3d_auc = results_2["auc"]
# "auc" is None when benchmark_knn did not compute one; skip the line then.
if colo3d_auc is not None:
    print("AUC:", colo3d_auc)