In [1]:
# Author: Roi Yehoshua <roiyeho@gmail.com>
# April 2024
# License: MIT

In [2]:
import numpy as np

# Seed NumPy's legacy global RNG so code drawing from np.random is repeatable
np.random.seed(seed=42)

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Iris dataset and keep only the first two feature columns as inputs
iris = load_iris()
X, y = iris.data[:, :2], iris.target

# Hold out a test set; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [4]:
from scipy.spatial.distance import pdist

# Condensed vector of distances between every pair of training samples
# (Euclidean is pdist's default metric; spelled out here for clarity)
distances = pdist(X_train, metric='euclidean')

# The median pairwise distance is used below as a data-driven radius
median_distance = np.median(distances)
print(f'Median distance: {median_distance:.2f}')

Median distance: 1.08


In [5]:
from sklearn.neighbors import RadiusNeighborsClassifier

# Radius-based neighbors classifier whose neighborhood radius is the median
# pairwise training distance. outlier_label=-1 is the label given to samples
# that have no training neighbor within the radius (instead of raising).
clf = RadiusNeighborsClassifier(
    radius=median_distance,
    outlier_label=-1,
)
clf.fit(X_train, y_train)

In [6]:
# Mean accuracy of the median-radius classifier on each split
train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)

print(f'Training accuracy: {train_accuracy:.4f}')
print(f'Test accuracy: {test_accuracy:.4f}')

Training accuracy: 0.7946
Test accuracy: 0.7895


In [7]:
from sklearn.model_selection import GridSearchCV

# Candidate radii to compare via 3-fold cross-validation on the training set
param_grid = {'radius': [0.5, 0.75, 1.0, 1.25, 1.5]}

grid_search = GridSearchCV(
    estimator=RadiusNeighborsClassifier(outlier_label=-1),
    param_grid=param_grid,
    cv=3,
)
grid_search.fit(X_train, y_train)

# Report the radius that achieved the best cross-validated score
best_radius = grid_search.best_params_["radius"]
print(f'Best radius: {best_radius}')

Best radius: 0.75




In [8]:
# Mean accuracy of the tuned model (grid_search scores with its refit best
# estimator) on each split
tuned_train_accuracy = grid_search.score(X_train, y_train)
tuned_test_accuracy = grid_search.score(X_test, y_test)

print(f'Training accuracy: {tuned_train_accuracy:.4f}')
print(f'Test accuracy: {tuned_test_accuracy:.4f}')

Training accuracy: 0.8036
Test accuracy: 0.8421
