In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import silhouette_score
from sklearn.neighbors import NearestNeighbors
from collections import deque
import random


# ======================
# *Polynomial-RadiusClustering*
# made by Radman Ghourchian
# Copyrighted 2025
# ======================
class Polynomial_RadiusClustering:
    """Radius-based clustering with an automatic search over the radius.

    For every candidate radius the samples are linked to all neighbours
    lying within that radius, and each connected group of linked samples
    becomes one cluster.  Groups smaller than ``min_cluster_size`` are
    relabelled as noise (``-1``).  The radius whose labelling obtains the
    highest silhouette score is kept.

    Parameters
    ----------
    radius_values : iterable of float
        Candidate radii to try.  The default grid starts at 0.01 and
        increases in steps of 0.05, stopping before 3.01.
    min_cluster_size : int
        Groups with fewer members than this are treated as noise instead
        of being reported as a cluster.
    use_kernel : bool
        When true, the data is expanded with element-wise polynomial
        features (see ``_polynomial_lift``) before clustering.
    degree : int
        Highest power used by the polynomial expansion.  Only read when
        ``use_kernel`` is true.
    coef0 : float
        Constant added to every generated power feature.  Only read when
        ``use_kernel`` is true.
        NOTE(review): a constant offset translates the lifted features and
        leaves pairwise Euclidean distances unchanged, so with the default
        metrics this value is not expected to change the result -- confirm
        the intended behaviour.

    Attributes set by ``fit``
    -------------------------
    best_radius_ : float or None
        Radius with the highest silhouette score (``None`` if no radius
        produced a valid clustering).
    best_score_ : float or None
        ``None`` before fitting; afterwards the best silhouette score, or
        ``-inf`` if no radius produced a valid clustering.
    best_labels_ : numpy.ndarray or None
        Labels for the best radius; ``-1`` marks noise.
    n_clusters_ : int or None
        Number of clusters (noise excluded) for the best radius.
    """

    def __init__(self, radius_values=np.arange(0.01, 3.01, 0.05),
                 min_cluster_size=5,
                 use_kernel=False,
                 degree=2,
                 coef0=1.0):
        self.radius_values = radius_values
        self.min_cluster_size = min_cluster_size
        self.use_kernel = use_kernel
        self.degree = degree
        self.coef0 = coef0

        # Results, filled in by fit().
        self.best_radius_ = None
        self.best_score_ = None
        self.best_labels_ = None
        self.n_clusters_ = None

    # kernel
    def _polynomial_lift(self, X):
        """Return ``X`` extended with element-wise power features.

        The output is ``[X, X**2 + coef0, ..., X**degree + coef0]`` stacked
        column-wise.  With ``degree < 2`` only the original columns are
        returned.
        """
        X = np.asarray(X)
        lifted = [X]
        lifted.extend(
            np.power(X, d) + self.coef0 for d in range(2, self.degree + 1)
        )
        return np.hstack(lifted)

    @staticmethod
    def _label_components(neighbors_list, n_samples):
        """Label the connected components of a neighbourhood graph.

        Parameters
        ----------
        neighbors_list : sequence of integer sequences
            ``neighbors_list[i]`` holds the indices linked to sample ``i``.
        n_samples : int
            Number of samples (nodes).

        Returns
        -------
        labels : numpy.ndarray of int
            Component id of every sample.  Ids are assigned in order of
            the lowest sample index of each component, so the result is
            reproducible.
        n_components : int
            Number of components found.
        """
        labels = np.full(n_samples, -1, dtype=int)
        cluster_id = 0

        for start in range(n_samples):
            if labels[start] != -1:
                continue  # already reached from an earlier seed

            labels[start] = cluster_id
            queue = deque([start])  # deque gives O(1) pops from the left
            while queue:
                idx = queue.popleft()
                for nb in neighbors_list[idx]:
                    if labels[nb] == -1:
                        labels[nb] = cluster_id
                        queue.append(nb)

            cluster_id += 1

        return labels, cluster_id

    @staticmethod
    def _mark_small_clusters_as_noise(labels, min_cluster_size):
        """Relabel, in place, every cluster below ``min_cluster_size`` as -1.

        ``labels`` must only contain non-negative ids on entry.  The
        surviving ids are left unchanged, so they may contain gaps.
        """
        if labels.size:
            sizes = np.bincount(labels)
            labels[sizes[labels] < min_cluster_size] = -1
        return labels

    # clustering for one radius
    def _cluster_with_radius(self, X, radius, nn=None):
        """Cluster ``X`` for a single radius.

        Parameters
        ----------
        X : numpy.ndarray
            Data to cluster (already lifted when the kernel is enabled).
        radius : float
            Two samples are linked when they are within this distance.
        nn : NearestNeighbors, optional
            An index already fitted on ``X``.  When omitted a new one is
            fitted; passing it lets callers reuse one index for many radii.

        Returns
        -------
        numpy.ndarray of int
            Cluster id per sample, ``-1`` for noise.
        """
        if nn is None:
            nn = NearestNeighbors(radius=radius).fit(X)
        neighbors_list = nn.radius_neighbors(
            X, radius=radius, return_distance=False
        )

        labels, _ = self._label_components(neighbors_list, X.shape[0])
        # Groups smaller than min_cluster_size count as noise.
        return self._mark_small_clusters_as_noise(
            labels, self.min_cluster_size
        )

    # fitting the model
    def fit(self, X):
        """Search ``radius_values`` and keep the best-scoring clustering.

        Radii that yield fewer than two clusters, or as many clusters as
        samples, are skipped.  The results are stored on ``best_radius_``,
        ``best_score_``, ``best_labels_`` and ``n_clusters_``.

        Returns
        -------
        self
        """
        X = np.asarray(X)
        X_proc = self._polynomial_lift(X) if self.use_kernel else X
        n_samples = X_proc.shape[0]

        # The neighbour index does not depend on the radius: build it once.
        nn = NearestNeighbors().fit(X_proc)

        best_score = -np.inf
        best_radius = None
        best_labels = None
        best_n_clusters = None

        for r in self.radius_values:
            labels = self._cluster_with_radius(X_proc, r, nn=nn)
            n_clusters = int(np.count_nonzero(np.unique(labels) != -1))

            if n_clusters <= 1 or n_clusters >= n_samples:
                continue

            # NOTE(review): samples labelled -1 are handed to
            # silhouette_score like any other label, i.e. noise is scored
            # as one additional cluster.
            try:
                score = silhouette_score(X_proc, labels)
            except ValueError:
                # Raised for label sets the metric cannot score.
                continue

            if score > best_score:
                best_score = score
                best_radius = r
                best_labels = labels
                best_n_clusters = n_clusters

        # Saving the results in their attributes
        self.best_radius_ = best_radius
        self.best_score_ = best_score
        self.best_labels_ = best_labels
        self.n_clusters_ = best_n_clusters

        return self

    # adding the labels to our dataframe
    def add_labels_to_data(self, data):
        """Return ``data`` with the fitted labels attached.

        A DataFrame is copied and gets a ``cluster_label`` column; any
        other input is returned as an array with the labels appended as
        the last column.

        Raises
        ------
        ValueError
            If no labels are available (model not fitted, or no radius
            produced a valid clustering).
        """
        if self.best_labels_ is None:
            raise ValueError("The model isn't fit yet!")

        if isinstance(data, pd.DataFrame):
            df = data.copy()
            df["cluster_label"] = self.best_labels_
            return df

        return np.column_stack((data, self.best_labels_))

In [None]:
# Example usage: replace the placeholder path with your own dataset.
df = pd.read_csv("Your dataset")


# Same settings with and without the polynomial lift, for comparison.
model = Polynomial_RadiusClustering(use_kernel=True, degree=4, min_cluster_size=7, coef0=2)
model.fit(df)

model_nok = Polynomial_RadiusClustering(use_kernel=False, degree=4, min_cluster_size=7, coef0=2)
model_nok.fit(df)

# Results with the kernel
print("Best radius:", model.best_radius_)
print("Best silhouette:", model.best_score_)
print("Clusters:", model.n_clusters_)

# Results without the kernel
print("Best radius:", model_nok.best_radius_)
print("Best silhouette:", model_nok.best_score_)
print("Clusters:", model_nok.n_clusters_)

# Adding the labels
# Placeholder: list the columns of your dataset here.
df = pd.DataFrame(df, columns=["You add all the columns of your dataset here"])
df_with_labels_kernel = model.add_labels_to_data(df)
df_with_labels_no_kernel = model_nok.add_labels_to_data(df)
# The original printed an undefined name here (NameError); show the
# kernel-labelled frame that was just built.
print(df_with_labels_kernel.head())



# Or you could do something like gridsearch

# Not used in this cell; kept in case a later cell relies on it.
param_score = {}
param_score_list = []

deg_list = [1, 2, 3]
cluster_size_list = [2, 3, 4, 5]
coef0_list = [0.5, 1, 2, 3]
for deg in deg_list:
    for clust in cluster_size_list:
        for c in coef0_list:
            test_model = Polynomial_RadiusClustering(use_kernel=True, degree=deg, min_cluster_size=clust, coef0=c)
            test_model.fit(df)
            raid_score = {'Degree': deg, 'Cluster': clust, "coef0": c, "score": test_model.best_score_}
            param_score_list.append(raid_score)

# Keep the best-scoring combination.  Runs that found no valid clustering
# report -inf and are ignored; a negative silhouette is still a valid score
# and must not be discarded.  The builtin `max` is no longer shadowed.
scored_runs = [run for run in param_score_list if np.isfinite(run['score'])]
best_params = max(scored_runs, key=lambda run: run['score'], default=None)


print(best_params)

# If you want, you could turn off use_kernel, or add another cell with it turned off, to reach the best conclusion

NameError: name 'pd' is not defined