In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

class ClassCentroidIdentifier:
    """
    Per-class centroid helper.

    Computes one centroid per class label, the Euclidean distance from every
    row to every centroid, and, per centroid, the closest quarter of the rows.
    """

    def __init__(self):
        # Both attributes stay None until the matching method has been called.
        self.centroids = None
        self.nearest_indices = None

    def set_centroids(self, data, class_var):
        """
        Store and return one centroid (feature-wise mean) per distinct label.

        Rows of the result follow the sorted unique values of ``class_var``.
        """
        labels = np.unique(class_var)
        n_features = data.shape[1]
        self.centroids = np.zeros((len(labels), n_features))
        for row, label in enumerate(labels):
            members = data[class_var == label]
            self.centroids[row, :] = np.mean(members, axis=0)
        return self.centroids

    def evaluate_distance(self, data):
        """
        Return a (len(data), n_centroids) matrix of Euclidean distances;
        column j holds the distance of every row of ``data`` to centroid j.
        """
        n_centroids = self.centroids.shape[0]
        distances = np.zeros((len(data), n_centroids))
        for col, centroid in enumerate(self.centroids):
            offsets = data - centroid
            distances[:, col] = np.sqrt(np.sum(offsets**2, axis=1))
        return distances

    def identify_25_percent(self, distances):
        """
        For each column of ``distances`` keep the row indices of the closest
        25% of rows. The result is 2-D: one column per centroid, rows ordered
        from nearest to farthest.
        """
        keep = int(0.25*len(distances))
        ranking = np.argsort(distances, axis=0)
        self.nearest_indices = ranking[:keep]
        return self.nearest_indices
    
# Fetch the Iris measurements and their species labels
iris = load_iris()
data = iris.data
target = iris.target

# Build the identifier and fit one centroid per species
identifier = ClassCentroidIdentifier()
identifier.set_centroids(data, target)

# Distance of every sample to every species centroid
distances = identifier.evaluate_distance(data)

# Row indices of the closest quarter of samples, one column per centroid
nearest_indices = identifier.identify_25_percent(distances)

# Collect features, label and a 0/1 "nearest" flag in one DataFrame;
# a row is flagged when it is among the closest quarter for any centroid
df = pd.DataFrame(data, columns=iris.feature_names)
df['class'] = target
df['nearest'] = 0
df.iloc[nearest_indices.ravel(), df.columns.get_loc('nearest')] = 1

In [None]:
df.loc[df['nearest'] == 1]

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

class ClassCentroidIdentifier:
    """
    Per-class centroid helper returning a flat, sorted list of the rows that
    are among the closest quarter to any centroid.
    """

    def __init__(self):
        # Filled in by set_centroids / identify_25_percent respectively.
        self.centroids = None
        self.nearest_indices = None

    def set_centroids(self, data, class_var):
        """
        Store and return the feature-wise mean of each class; rows follow the
        sorted unique values of ``class_var``.
        """
        labels = np.unique(class_var)
        self.centroids = np.zeros((len(labels), data.shape[1]))
        for row, label in enumerate(labels):
            self.centroids[row, :] = np.mean(data[class_var == label], axis=0)
        return self.centroids

    def evaluate_distance(self, data):
        """
        Return a (len(data), n_centroids) matrix whose column j is the
        Euclidean distance of every row of ``data`` to centroid j.
        """
        distances = np.zeros((len(data), self.centroids.shape[0]))
        for col, centroid in enumerate(self.centroids):
            squared = (data - centroid)**2
            distances[:, col] = np.sqrt(np.sum(squared, axis=1))
        return distances

    def identify_25_percent(self, distances):
        """
        Collect, for every column of ``distances``, the indices of the closest
        25% of rows, then return all of them as one ascending 1-D array.
        A row close to several centroids appears once per centroid.
        """
        quota = int(0.25 * distances.shape[0])
        self.nearest_indices = [
            np.argsort(distances[:, col])[:quota]
            for col in range(distances.shape[1])
        ]
        self.nearest_indices = np.sort(np.concatenate(self.nearest_indices))
        return self.nearest_indices
    
# Load the Iris dataset
iris = load_iris()
data = iris['data']
target = iris['target']

# Create an instance of the ClassCentroidIdentifier class
identifier = ClassCentroidIdentifier()

# Set the centroids for each class
identifier.set_centroids(data, target)

# Evaluate the distances between each data point and the centroids
distances = identifier.evaluate_distance(data)

# Identify the 25% nearest data points for each class
nearest_indices = identifier.identify_25_percent(distances)

# Store the results in a pandas DataFrame.
# 'nearest' is 1 for rows selected by identify_25_percent and 0 otherwise.
# The flag used to be inverted (default 1, selected rows set to 0), which made
# the `nearest == 1` count and the red-x overlay in the following cells show
# the rows that were NOT selected.
df = pd.DataFrame(data, columns=iris['feature_names'])
df['class'] = target
df['nearest'] = 0
df.iloc[nearest_indices, df.columns.get_loc('nearest')] = 1
df

In [None]:
df.loc[df['nearest'] == 1].shape

In [None]:
import matplotlib.pyplot as plt

# Sepal length vs. width for every sample, coloured by class, with the rows
# flagged nearest == 1 overdrawn as red crosses.
# NOTE: no centroid markers are drawn here even though the title mentions them.
plt.figure()
plt.scatter(df['sepal length (cm)'], df['sepal width (cm)'], c=df['class'], cmap='viridis')
plt.scatter(
    df.query('nearest == 1')['sepal length (cm)'],
    df.query('nearest == 1')['sepal width (cm)'],
    c='red',
    marker='x',
)
plt.title('Iris Data with Centroids and 25% Nearest Data Points')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.show()

####


In [None]:
class ClassCentroidIdentifier:
    """
    For each class centroid, find the closest points that belong to *other*
    classes.
    """

    def __init__(self):
        # Filled in by set_centroids / identify_25_percent respectively.
        self.centroids = None
        self.nearest_indices = None

    def set_centroids(self, data, class_var):
        """
        Store and return one centroid (feature-wise mean) per class.

        Rows of the result follow the sorted unique values of ``class_var``.
        """
        data = np.asarray(data)
        class_var = np.asarray(class_var)
        unique_classes = np.unique(class_var)

        # One row per class, one column per feature.
        self.centroids = np.zeros((len(unique_classes), data.shape[1]))
        for i, c in enumerate(unique_classes):
            self.centroids[i, :] = np.mean(data[class_var == c], axis=0)
        return self.centroids

    def evaluate_distance(self, data, class_var):
        """
        Euclidean distance from each class centroid to the points of the
        other classes.

        Returns an array of shape (n_classes, n_points). Entry [i, j] is the
        distance from centroid i to point j when point j belongs to another
        class, and ``np.inf`` when point j belongs to class i. Keeping the
        full width means column j always refers to row j of ``data``.

        ``set_centroids`` must have been called with the same ``class_var``
        so that centroid rows line up with the sorted unique labels.
        """
        data = np.asarray(data)
        class_var = np.asarray(class_var)
        unique_classes = np.unique(class_var)

        # The previous version tried to store the (n_points - n_own) distances
        # in a row of length n_points, which raised a broadcasting ValueError.
        # Own-class positions are now left at +inf instead of being dropped.
        distances = np.full((len(unique_classes), data.shape[0]), np.inf)
        for i, c in enumerate(unique_classes):
            other = class_var != c
            diff = data[other] - self.centroids[i]
            distances[i, other] = np.sqrt(np.sum(diff**2, axis=1))
        return distances

    def identify_25_percent(self, distances, threshold=0.25):
        """
        For each centroid (row of ``distances``) select the nearest
        ``threshold`` fraction of the candidate points.

        Non-finite entries (the own-class placeholders written by
        ``evaluate_distance``) are not counted as candidates and sort last,
        so they are never selected.

        Returns all selected column indices as one ascending 1-D array; a
        point close to several centroids appears once per centroid. The
        result is also stored in ``self.nearest_indices``.
        """
        selected = []
        for class_distances in np.asarray(distances):
            n_candidates = np.count_nonzero(np.isfinite(class_distances))
            count = int(threshold * n_candidates)
            selected.append(np.argsort(class_distances)[:count])
        self.nearest_indices = np.sort(np.concatenate(selected))
        return self.nearest_indices

In [None]:
import numpy as np

class ClassCentroidIdentifier:
    """
    Centroid helper that pools all centroid-to-other-class distances into one
    flat vector and picks the smallest quarter of them.
    """

    def __init__(self):
        # Filled in by set_centroids / identify_25_percent respectively.
        self.centroids = None
        self.nearest_indices = None

    def set_centroids(self, data, class_var):
        """
        Store and return one centroid (feature-wise mean) per class; rows
        follow the sorted unique values of ``class_var``.
        """
        unique_classes = np.unique(class_var)
        self.centroids = np.zeros((len(unique_classes), data.shape[1]))
        for i, c in enumerate(unique_classes):
            class_data = data[class_var == c]
            self.centroids[i, :] = np.mean(class_data, axis=0)
        return self.centroids

    def evaluate_distance(self, data, class_var):
        """
        Euclidean distance between each centroid and the points of every
        other class.

        Returns a flat 1-D array. The entries are grouped by centroid (in
        sorted label order) and, within a centroid, by the other class the
        points belong to, each group in the row order of ``data``. Positions
        in this array are therefore NOT row indices of ``data``.
        """
        distances = []
        unique_classes = np.unique(class_var)
        for i in range(len(unique_classes)):
            for j, c2 in enumerate(unique_classes):
                if i == j:
                    # A centroid is only compared with points of other classes.
                    continue
                class2_data = data[class_var == c2]
                diff = np.expand_dims(self.centroids[i], axis=0) - class2_data
                distances.extend(np.sqrt(np.sum(diff**2, axis=1)))
        return np.array(distances)

    def identify_25_percent(self, distances):
        """
        Return the positions of the smallest 25% of ``distances``, ordered
        from smallest to largest distance.

        The result is also stored in ``self.nearest_indices`` (previously the
        attribute was left at None). When ``distances`` comes from
        ``evaluate_distance`` the positions index that flat vector, not the
        rows of the original data.
        """
        count = int(0.25 * len(distances))
        self.nearest_indices = np.argsort(distances)[:count]
        return self.nearest_indices

# Load dataset
from sklearn.datasets import load_iris
iris = load_iris()
data = iris.data
class_var = iris.target

# Build the identifier and fit one centroid per class
cci = ClassCentroidIdentifier()
cci.set_centroids(data, class_var)

# Flat vector of distances from every centroid to the points of other classes
distances = cci.evaluate_distance(data, class_var)

# Positions of the smallest quarter of those distances.
# NOTE(review): these positions index the flat `distances` vector, so they
# should not be used directly to index rows of `data` - confirm intent.
nearest_indices = cci.identify_25_percent(distances)

print("The indices of the 25% nearest data points for each centroid are:", nearest_indices)


In [None]:
# Echo the index array computed in the previous cell as this cell's output
nearest_indices

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

class ClassCentroidIdentifier:
    """
    Per-class centroids, point-to-centroid distances, nearest-quarter
    selection and a 2-D scatter plot of the result.
    """

    def __init__(self):
        # Filled in by set_centroids / identify_25_percent respectively.
        self.centroids = None
        self.nearest_indices = None

    def set_centroids(self, data, class_var):
        """
        Function to set the centroids for each class.
        The centroid of a class is calculated as the mean of the data points belonging to that class.

        Parameters:
        data (np.array): A 2D array of shape (n, m) where n is the number of data points and m is the number of features.
        class_var (np.array): A 1D array of length n, representing the class label for each data point.

        Returns:
        np.array: A 2D array of shape (k, m) where k is the number of unique classes and m is the number of features.
        """
        unique_classes = np.unique(class_var)
        self.centroids = np.zeros((len(unique_classes), data.shape[1]))
        for i, c in enumerate(unique_classes):
            class_data = data[class_var == c]
            self.centroids[i, :] = np.mean(class_data, axis=0)
        return self.centroids

    def evaluate_distance(self, data, class_var):
        """
        Function to evaluate the Euclidean distance between every data point and every class centroid.

        The centroid of each class is recomputed here from ``data`` and
        ``class_var``, so the method does not depend on ``set_centroids``.

        Parameters:
        data (np.array): A 2D array of shape (n, m) where n is the number of data points and m is the number of features.
        class_var (np.array): A 1D array of length n, representing the class label for each data point.

        Returns:
        np.array: A 2D array of shape (n, k) where k is the number of unique classes;
        column j holds the distance of every point to the centroid of class j.
        """
        unique_classes = np.unique(class_var)
        distances = np.zeros((len(data), len(unique_classes)))
        for i, c in enumerate(unique_classes):
            # The previous version subtracted the mean of the *other* classes
            # from only the other-class rows, which produced n - n_c values
            # for an n-long column and raised a ValueError.
            class_centroid = np.mean(data[class_var == c], axis=0)
            diff = data - class_centroid
            distances[:, i] = np.sqrt(np.sum(diff**2, axis=1))
        return distances

    def identify_25_percent(self, distances):
        """
        Function to identify the 25% nearest data points for each class.

        Parameters:
        distances (np.array): A 2D array of shape (n, k) where n is the number of data points and k is the number of unique classes.

        Returns:
        np.array: A sorted 1D array of length k * int(0.25 * n) with the row indices selected for each class;
        a row close to several centroids appears once per centroid.
        """
        self.nearest_indices = []
        for i in range(distances.shape[1]):
            class_distances = distances[:, i]
            indices = np.argsort(class_distances)
            count = int(0.25 * len(class_distances))
            self.nearest_indices.append(indices[:count])
        self.nearest_indices = np.concatenate(self.nearest_indices)
        self.nearest_indices = np.sort(self.nearest_indices)
        return self.nearest_indices

    def plot_data_with_centroids(self, data, class_var):
        """
        Function to plot the data points and their classes with centroids.

        Only the first two feature columns are drawn. ``set_centroids`` and
        ``identify_25_percent`` must have been called beforehand.
        """
        unique_classes = np.unique(class_var)
        colors = ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'pink', 'brown']

        for i, c in enumerate(unique_classes):
            class_data = data[class_var == c]
            # Cycle through the palette so more than eight classes do not
            # raise an IndexError.
            plt.scatter(class_data[:, 0], class_data[:, 1], color=colors[i % len(colors)], label=f'Class {c}')

        for i, centroid in enumerate(self.centroids):
            plt.scatter(centroid[0], centroid[1], marker='x', color='black', s=100, label=f'Centroid {i}')

        nearest_points = data[self.nearest_indices]
        plt.scatter(nearest_points[:, 0], nearest_points[:, 1], marker='*', color='magenta', s=100, label='25% nearest')

        plt.legend()
        plt.show()

In [None]:
# Load the Iris dataset
from sklearn.datasets import load_iris
iris = load_iris()
data = iris['data']
class_var = iris['target']

# Build the identifier and fit the per-class centroids
identifier = ClassCentroidIdentifier()
centroids = identifier.set_centroids(data, class_var)

# Distance matrix, then the closest quarter of rows for each column
distances = identifier.evaluate_distance(data, class_var)
nearest_indices = identifier.identify_25_percent(distances)

# Draw classes, centroids and the selected rows
identifier.plot_data_with_centroids(data, class_var)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

class ClassCentroidIdentifier:
    """
    Per-class centroids, point-to-centroid distances, nearest-fraction
    selection and a 2-D scatter plot of the result.
    """

    def __init__(self):
        # Filled in by set_centroids / identify_25_percent respectively.
        self.centroids = None
        self.nearest_indices = None

    def set_centroids(self, data, class_var):
        """
        Compute, store and return the centroid (feature-wise mean) of every class.

        Parameters:
        data (np.array): (n, m) array of n points with m features.
        class_var (np.array): length-n array of class labels.

        Returns:
        np.array: (k, m) array, one row per unique label in sorted label order.
        """
        labels = np.unique(class_var)
        self.centroids = np.zeros((len(labels), data.shape[1]))
        for row, label in enumerate(labels):
            self.centroids[row, :] = np.mean(data[class_var == label], axis=0)
        return self.centroids

    def evaluate_distance(self, data, class_var):
        """
        Euclidean distance of every point to every class centroid.

        Centroids are recomputed from ``data`` and ``class_var`` rather than
        read from ``self.centroids``.

        Parameters:
        data (np.array): (n, m) array of n points with m features.
        class_var (np.array): length-n array of class labels.

        Returns:
        np.array: (n, k) array; column j is the distance to the centroid of the j-th sorted label.
        """
        labels = np.unique(class_var)
        distances = np.zeros((len(data), len(labels)))
        for col, label in enumerate(labels):
            centre = np.mean(data[class_var == label], axis=0)
            offsets = data - centre
            distances[:, col] = np.sqrt(np.sum(offsets**2, axis=1))
        return distances

    def identify_25_percent(self, distances, threshold=0.25):
        """
        Select, for every column of ``distances``, the nearest ``threshold`` fraction of rows.

        Parameters:
        distances (np.array): (n, k) array of point-to-centroid distances.
        threshold (float): fraction of the n rows to keep per column (default 0.25).

        Returns:
        np.array: ascending 1D array of k * int(threshold * n) row indices;
        a row selected for several columns appears once per column.
        """
        quota = int(threshold * distances.shape[0])
        self.nearest_indices = [
            np.argsort(distances[:, col])[:quota]
            for col in range(distances.shape[1])
        ]
        self.nearest_indices = np.sort(np.concatenate(self.nearest_indices))
        return self.nearest_indices

    def plot_data_with_centroids(self, data, class_var):
        """
        Scatter the first two features of every class, the stored centroids
        (black crosses) and the rows in ``self.nearest_indices`` (magenta stars).
        """
        palette = ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'pink', 'brown']

        for position, label in enumerate(np.unique(class_var)):
            members = data[class_var == label]
            plt.scatter(members[:, 0], members[:, 1], color=palette[position], label=f'Class {label}')

        for position, centre in enumerate(self.centroids):
            plt.scatter(centre[0], centre[1], marker='x', color='black', s=100, label=f'Centroid {position}')

        highlighted = data[self.nearest_indices]
        plt.scatter(highlighted[:, 0], highlighted[:, 1], marker='*', color='magenta', s=100, label='25% nearest')

        plt.legend()
        plt.show()

In [None]:
# Generate sample data from a seeded generator so that Restart & Run All
# reproduces the same points, labels and figure every time
rng = np.random.default_rng(42)
data = rng.random((100, 2))
class_var = rng.integers(0, 2, 100)  # labels 0 or 1 (upper bound is exclusive)

# Initialize the ClassCentroidIdentifier object
cci = ClassCentroidIdentifier()

# Set the centroids
centroids = cci.set_centroids(data, class_var)

# Evaluate distances between each data point and the class centroids
distances = cci.evaluate_distance(data, class_var)

# Identify the nearest data points for each class
nearest_indices = cci.identify_25_percent(distances)

# Plot the data points, classes and centroids
cci.plot_data_with_centroids(data, class_var)

In [None]:
# Load the Iris dataset
from sklearn.datasets import load_iris
iris = load_iris()

data = iris['data']
class_var = iris['target']

# Features plus a trailing 'target' column, keeping only rows whose target is not 1
df = pd.DataFrame(
    np.column_stack([iris.data, iris.target]),
    columns=[*iris.feature_names, 'target'],
)
df = df.loc[df['target'] != 1]
df

In [None]:
# Feature columns only; note that `data` is a DataFrame from here on
data = df.drop(columns='target')
data

In [None]:
# Load the Iris dataset
from sklearn.datasets import load_iris
iris = load_iris()

data = iris['data']
class_var = iris['target']

# Build the identifier and fit the per-class centroids
cci = ClassCentroidIdentifier()
centroids = cci.set_centroids(data, class_var)

# Distance of every sample to every class centroid
distances = cci.evaluate_distance(data, class_var)

# Closest quarter of samples for each centroid (default threshold)
nearest_indices = cci.identify_25_percent(distances)

# Draw classes, centroids and the selected samples
cci.plot_data_with_centroids(data, class_var)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

class ClassCentroidIdentifier:
    """
    Per-class centroids, point-to-centroid distances, and selection of the
    rows that lie *beyond* the nearest fraction of each centroid.
    """

    def __init__(self):
        # nearest_indices_other_classes is created later, by
        # identify_25_percent_other_classes.
        self.centroids = None
        self.nearest_indices = None

    def set_centroids(self, data, class_var):
        """
        Compute, store and return the centroid (feature-wise mean) of every class.

        Parameters:
        data (np.array): (n, m) array of n points with m features.
        class_var (np.array): length-n array of class labels.

        Returns:
        np.array: (k, m) array, one row per unique label in sorted label order.
        """
        labels = np.unique(class_var)
        self.centroids = np.zeros((len(labels), data.shape[1]))
        for row, label in enumerate(labels):
            self.centroids[row, :] = np.mean(data[class_var == label], axis=0)
        return self.centroids

    def evaluate_distance(self, data, class_var):
        """
        Euclidean distance of every point to every class centroid.

        Centroids are recomputed from ``data`` and ``class_var`` rather than
        read from ``self.centroids``.

        Parameters:
        data (np.array): (n, m) array of n points with m features.
        class_var (np.array): length-n array of class labels.

        Returns:
        np.array: (n, k) array; column j is the distance to the centroid of the j-th sorted label.
        """
        labels = np.unique(class_var)
        distances = np.zeros((len(data), len(labels)))
        for col, label in enumerate(labels):
            centre = np.mean(data[class_var == label], axis=0)
            offsets = data - centre
            distances[:, col] = np.sqrt(np.sum(offsets**2, axis=1))
        return distances

    def identify_25_percent_other_classes(self, distances, threshold=0.25):
        """
        For every column of ``distances``, skip the nearest ``threshold``
        fraction of rows and keep all the remaining (farther) ones.

        Parameters:
        distances (np.array): (n, k) array of point-to-centroid distances.
        threshold (float): fraction of the n rows, counted from the nearest, to leave out per column (default 0.25).

        Returns:
        np.array: ascending 1D array of the kept row indices for all columns;
        a row kept for several columns appears once per column.
        """
        skipped = int(threshold * distances.shape[0])
        self.nearest_indices_other_classes = [
            np.argsort(distances[:, col])[skipped:]
            for col in range(distances.shape[1])
        ]
        self.nearest_indices_other_classes = np.sort(
            np.concatenate(self.nearest_indices_other_classes)
        )
        return self.nearest_indices_other_classes

    def plot_data_with_centroids(self, data, class_var):
        """
        Scatter the first two features of every class, the stored centroids
        (black crosses) and the rows in ``self.nearest_indices_other_classes``
        (magenta stars).
        """
        palette = ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'pink', 'brown']

        for position, label in enumerate(np.unique(class_var)):
            members = data[class_var == label]
            plt.scatter(members[:, 0], members[:, 1], color=palette[position], label=f'Class {label}')

        for position, centre in enumerate(self.centroids):
            plt.scatter(centre[0], centre[1], marker='x', color='black', s=100, label=f'Centroid {position}')

        highlighted = data[self.nearest_indices_other_classes]
        plt.scatter(highlighted[:, 0], highlighted[:, 1], marker='*', color='magenta', s=100, label='25% nearest (other classes)')

        plt.legend()
        plt.show()

In [None]:
# Load the Iris dataset
from sklearn.datasets import load_iris
iris = load_iris()

data = iris['data']
class_var = iris['target']

# Build the identifier and fit the per-class centroids
cci = ClassCentroidIdentifier()
centroids = cci.set_centroids(data, class_var)

# Distance of every sample to every class centroid
distances = cci.evaluate_distance(data, class_var)

# For each centroid, keep the samples lying beyond its nearest 90%
nearest_indices = cci.identify_25_percent_other_classes(distances, threshold=0.9)

# Draw classes, centroids and the selected samples
cci.plot_data_with_centroids(data, class_var)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

class ClassCentroidIdentifier:
    """
    Nearest-centroid helper with a per-class radius.

    The radius of a class is the distance from its centroid to the member
    returned by ``get_furthest_points`` (by default the member at the 90th
    percentile of distance). ``assign_to_nearest_class`` uses that radius to
    record which points lie inside their nearest class.
    """

    def __init__(self):
        self.centroids = None
        self.nearest_indices = None
        # Set by get_furthest_points; may also be assigned by the caller.
        self.furthest_points = None

    @staticmethod
    def _distance_matrix(data, centroids):
        """Return the (n_points, n_centroids) matrix of Euclidean distances."""
        distances = np.zeros((data.shape[0], centroids.shape[0]))
        for i, c in enumerate(centroids):
            distances[:, i] = np.linalg.norm(data - c, axis=1)
        return distances

    def set_centroids(self, data, class_var):
        """
        Function to set the centroids for each class.
        The centroid of a class is calculated as the mean of the data points belonging to that class.

        Parameters:
        data (np.array): A 2D array of shape (n, m) where n is the number of data points and m is the number of features.
        class_var (np.array): A 1D array of length n, representing the class label for each data point.

        Returns:
        np.array: A 2D array of shape (k, m) where k is the number of unique classes and m is the number of features.
        """
        # Cast so object-dtype input (e.g. DataFrame.values of a mixed-type
        # frame) is handled as numbers.
        data = np.asarray(data, dtype=float)
        class_var = np.asarray(class_var)
        unique_classes = np.unique(class_var)
        self.centroids = np.zeros((len(unique_classes), data.shape[1]))
        for i, c in enumerate(unique_classes):
            self.centroids[i, :] = np.mean(data[class_var == c], axis=0)
        return self.centroids

    def get_furthest_points(self, data, class_var, percentile=90):
        """
        Function to get, for each class, the member located at the given percentile of distance from the class centroid.

        Members are ranked by distance to their centroid and the one at rank
        ``int(percentile / 100 * (n_members - 1))`` is returned. The previous
        version took the percentile of the argsort permutation itself, which
        selected a row by position in the input regardless of its distance.

        ``set_centroids`` must have been called with the same ``class_var``.
        The result is also stored in ``self.furthest_points``.

        Parameters:
        data (np.array): A 2D array of shape (n, m) where n is the number of data points and m is the number of features.
        class_var (np.array): A 1D array of length n, representing the class label for each data point.
        percentile (float): Distance percentile (0-100) of the member to return (default is 90).

        Returns:
        np.array: A 2D array of shape (k, m) where k is the number of unique classes and m is the number of features.
        """
        data = np.asarray(data, dtype=float)
        class_var = np.asarray(class_var)
        unique_classes = np.unique(class_var)
        furthest_points = np.zeros((len(unique_classes), data.shape[1]))
        for i, c in enumerate(unique_classes):
            class_data = data[class_var == c]
            distances = np.linalg.norm(class_data - self.centroids[i, :], axis=1)
            order = np.argsort(distances)
            rank = int(percentile / 100 * (len(order) - 1))
            furthest_points[i, :] = class_data[order[rank], :]
        self.furthest_points = furthest_points
        return furthest_points

    def get_class_var(self, data, centroids):
        """
        Function to get the class label for each data point based on the nearest centroid.

        The returned labels are centroid row indices. They are also stored in
        ``self.nearest_indices``, replacing whatever was there.

        Parameters:
        data (np.array): A 2D array of shape (n, m) where n is the number of data points and m is the number of features.
        centroids (np.array): A 2D array of shape (k, m) where k is the number of unique classes and m is the number of features.

        Returns:
        np.array: A 1D array of length n, representing the class label for each data point.
        """
        data = np.asarray(data, dtype=float)
        centroids = np.asarray(centroids, dtype=float)
        self.nearest_indices = np.argmin(self._distance_matrix(data, centroids), axis=1)
        return self.nearest_indices

    def assign_to_nearest_class(self, data):
        """
        Function to assign each data point to the class of its nearest centroid.

        As a side effect ``self.nearest_indices`` is set to the ascending row
        indices of the points whose distance to their nearest centroid is
        strictly smaller than that class's radius, i.e. the distance between
        the centroid and the matching row of ``self.furthest_points``.

        The previous version compared the scalar distance with the whole
        feature vector of the furthest point and concatenated onto None, both
        of which raised before any result was produced.

        Parameters:
        data (np.array): A 2D array of shape (n, m) where n is the number of data points and m is the number of features.

        Returns:
        np.array: A 1D array of length n with the centroid row index nearest to each data point.

        Raises:
        ValueError: If ``furthest_points`` has not been set.
        """
        if self.furthest_points is None:
            raise ValueError(
                "furthest_points is not set; call get_furthest_points() first"
            )
        data = np.asarray(data, dtype=float)
        distances = self._distance_matrix(data, self.centroids)
        nearest_class = np.argmin(distances, axis=1)

        # One radius per class: centroid-to-furthest-point distance.
        radii = np.linalg.norm(
            np.asarray(self.furthest_points, dtype=float) - self.centroids, axis=1
        )
        own_distance = distances[np.arange(data.shape[0]), nearest_class]
        self.nearest_indices = np.flatnonzero(own_distance < radii[nearest_class])
        return nearest_class


In [None]:
import numpy as np
import pandas as pd

# Load the iris dataset (four numeric feature columns followed by the species name)
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", header=None)
# The frame mixes float and string columns, so `df.values` would be an
# object-dtype array that the NumPy norm computations below cannot process.
# Select the feature columns and convert them to float explicitly.
data = df.iloc[:, :-1].to_numpy(dtype=float)
class_var = df.iloc[:, -1].to_numpy()

# Initialize the class centroid identifier
cc = ClassCentroidIdentifier()

# Set the centroids for each class
centroids = cc.set_centroids(data, class_var)

# Get the furthest data point in the 90th percentile for each class
furthest_points = cc.get_furthest_points(data, class_var)
cc.furthest_points = furthest_points

# Assign each data point to its nearest class
nearest_class = cc.assign_to_nearest_class(data)

print("Nearest class:", nearest_class)
print("Nearest indices:", cc.nearest_indices)