In [19]:
import numpy as np
import pandas as pd

***Load Iris Data into a Pandas DataFrame***

In [13]:
df = pd.read_csv( "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",header=None, )

df.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


- **Randomly Select 20% of the Records for Testing**
- **Extract the First Four Columns as Features (X_test)**
- **Extract the Last Column as Labels (y_test)**

In [14]:
df_test = df.sample(frac=0.2, random_state=42)

X_test = df_test.iloc[:, :4].values
y_test =df_test.iloc[:, -1].values

In [15]:
X_test.shape, y_test.shape

((30, 4), (30,))

- **Store the remaining 80% of the records in the same way.** 
- **We use the names `X_train` and `y_train` for the arrays.**


*This is the data that your model will use as ground-truth knowledge (i.e. the
training data).*

In [16]:
df_train = df.drop(df_test.index)

X_train = df_train.iloc[:, :4].values
y_train =df_train.iloc[:, -1].values

In [17]:
X_train.shape, y_train.shape

((120, 4), (120,))

Defining `KNearestNeighbors` class 

In [None]:
class KNearestNeighbors:
    def __init__(self, k, distance_metric="euclidean", weights = "uniform"):
        
        self.k = k
        self.weights = weights
        self.distance_metric = distance_metric.lower()

        
        self.X_train = None
        self.y_train = None 

    def _euclidean_distance(self, p, q):
        return np.sqrt(np.sum((np.array(p) - np.array(q))**2))
    
    def _manhattan_distance(self, p, q):
        return np.abs(np.sum(np.array(p) - np.array(q)))
    
    def _cosine_distance(self, p, q):
        similarity = np.dot(p, q) / (np.linalg.norm(p) * np.linalg.norm(q))
        return 1 - similarity
    
    def _compute_distance(self, p, q):
        if self.distance_metric == "euclidean":
            return self._euclidean_distance(p, q) 
        
        elif self.distance_metric == "cosine":
            return self._cosine_distance(p, q)
        
        elif self.distance_metric == "manhattan":
            return self._manhattan_distance(p, q)
        
        else:
            raise ValueError("Unsupported distance metric")

        
    def _get_neighbors(self, x):
        distances = [self._compute_distance(x, x_train) for x_train in self.X_train]
        neighbors_indices = np.argsort(distances)[:self.k]
        neighbors_distances = [distances[i] for i in neighbors_indices]
        return neighbors_indices, neighbors_distances
        
    def fit(self, X, y):
        """
        Store the 'prior knowledge' of you model that will be used
        to predict new labels.
        :param X : input data points, ndarray, shape = (R,C).
        :param y : input labels, ndarray, shape = (R,).
        """
        self.X_train = X
        self.y_train = y
    
    def predict(self, X):
        predictions = []
        for x in X:
            neighbors_indices, neighbors_distances = self._get_neighbors(x)
            neighbor_labels = self.y_train[neighbors_indices]

            if self.weights == "uniform":
                # Majority vote
                unique_labels, counts = np.unique(neighbor_labels, return_counts=True)
                prediction = unique_labels[np.argmax(counts)]
                
            elif self.weights == "distance":
                # Inverse distance weighting
                weights = 1 / np.array(neighbors_distances)
                weighted_votes = {}
                
                for label, weight in zip(neighbor_labels, weights):
                    weighted_votes[label] = weighted_votes.get(label, 0) + weight
                prediction = max(weighted_votes, key=weighted_votes.get)
            else:
                raise ValueError("Unsupported weighting scheme")

            predictions.append(prediction)

        return np.array(predictions)

    def score(self, X, y):
        predictions = self.predict(X)
        return np.mean(predictions == y)            


In [44]:
knn = KNearestNeighbors(k=3, distance_metric="manhattan", weights="uniform")

knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
accuracy = knn.score(X_test, y_test)

print("Accuracy:", accuracy)



Accuracy: 0.9
