In [23]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

In [5]:
# Read the dataset from CSV file
df = pd.read_csv('iris.data')

In [6]:
# Extract the features and target
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [7]:
# Standardize the features (optional, but can improve performance)
means = np.mean(X, axis=0)
stds = np.std(X, axis=0)
X = (X - means) / stds

In [13]:
# Split the dataset into training and testing sets
test_size = 0.2  # You can adjust the test size as needed
random_state = 42  # You can set a random state for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

print("Train set size:", len(X_train))
print("Test set size:", len(X_test))

Train set size: 119
Test set size: 30


In [14]:
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

In [15]:
def knn_predict(X_train, y_train, X_test, k=3):
    y_pred = []

    for tp in X_test:
        distances = []

        for i, tr in enumerate(X_train):
            distances.append((i, euclidean_distance(tp, tr)))

        distances.sort(key=lambda x: x[1])
        neighbors = distances[:k]

        labels = [y_train[neighbor[0]] for neighbor in neighbors]
        pred_label = max(set(labels), key=labels.count)
        y_pred.append(pred_label)

    return np.array(y_pred)

In [16]:
# Assuming you have a test set, you can split your data into training and testing sets
# For simplicity, let's assume you have a separate test set already
X_train = X
y_train = y

In [27]:
enumerate(X_train)

<enumerate at 0x17ff4f7e0>

In [17]:
# Here, you would load your test set (X_test) and apply the same preprocessing steps as for the training set

# Perform KNN prediction
k = 3  # You can choose any value for k
y_pred = knn_predict(X_train, y_train, X_test, k)

print("Predicted Labels:", y_pred)

Predicted Labels: ['Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor' 'Iris-virginica'
 'Iris-versicolor' 'Iris-virginica' 'Iris-virginica' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-virginica' 'Iris-virginica' 'Iris-versicolor' 'Iris-virginica'
 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'
 'Iris-setosa' 'Iris-setosa']


In [22]:
# Assuming you have computed y_pred using your KNN function

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Precision
precision = precision_score(y_test, y_pred, average='weighted')
print("Precision:", precision)

# Recall
recall = recall_score(y_test, y_pred, average='weighted')
print("Recall:", recall)

# F1-score
f1 = f1_score(y_test, y_pred, average='weighted')
print("F1-score:", f1)

Accuracy: 0.9333333333333333
Precision: 0.9435897435897436
Recall: 0.9333333333333333
F1-score: 0.9319444444444444
