In [23]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score

In [24]:
# Load the Iris dataset and wrap it in pandas objects so later cells can use
# .values / .iloc on the features and labels.
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)  # features, one named column each
y = pd.Series(iris.target)  # target values returned by load_iris

In [25]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical on every run. These four names are read as globals by
# predict, KNN_Algorithm and print_performance below.
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=2)

### KNN classifier without 10-fold cross-validation (without using a library)

In [26]:
def predict(k, x, train_X=None, train_y=None):
    """Classify one sample by majority vote among its k nearest training points.

    Distances are Euclidean. Ties in distance are resolved by training-set
    order (stable sort); ties in the vote go to the label whose first vote
    came from the closest neighbour.

    Parameters
    ----------
    k : int
        Number of neighbours that vote; must be at least 1. A k larger than
        the training set lets every training point vote.
    x : array-like of shape (n_features,)
        The sample to classify.
    train_X : array-like of shape (n_samples, n_features), optional
        Reference samples. Defaults to the notebook-level ``X_train``.
    train_y : array-like of shape (n_samples,), optional
        Labels of the reference samples. Defaults to the notebook-level
        ``y_train``.

    Returns
    -------
    The most frequent label among the k nearest reference samples.

    Raises
    ------
    ValueError
        If k < 1, the reference set is empty, or the reference samples and
        labels differ in length.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Fall back to the notebook globals so existing predict(k, x) calls keep working.
    if train_X is None:
        train_X = X_train
    if train_y is None:
        train_y = y_train

    features = np.asarray(train_X, dtype=float)
    labels = np.asarray(train_y)
    if len(features) == 0:
        raise ValueError("training set is empty")
    if len(features) != len(labels):
        raise ValueError("training samples and labels differ in length")

    # Euclidean distance from x to every training row in one vectorised step.
    distances = np.linalg.norm(features - np.asarray(x, dtype=float), axis=1)

    # Stable sort: equidistant neighbours keep their training-set order.
    nearest = np.argsort(distances, kind="stable")[:k]

    # Majority vote among the selected neighbours.
    return Counter(labels[nearest]).most_common(1)[0][0]

def KNN_Algorithm(k):
    """Predict a label for every row of the notebook-level ``X_test``.

    Each row is classified independently by ``predict`` with k neighbours;
    the predictions are returned as a list in row order.
    """
    predictions = []
    for sample in X_test.values:
        predictions.append(predict(k, sample))
    return predictions

In [27]:
def calculate_accuracy(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their true label.

    Parameters
    ----------
    y_true : iterable
        Ground-truth labels.
    y_pred : iterable
        Predicted labels, in the same order as ``y_true``.

    Returns
    -------
    float
        ``100 * correct / total``. Empty input yields 0.0, following the
        "undefined ratio is reported as 0" convention of calculate_metrics.

    Raises
    ------
    ValueError
        If the two inputs differ in length (zip would otherwise truncate
        silently and skew the result).
    """
    true_labels = list(y_true)
    predicted_labels = list(y_pred)
    if len(true_labels) != len(predicted_labels):
        raise ValueError(
            f"y_true has {len(true_labels)} labels but y_pred has {len(predicted_labels)}"
        )
    if not true_labels:
        return 0.0

    correct = sum(1 for true, pred in zip(true_labels, predicted_labels) if true == pred)
    return (correct / len(true_labels)) * 100

# Function to calculate the confusion matrix
def calculate_confusion_matrix(y_true, y_pred, labels):
    """Count (true label, predicted label) pairs into a nested dict.

    The result maps every label in ``labels`` to a row dict, so that
    ``result[t][p]`` is the number of samples whose true label is ``t`` and
    whose predicted label is ``p``. All cells start at 0. A true or predicted
    label that is not in ``labels`` raises KeyError.
    """
    counts = {}
    for row_label in labels:
        counts[row_label] = dict.fromkeys(labels, 0)

    for actual, predicted in zip(y_true, y_pred):
        counts[actual][predicted] = counts[actual][predicted] + 1

    return counts

# Function to calculate precision, recall, and F1-score
def calculate_metrics(confusion_matrix, labels):
    """Derive per-class precision, recall and F1 from a nested confusion matrix.

    ``confusion_matrix[t][p]`` is read as the count of samples with true
    label ``t`` predicted as ``p``. For each label the result holds a dict
    with the keys "precision", "recall" and "f1_score"; any ratio whose
    denominator is not positive is reported as 0.
    """
    def _ratio(numerator, denominator):
        # Undefined ratios (nothing predicted / nothing present) become 0.
        return numerator / denominator if denominator > 0 else 0

    per_class = {}
    for cls in labels:
        others = [other for other in labels if other != cls]

        true_pos = confusion_matrix[cls][cls]
        # Column of cls minus the diagonal: other classes predicted as cls.
        false_pos = sum(confusion_matrix[other][cls] for other in others)
        # Row of cls minus the diagonal: cls samples predicted as something else.
        false_neg = sum(confusion_matrix[cls][other] for other in others)

        precision = _ratio(true_pos, true_pos + false_pos)
        recall = _ratio(true_pos, true_pos + false_neg)

        per_class[cls] = {
            "precision": precision,
            "recall": recall,
            "f1_score": _ratio(2 * precision * recall, precision + recall),
        }
    return per_class

def print_performance(y_pred):
    """Print accuracy, the confusion matrix and per-class metrics for y_pred.

    Predictions are scored against the notebook-level ``y_test``; the class
    labels are fixed to 0, 1 and 2.
    """
    class_labels = [0, 1, 2]

    # Overall accuracy, as a percentage
    accuracy = calculate_accuracy(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}%")

    # Confusion matrix, one printed line per true label
    matrix = calculate_confusion_matrix(y_test, y_pred, class_labels)
    print("Confusion Matrix:")
    for actual in matrix:
        print(f"{actual}: {matrix[actual]}")

    # Per-class precision / recall / F1
    per_class = calculate_metrics(matrix, class_labels)
    print("Metrics:")
    for cls, scores in per_class.items():
        precision = scores['precision']
        recall = scores['recall']
        f1 = scores['f1_score']
        print(f"Class {cls}: Precision={precision:.2f}, Recall={recall:.2f}, F1-Score={f1:.2f}")

In [28]:
# Score the hand-written KNN on the held-out test split using k = 3 neighbours.
k = 3
y_pred = KNN_Algorithm(k)
print_performance(y_pred)

Accuracy: 100.00%
Confusion Matrix:
0: {0: 14, 1: 0, 2: 0}
1: {0: 0, 1: 8, 2: 0}
2: {0: 0, 1: 0, 2: 8}
Metrics:
Class 0: Precision=1.00, Recall=1.00, F1-Score=1.00
Class 1: Precision=1.00, Recall=1.00, F1-Score=1.00
Class 2: Precision=1.00, Recall=1.00, F1-Score=1.00


In [29]:
# Repeat the held-out evaluation with k = 5 neighbours for comparison.
k = 5
y_pred = KNN_Algorithm(k)
print_performance(y_pred)

Accuracy: 100.00%
Confusion Matrix:
0: {0: 14, 1: 0, 2: 0}
1: {0: 0, 1: 8, 2: 0}
2: {0: 0, 1: 0, 2: 8}
Metrics:
Class 0: Precision=1.00, Recall=1.00, F1-Score=1.00
Class 1: Precision=1.00, Recall=1.00, F1-Score=1.00
Class 2: Precision=1.00, Recall=1.00, F1-Score=1.00


### KNN classifier with 10-fold cross-validation (without using a library for the classifier or the fold splitting)

In [33]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix

# Define the KNN classifier
def knn_classifier(X_train, y_train, X_test, k):
    """Predict a label for each test point with a k-nearest-neighbours vote.

    Distances are Euclidean. Ties in distance are resolved by training-set
    order (stable sort); ties in the vote go to the label whose first vote
    came from the closest neighbour.

    Parameters
    ----------
    X_train : array-like of shape (n_train, n_features)
        Reference samples.
    y_train : array-like of shape (n_train,)
        Labels of the reference samples. A pandas Series, NumPy array or
        plain list are all accepted; labels are matched to rows by position.
    X_test : array-like of shape (n_test, n_features)
        Samples to classify.
    k : int
        Number of neighbours that vote; must be at least 1. A k larger than
        the training set lets every training point vote.

    Returns
    -------
    numpy.ndarray
        One predicted label per row of ``X_test``, in row order.

    Raises
    ------
    ValueError
        If k < 1, the training set is empty, or ``X_train`` and ``y_train``
        differ in length.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Convert once, outside the loop; positional indexing then works for any
    # array-like labels, not only a pandas Series.
    train_points = np.asarray(X_train, dtype=float)
    train_labels = np.asarray(y_train)
    test_points = np.asarray(X_test, dtype=float)

    if len(train_points) == 0:
        raise ValueError("training set is empty")
    if len(train_points) != len(train_labels):
        raise ValueError("X_train and y_train differ in length")

    predictions = []
    for test_point in test_points:
        # Euclidean distance from this test point to every training row
        distances = np.linalg.norm(train_points - test_point, axis=1)
        # Stable sort keeps equidistant neighbours in training-set order
        nearest = np.argsort(distances, kind="stable")[:k]
        # Majority vote
        winner = Counter(train_labels[nearest]).most_common(1)[0][0]
        predictions.append(winner)
    return np.array(predictions)

# Manual implementation of 10-fold cross-validation
def cross_validate_knn(X, y, k, n_splits=10, random_state=None):
    """Estimate KNN performance with a hand-rolled shuffled k-fold split.

    The row positions are shuffled once and partitioned into ``n_splits``
    folds. Every sample lands in exactly one test fold: when the sample count
    is not divisible by ``n_splits`` the leading folds are one sample larger,
    instead of the remainder never being tested. Each fold is predicted by
    ``knn_classifier`` trained on the remaining folds.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix (indexed positionally with ``.iloc``).
    y : pandas.Series
        Labels, positionally aligned with ``X``.
    k : int
        Number of neighbours passed to ``knn_classifier``.
    n_splits : int, default 10
        Number of folds; must satisfy ``2 <= n_splits <= len(X)``.
    random_state : int, optional
        Seed for the fold shuffle. When omitted, NumPy's global random state
        is used (seed it with ``np.random.seed`` for repeatable runs).

    Returns
    -------
    dict
        Mean over folds of 'accuracy', 'precision_macro', 'recall_macro' and
        'f1_macro'.

    Raises
    ------
    ValueError
        If ``n_splits`` would leave a fold without test or training samples.
    """
    n_samples = len(X)
    if n_splits < 2 or n_splits > n_samples:
        raise ValueError(
            f"n_splits must be between 2 and the number of samples ({n_samples}), got {n_splits}"
        )

    # A dedicated generator when a seed is given; otherwise the global state,
    # exactly as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    indices = np.arange(n_samples)
    rng.shuffle(indices)

    fold_scores = {
        'accuracy': [],
        'precision_macro': [],
        'recall_macro': [],
        'f1_macro': [],
    }

    # array_split tolerates uneven division, so no sample is left out.
    for test_indices in np.array_split(indices, n_splits):
        train_indices = np.setdiff1d(indices, test_indices)

        # Use .iloc for proper positional indexing of the pandas objects
        X_train, X_test = X.iloc[train_indices], X.iloc[test_indices]
        y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]

        # Train and predict using KNN
        y_pred = knn_classifier(X_train.values, y_train, X_test.values, k)

        # Per-fold metrics
        fold_scores['accuracy'].append(accuracy_score(y_test, y_pred))
        fold_scores['precision_macro'].append(precision_score(y_test, y_pred, average='macro'))
        fold_scores['recall_macro'].append(recall_score(y_test, y_pred, average='macro'))
        fold_scores['f1_macro'].append(f1_score(y_test, y_pred, average='macro'))

    # Return mean of metrics
    return {name: np.mean(values) for name, values in fold_scores.items()}

In [34]:
def print_evaluation(results):
    """Print the cross-validation summary, one metric per line to four decimals.

    ``results`` must provide the keys 'accuracy', 'precision_macro',
    'recall_macro' and 'f1_macro'.
    """
    print("Evaluation Metrics (10-Fold Cross-Validation):")
    rows = (
        ("Accuracy", 'accuracy'),
        ("Precision (Macro)", 'precision_macro'),
        ("Recall (Macro)", 'recall_macro'),
        ("F1-Score (Macro)", 'f1_macro'),
    )
    for title, key in rows:
        print(f"{title}: {results[key]:.4f}")

In [35]:
# Perform 10-fold cross-validation on the full dataset with k = 3 neighbours.
# No seed is set for the fold shuffle here, so the metrics can differ between runs.
k = 3
results = cross_validate_knn(X, y, k, n_splits=10)
print_evaluation(results)

Evaluation Metrics (10-Fold Cross-Validation):
Accuracy: 0.9667
Precision (Macro): 0.9694
Recall (Macro): 0.9661
F1-Score (Macro): 0.9636


In [36]:
# Perform 10-fold cross-validation on the full dataset with k = 5 neighbours.
# The folds are reshuffled on this call, so this run does not use the same
# partition as the k = 3 run.
k = 5
results = cross_validate_knn(X, y, k, n_splits=10)
print_evaluation(results)

Evaluation Metrics (10-Fold Cross-Validation):
Accuracy: 0.9667
Precision (Macro): 0.9745
Recall (Macro): 0.9697
F1-Score (Macro): 0.9703
