In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Define distance functions
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two points.

    Args:
        a: Array-like of numbers (list, tuple or NumPy array).
        b: Array-like of numbers broadcastable against ``a``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    # Convert to float arrays so plain Python sequences are accepted and
    # unsigned integer inputs cannot wrap around when subtracted.
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

def manhattan_distance(a, b):
    """Return the Manhattan (L1) distance between two points.

    Args:
        a: Array-like of numbers (list, tuple or NumPy array).
        b: Array-like of numbers broadcastable against ``a``.

    Returns:
        The sum of absolute coordinate differences.
    """
    # Float conversion accepts plain sequences and prevents unsigned integer
    # wraparound (e.g. uint8 1 - 3 would otherwise become 254).
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.sum(np.abs(diff))

def chebyshev_distance(a, b):
    """Return the Chebyshev (L-infinity) distance between two points.

    Args:
        a: Array-like of numbers (list, tuple or NumPy array).
        b: Array-like of numbers broadcastable against ``a``.

    Returns:
        The largest absolute coordinate difference.
    """
    # Float conversion accepts plain sequences and prevents unsigned integer
    # wraparound (e.g. uint8 1 - 3 would otherwise become 254).
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.max(np.abs(diff))

# KNN algorithm implementation
def knn_classify(test_point, train_data, train_labels, k, distance_func, weighted=False):
    """Classify ``test_point`` by a vote among its ``k`` nearest training samples.

    Args:
        test_point: The point to classify; passed as the first argument to
            ``distance_func``.
        train_data: Sequence of training points.
        train_labels: Labels indexed in step with ``train_data``; labels must
            be hashable.
        k: Number of nearest neighbours that take part in the vote. Values
            larger than the training set simply use every sample.
        distance_func: Callable ``(test_point, train_point) -> distance``.
        weighted: If true, each neighbour votes with weight
            ``1 / (distance ** 2 + 1e-5)``; otherwise every neighbour counts once.

    Returns:
        The label with the highest vote total. On a tie the label that was
        encountered first among the nearest neighbours wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``train_data`` is empty.
    """
    # Without these checks the vote would run on an empty dict and fail with
    # an unhelpful "max() arg is an empty sequence".
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if len(train_data) == 0:
        raise ValueError("train_data must not be empty")

    distances = [
        (distance_func(test_point, sample), train_labels[i])
        for i, sample in enumerate(train_data)
    ]
    # The sort is stable, so equally distant samples keep their training order.
    distances.sort(key=lambda pair: pair[0])
    k_nearest_neighbors = distances[:k]

    votes = {}
    for dist, label in k_nearest_neighbors:
        if weighted:
            # The small constant keeps the weight finite for an exact match.
            vote = 1 / (dist ** 2 + 1e-5)
        else:
            vote = 1
        votes[label] = votes.get(label, 0) + vote

    return max(votes, key=votes.get)

# Function to get user input for training data
def get_training_data():
    """Interactively read a labelled training set from standard input.

    Asks for the number of samples, then for each sample its comma separated
    coordinates followed by its label.

    Returns:
        A tuple ``(points, labels)`` where ``points`` is a NumPy array built
        from the entered coordinates and ``labels`` is a list of the label
        strings exactly as typed.
    """
    sample_count = int(input("Enter the number of training samples: "))
    points, labels = [], []
    for i in range(sample_count):
        raw_coords = input(f"Enter coordinates of sample {i + 1} (comma separated): ")
        # Parse before asking for the label so bad coordinates fail early.
        points.append(list(map(float, raw_coords.split(','))))
        labels.append(input(f"Enter label of sample {i + 1}: "))
    return np.array(points), labels

# Function to get k and test point
def get_test_data_and_k():
    """Interactively read ``k`` and a single test point from standard input.

    Returns:
        A tuple ``(k, test_point)``: ``k`` as an int and ``test_point`` as a
        NumPy array of the comma separated coordinates that were entered.
    """
    neighbour_count = int(input("Enter the value of k: "))
    raw_coords = input("Enter test point coordinates (comma separated): ")
    coords = list(map(float, raw_coords.split(',')))
    return neighbour_count, np.array(coords)

# Function to choose distance formula
def choose_distance_formula():
    """Show the distance menu and return the distance function the user picks.

    Returns:
        ``euclidean_distance``, ``manhattan_distance`` or
        ``chebyshev_distance`` for choices 1, 2 and 3. For any other reply,
        including non-numeric text, prints "Invalid choice!" and returns None.
    """
    print("Choose Distance Formula")
    print("1. Euclidean Distance")
    print("2. Manhattan Distance")
    print("3. Chebyshev Distance")

    options = {
        1: euclidean_distance,
        2: manhattan_distance,
        3: chebyshev_distance,
    }

    try:
        choice = int(input("Choose distance formula (1/2/3): "))
    except ValueError:
        # Non-numeric text is just another invalid choice, not a crash.
        choice = None

    selected = options.get(choice)
    if selected is None:
        print("Invalid choice!")
    return selected

# Function to calculate accuracy
def calculate_accuracy(true_labels, predicted_labels):
    """Return the fraction of predictions that equal their true label.

    Args:
        true_labels: Sized sequence of ground-truth labels.
        predicted_labels: Predicted labels, compared pairwise with
            ``true_labels``.

    Returns:
        Matches divided by ``len(true_labels)``, or 0 when ``true_labels`` is
        empty (the precision, recall and F1 helpers also return 0 when their
        denominator is zero).
    """
    total = len(true_labels)
    if total == 0:
        # Avoid ZeroDivisionError on an empty evaluation set.
        return 0
    correct = sum(t == p for t, p in zip(true_labels, predicted_labels))
    return correct / total

# Function to calculate precision
def calculate_precision(true_labels, predicted_labels):
    """Return the share of predictions equal to 1 whose true label is also 1.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Predicted labels, paired with ``true_labels``.

    Returns:
        True positives divided by the number of predictions equal to 1, or 0
        when no prediction equals 1.
    """
    hits = sum(
        (actual == guess) and (guess == 1)
        for actual, guess in zip(true_labels, predicted_labels)
    )
    flagged = len([guess for guess in predicted_labels if guess == 1])
    if flagged > 0:
        return hits / flagged
    return 0

# Function to calculate recall
def calculate_recall(true_labels, predicted_labels):
    """Return the share of samples whose true label is 1 that were predicted as 1.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Predicted labels, paired with ``true_labels``.

    Returns:
        True positives divided by the number of true labels equal to 1, or 0
        when no true label equals 1.
    """
    true_positives = sum(t == p == 1 for t, p in zip(true_labels, predicted_labels))
    # Count labels equal to 1 rather than summing them, so that labels other
    # than 0/1 (or non-numeric labels) do not distort the denominator.
    actual_positives = sum(1 for t in true_labels if t == 1)
    return true_positives / actual_positives if actual_positives > 0 else 0

# Function to calculate F1 score
def calculate_f1_score(true_labels, predicted_labels):
    """Return the F1 score, the harmonic mean of precision and recall.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Predicted labels, paired with ``true_labels``.

    Returns:
        ``2 * precision * recall / (precision + recall)``, or 0 when precision
        and recall sum to zero.
    """
    p = calculate_precision(true_labels, predicted_labels)
    r = calculate_recall(true_labels, predicted_labels)
    denominator = p + r
    if denominator == 0:
        return 0
    return 2 * (p * r) / denominator

# Main function
def main():
    """Run one interactive KNN classification session.

    The user picks a dataset, ``k``, a distance formula and a voting scheme.
    For a user-provided dataset the predicted label of the single entered test
    point is printed. For the Wisconsin Breast Cancer dataset a held-out 20%
    split is classified and accuracy, F1 score and precision are printed.
    """
    print("KNN Classification")

    # Get user choice for dataset
    print("Choose Dataset")
    print("1. User-provided Dataset")
    print("2. Wisconsin Breast Cancer Dataset")

    dataset_choice = int(input("Enter your choice (1/2): "))

    if dataset_choice == 1:
        train_data, train_labels = get_training_data()
        k, test_point = get_test_data_and_k()
        test_data = [test_point]
        # An ad-hoc test point has no ground truth, so there is nothing to
        # score against; labels stay as the strings the user typed.
        test_labels = None
    elif dataset_choice == 2:
        data = load_breast_cancer()
        train_data, test_data, train_labels, test_labels = train_test_split(
            data.data, data.target, test_size=0.2, random_state=42
        )
        k = int(input("Enter the value of k: "))
        # The metric helpers compare labels against the integer 1, so work
        # with plain Python ints for this dataset.
        train_labels = list(map(int, train_labels))
        test_labels = list(map(int, test_labels))
    else:
        print("Invalid choice!")
        return

    # Choose distance formula
    distance_func = choose_distance_formula()

    if distance_func is None:
        return

    # Choose voting method
    weighted = input("Use weighted voting? (yes/no): ").strip().lower() == 'yes'

    # Perform classification
    predictions = [
        knn_classify(point, train_data, train_labels, k, distance_func, weighted)
        for point in test_data
    ]

    if test_labels is None:
        # Previously this path crashed on int("Test") before classifying.
        print(f"Predicted label for the test point {test_data[0]} is: {predictions[0]}")
        return

    predictions = [int(p) for p in predictions]  # Ensure predictions are integers

    # Calculate performance metrics
    accuracy = calculate_accuracy(test_labels, predictions)
    f1 = calculate_f1_score(test_labels, predictions)
    precision = calculate_precision(test_labels, predictions)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Precision: {precision:.4f}")

# Start the interactive session only when this code is executed directly
# (module name "__main__"), not when it is imported.
if __name__ == "__main__":
    main()


In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Define distance functions
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between points ``a`` and ``b``.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays).
    """
    delta = a - b
    squared = np.square(delta)
    return np.sqrt(np.sum(squared))

def manhattan_distance(a, b):
    """Return the Manhattan (L1) distance between points ``a`` and ``b``.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays).
    """
    delta = a - b
    magnitudes = np.absolute(delta)
    return np.sum(magnitudes)

def chebyshev_distance(a, b):
    """Return the Chebyshev (L-infinity) distance between points ``a`` and ``b``.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays).
    """
    delta = a - b
    magnitudes = np.absolute(delta)
    return np.max(magnitudes)

# KNN algorithm implementation
def knn_classify(test_point, train_data, train_labels, k, distance_func, weighted=False):
    """Predict a label for ``test_point`` from its ``k`` closest training samples.

    Args:
        test_point: Point to classify; first argument to ``distance_func``.
        train_data: Indexable collection of training points.
        train_labels: Labels indexed in step with ``train_data``.
        k: How many of the closest samples may vote.
        distance_func: Callable ``(test_point, train_point) -> distance``.
        weighted: When true, votes are weighted by
            ``1 / (distance ** 2 + 1e-5)``; otherwise each vote counts as 1.

    Returns:
        The label with the largest vote total; on a tie, the label seen first
        among the closest samples.
    """
    scored = [
        (distance_func(test_point, train_data[idx]), train_labels[idx])
        for idx in range(len(train_data))
    ]
    # sorted() is stable, so equally distant samples keep their input order.
    nearest = sorted(scored, key=lambda pair: pair[0])[:k]

    tally = {}
    if not weighted:
        # Unweighted voting: one vote per neighbour
        for _, neighbour_label in nearest:
            tally[neighbour_label] = tally.get(neighbour_label, 0) + 1
    else:
        # Weighted voting using 1 / distance^2
        for gap, neighbour_label in nearest:
            # The small constant avoids division by zero on an exact match.
            influence = 1 / (gap ** 2 + 1e-5)
            tally[neighbour_label] = tally.get(neighbour_label, 0) + influence

    return max(tally, key=tally.get)

# Default training data, written as (point, label) pairs so each sample's
# coordinates and class sit side by side. Note that [40, 0] and [15, 0] each
# appear twice with different labels.
_default_samples = [
    ([32, 0], 'football'),
    ([40, 0], 'neither'),
    ([16, 1], 'cricket'),
    ([34, 1], 'cricket'),
    ([55, 0], 'neither'),
    ([40, 0], 'cricket'),
    ([20, 1], 'neither'),
    ([15, 0], 'cricket'),
    ([55, 1], 'football'),
    ([15, 0], 'football'),
]
default_train_data = [point for point, _ in _default_samples]
default_train_labels = [label for _, label in _default_samples]

# User-defined functions for metrics
def calculate_accuracy(true_labels, predicted_labels):
    """Return the fraction of predictions that match their true label.

    Args:
        true_labels: Sized sequence of ground-truth labels.
        predicted_labels: Predicted labels, compared pairwise with
            ``true_labels``.

    Returns:
        Matches divided by ``len(true_labels)``, or 0 for an empty
        ``true_labels`` (the precision, recall and F1 helpers also return 0
        when their denominator is zero).
    """
    sample_count = len(true_labels)
    if sample_count == 0:
        # Nothing to score; avoid ZeroDivisionError.
        return 0
    correct = sum(t == p for t, p in zip(true_labels, predicted_labels))
    return correct / sample_count

def calculate_f1_score(true_labels, predicted_labels):
    """Return the F1 score derived from this module's precision and recall.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Predicted labels, paired with ``true_labels``.

    Returns:
        ``2 * precision * recall / (precision + recall)``, or 0 when that
        denominator is zero.
    """
    precision_value = calculate_precision(true_labels, predicted_labels)
    recall_value = calculate_recall(true_labels, predicted_labels)
    combined = precision_value + recall_value
    # Guard the division: both components are zero when nothing was matched.
    return 0 if combined == 0 else 2 * (precision_value * recall_value) / combined

def calculate_precision(true_labels, predicted_labels):
    """Return the share of predictions equal to 1 whose true label is also 1.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Predicted labels, paired with ``true_labels``.

    Returns:
        True positives divided by the number of predictions equal to 1, or 0
        when no prediction equals 1.
    """
    true_positives = sum(t == p == 1 for t, p in zip(true_labels, predicted_labels))
    # Count predictions equal to 1 instead of summing them: summing is only
    # correct for strictly 0/1 labels and fails outright for string labels.
    predicted_positives = sum(1 for p in predicted_labels if p == 1)
    return true_positives / predicted_positives if predicted_positives > 0 else 0

def calculate_recall(true_labels, predicted_labels):
    """Return the share of samples whose true label is 1 that were predicted as 1.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Predicted labels, paired with ``true_labels``.

    Returns:
        True positives divided by the number of true labels equal to 1, or 0
        when no true label equals 1.
    """
    true_positives = sum(t == p == 1 for t, p in zip(true_labels, predicted_labels))
    # Count labels equal to 1 instead of summing them: summing is only
    # correct for strictly 0/1 labels and fails outright for string labels.
    actual_positives = sum(1 for t in true_labels if t == 1)
    return true_positives / actual_positives if actual_positives > 0 else 0

# Main function
def _read_int(prompt):
    """Prompt with ``prompt`` and return the reply as an int, or None if it is not one."""
    try:
        return int(input(prompt))
    except ValueError:
        return None


def main():
    """Run the interactive KNN menu until the user chooses to exit.

    Each round asks for a dataset, ``k``, a distance formula and a voting
    scheme. With the built-in default dataset the predicted label of one
    entered test point is printed; with the Wisconsin Breast Cancer dataset a
    held-out 20% split is classified and accuracy, F1 score and precision are
    printed. Invalid replies print a message and restart the menu instead of
    raising.
    """
    distance_functions = {
        1: euclidean_distance,
        2: manhattan_distance,
        3: chebyshev_distance,
    }

    while True:
        print("KNN Classification")
        print("Choose Dataset")
        print("1. User-provided Dataset")
        print("2. Wisconsin Breast Cancer Dataset")
        print("3. Exit")

        dataset_choice = _read_int("Enter your choice (1/2/3): ")
        if dataset_choice == 3:
            break
        if dataset_choice not in (1, 2):
            print("Invalid choice!")
            continue

        k = _read_int("Enter the value of k: ")
        if k is None or k < 1:
            # k < 1 would leave no neighbours to vote and crash the classifier.
            print("Invalid value of k!")
            continue

        if dataset_choice == 1:
            train_data = np.array(default_train_data)
            train_labels = default_train_labels
            raw_point = input("Enter test point coordinates (comma separated): ")
            try:
                test_point = np.array([float(x) for x in raw_point.split(',')])
            except ValueError:
                print("Invalid test point!")
                continue
            if test_point.shape != train_data.shape[1:]:
                # A point with the wrong number of coordinates cannot be
                # compared against the training samples.
                print("Invalid test point!")
                continue
            test_data = [test_point]
            test_labels = None  # no ground truth for an ad-hoc point
        else:
            data = load_breast_cancer()
            train_data, test_data, train_labels, test_labels = train_test_split(
                data.data, data.target, test_size=0.2, random_state=42
            )

        # Choose distance formula
        print("Choose Distance Formula")
        print("1. Euclidean Distance")
        print("2. Manhattan Distance")
        print("3. Chebyshev Distance")
        distance_func = distance_functions.get(
            _read_int("Choose distance formula (1/2/3): ")
        )
        if distance_func is None:
            print("Invalid choice!")
            continue

        # Choose voting method
        weighted = input("Use weighted voting? (yes/no): ").strip().lower() == 'yes'

        # Perform classification
        predictions = [
            knn_classify(point, train_data, train_labels, k, distance_func, weighted)
            for point in test_data
        ]

        if dataset_choice == 1:
            print(f"Predicted label for the test point {test_data[0]} is: {predictions[0]}")
        else:
            # Calculate performance metrics for the Wisconsin Breast Cancer Dataset
            accuracy = calculate_accuracy(test_labels, predictions)
            f1 = calculate_f1_score(test_labels, predictions)
            precision = calculate_precision(test_labels, predictions)

            print(f"Accuracy: {accuracy:.4f}")
            print(f"F1 Score: {f1:.4f}")
            print(f"Precision: {precision:.4f}")

# Start the interactive menu loop only when this code is executed directly
# (module name "__main__"), not when it is imported.
if __name__ == "__main__":
    main()


KNN Classification
Choose Dataset
1. User-provided Dataset
2. Wisconsin Breast Cancer Dataset
3. Exit


Enter your choice (1/2/3):  2
Enter the value of k:  3


Choose Distance Formula
1. Euclidean Distance
2. Manhattan Distance
3. Chebyshev Distance


Choose distance formula (1/2/3):  1
Use weighted voting? (yes/no):  no


Accuracy: 0.9298
F1 Score: 0.9444
Precision: 0.9315
KNN Classification
Choose Dataset
1. User-provided Dataset
2. Wisconsin Breast Cancer Dataset
3. Exit


Enter your choice (1/2/3):  2
Enter the value of k:  3


Choose Distance Formula
1. Euclidean Distance
2. Manhattan Distance
3. Chebyshev Distance


Choose distance formula (1/2/3):  2
Use weighted voting? (yes/no):  no


Accuracy: 0.9298
F1 Score: 0.9444
Precision: 0.9315
KNN Classification
Choose Dataset
1. User-provided Dataset
2. Wisconsin Breast Cancer Dataset
3. Exit


Enter your choice (1/2/3):  2
