# Section I

In [1]:
import statistics
import math

def knn(data, query, k, distance_fn, choice_fn):
    neighbor_distances_and_indices = []

    # Calculate the distance between the query example and all the examples in the data.
    for index, example in enumerate(data):
        distance = distance_fn(example, query)
        neighbor_distances_and_indices.append((distance, index))

    # Sort the distances and return the labels of the k nearest neighbors.
    k_nearest_distances_and_indices = sorted(neighbor_distances_and_indices)[:k]
    k_nearest_labels = [data[i][1] for distance, i in k_nearest_distances_and_indices]

    # Pick the first k entries from the sorted collection
    k_nearest_distances_and_indices = k_nearest_distances_and_indices[:k]

    # Get the labels of the selected k entries
    k_nearest_labels = [data[i][1] for distance, i in k_nearest_distances_and_indices]

    # If regression (mean), if classification (mode)
    if choice_fn == mean:
        return statistics.mean(k_nearest_labels)
    elif choice_fn == mode:
        return statistics.mode(k_nearest_labels)

    return choice_fn(k_nearest_labels)

def euclidean_distance(point1, point2):
   # implement this function to return euclidean distance between point1 and point2
   return math.sqrt((point1[0] - point2[0])**2)

def mean(labels):
   # implement this function to return the mean of the labels.
   return statistics.mean(labels)

def mode(labels):
  # implement this function to return the mode of the labels.
   return statistics.mode(labels)


# Section II

In [2]:
from sklearn.neighbors import KNeighborsRegressor,KNeighborsClassifier

def sklearn_knn_regression(reg_data, reg_query):


    # Initialize the KNN regressor with 3 nearest neighbors
    X = reg_data[:, :-1]  # All columns except the last one
    y = reg_data[:, -1]   # The last column

    # Fit the model on the training data; use all but the last column as features and the last column as the target
    knn_regressor = KNeighborsRegressor(n_neighbors=3)

    # Predict the output for the provided query
    knn_regressor.fit(X, y)

    reg_query = np.array(reg_query).reshape(1, -1)

    # Predict the output for the provided query and return the first (and likely only) prediction
    skl_reg_prediction = knn_regressor.predict(reg_query)

    return skl_reg_prediction


def sklearn_knn_classification(clf_data, clf_query):

    # Initialize the KNN classifier with 3 nearest neighbors
    X = clf_data[:, :-1]  # All columns except the last one
    y = clf_data[:, -1]   # The last column

    # Fit the model on the training data; use all but the last column as features and the last column as the target
    skl_clf = KNeighborsClassifier(n_neighbors=3)

    clf_query = np.array(reg_query).reshape(1, -1)

    # Predict the class for the provided query and return the first (and likely only) prediction
    skl_clf.fit(X, y)

    skl_clf_prediction = skl_clf.predict(clf_query)

    return skl_clf_prediction

# Section III


In [3]:
from collections import defaultdict

def weighted_mode(labels, weights):
    # Initialize a defaultdict to store the sum of weights for each label
    max_weighted_label = max(weights, key=weights.get)

    # Return the label that has the highest sum of weights
    return max_weighted_label

def knn_weighted(data, query, k, distance_fn, choice_fn, weights):

    # Calculate the distance between the query example and all the examples in the data.
    neighbor_distances_and_indices = []
    for index, example in enumerate(data):
        distance = distance_fn(example, query)

    # Sort the distances and return the labels of the k nearest neighbors.
    k_nearest_distances_and_indices = sorted(neighbor_distances_and_indices)[:k]
    k_nearest_labels = [data[i][1] for distance, i in k_nearest_distances_and_indices]

    # Pick the first k entries from the sorted collection
    k_nearest_distances_and_indices = k_nearest_distances_and_indices[:k]

    # Get the labels of the selected k entries
    k_nearest_labels = [data[i][1] for distance, i in k_nearest_distances_and_indices]

    # Apply the weighted mode function and return nearest neighbors too
    return choice_fn(k_nearest_labels, weights)

In [None]:
import numpy as np
import pandas as pd
from google.colab import drive

drive.mount('/content/drive/')
%cd /content/drive/MyDrive/ECE657

# # Load the Regression Data. The first index consists of age(feature) and the second index is the label. The label is height of the person in centimeter.
reg_data = pd.read_csv('regression_data.csv').values

reg_query = np.array([[55]])  # reshape to fit scikit-learn requirements

# Custom KNN Prediction
custom_reg_prediction = knn(reg_data, reg_query, k=3, distance_fn=euclidean_distance, choice_fn=mean)

# Scikit-learn KNN Regression
skl_reg_prediction = sklearn_knn_regression(reg_data, reg_query)

print("Custom KNN Regression Prediction:", custom_reg_prediction)
print("Scikit-learn KNN Regression Prediction:", skl_reg_prediction)

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive/ECE657
Custom KNN Regression Prediction: 128.24666666666667
Scikit-learn KNN Regression Prediction: [128.24666667]


  return math.sqrt((point1[0] - point2[0])**2)


In [None]:
# Load the Classification Data. The first index consists of age(feature) and the second index is the label. The label 0 is for likes pineapple or not.
clf_data = pd.read_csv('classification_data.csv').values

clf_query = np.array([[18]])  # reshape to fit scikit-learn requirements

# Custom KNN Prediction
custom_clf_prediction = knn(clf_data, clf_query, k=3, distance_fn=euclidean_distance, choice_fn=mode)

# Scikit-learn KNN Classification
skl_clf_prediction = sklearn_knn_classification(clf_data, clf_query)

print("Custom KNN Classification Prediction:", custom_clf_prediction)
print("Scikit-learn KNN Classification Prediction:", skl_clf_prediction)

Custom KNN Classification Prediction: 0
Scikit-learn KNN Classification Prediction: [0]


  return math.sqrt((point1[0] - point2[0])**2)


In [None]:
weights = {0: 1, 1: 2}

# Query for whether a 15-year-old likes pineapple or not. The classification should be 1 as this exact sample is present in the dataset
# but because of unbalanced dataset this will be predicted as class 0.

clf_query = np.array([[15]])  # reshape to fit scikit-learn requirements

# Custom KNN Prediction
custom_clf_prediction = knn(clf_data, clf_query, k=3, distance_fn=euclidean_distance, choice_fn=mode)

# Scikit-learn KNN Classification
skl_clf_prediction = sklearn_knn_classification(clf_data, clf_query)

print("Custom KNN Classification Prediction:", custom_clf_prediction)
print("Scikit-learn KNN Classification Prediction:", skl_clf_prediction)

Custom KNN Classification Prediction: 0
Scikit-learn KNN Classification Prediction: [0]


  return math.sqrt((point1[0] - point2[0])**2)


In [None]:
# print(weights)
clf_prediction = knn_weighted(clf_data, clf_query, k=3, distance_fn=euclidean_distance, choice_fn=weighted_mode, weights=weights)
print("Prediction for weighted KNN:", clf_prediction)

Prediction for weighted KNN: 1


  return math.sqrt((point1[0] - point2[0])**2)
