In [8]:
from math import sqrt
from csv import reader
import pandas as pd

In [9]:
def load_csv(filename):
    dataset = list()
    with open(filename, 'r') as file:
        csv_reader = reader(file)
        next(csv_reader)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset


In [10]:
def euclidean_distance(row1, row2):
    distance = 0.0
    for i in range(len(row1)-1):
        distance += (float(row1[i]) - float(row2[i]))**2
    return sqrt(distance)


def manhattan_distance(row1, row2):
    distance = 0.0
    for i in range(len(row1)-1):
        distance += abs(float(row1[i]) - float(row2[i]))
    return distance


def minkowski_distance(row1, row2, p=3):
    distance = 0.0
    for i in range(len(row1)-1):
        distance += abs(float(row1[i]) - float(row2[i])) ** p
    return distance ** (1 / p)


def chebyshev_distance(row1, row2):
    distance = 0.0
    for i in range(len(row1)-1):  # Assuming the last element might be a class label
        distance = max(distance, abs(float(row1[i]) - float(row2[i])))
    return distance


def hamming_distance(row1, row2):
    distance = 0.0
    for i in range(len(row1)-1):
        if row1[i] != row2:
            distance += 1
    return distance


def cosine_distance(row1, row2):
    row1 = [float(i) for i in row1[:-1]]
    row2 = [float(i) for i in row2[:-1]]

    dot_product = sum(a*b for a, b in zip(row1, row2))
    magnitude_row1 = sum(a**2 for a in row1) ** 0.5
    magnitude_row2 = sum(b**2 for b in row2) ** 0.5
    cosine_similarity = dot_product / (magnitude_row1 * magnitude_row2)
    return 1 - cosine_similarity

In [11]:
def get_distance_function(metric, p=3):
    if metric == 'euclidean':
        return euclidean_distance
    elif metric == 'manhattan':
        return manhattan_distance
    elif metric == 'minkowski':
        return lambda row1, row2: minkowski_distance(row1, row2, p)
    elif metric == 'chebyshev':
        return chebyshev_distance
    elif metric == 'hamming':
        return hamming_distance
    elif metric == 'cosine':
        return cosine_distance
    else:
        return euclidean_distance

In [12]:
def get_neighbors(train, test_row, num_neighbors, metric='euclidean', p=3):
    distances = list()
    distance_function = get_distance_function(metric, p)
    for _, train_row in train.iterrows():
        train_row_list = train_row[:-1].tolist()
        dist = distance_function(test_row, train_row_list)
        distances.append((train_row_list, dist))
    distances.sort(key=lambda tup: tup[1])
    neighbors = [distances[i][0] for i in range(num_neighbors)]
    return neighbors


def predict_classification(train, test_row, num_neighbors, metric='euclidean'):
    neighbors = get_neighbors(train, list(
        test_row[:-1]), num_neighbors, metric=metric)
    output_values = [row[-1] for row in neighbors]
    prediction = max(set(output_values), key=output_values.count)
    return prediction


def load_data(filename):
    df = pd.read_csv(filename, header=None)
    df = df.replace(["Male", "Female"], [0, 1])
    return df

In [13]:
data = load_data("./indian_liver_patient.csv")
test_row_index = 0

test_row = 63, 0, 0.9, 0.2, 194, 52, 45, 6, 3.9, 1.85,

In [14]:
print(
    f"Euclidean Prediction: {predict_classification(data.iloc[1:], test_row, 3, metric='euclidean')}")
print(
    f"Manhattan Prediction: {predict_classification(data.iloc[1:], test_row, 3, metric='manhattan')}")
print(
    f"Minkowski Prediction: {predict_classification(data.iloc[1:], test_row, 3, metric='minkowski')}")
print(
    f"Chebyshev Prediction: {predict_classification(data.iloc[1:], test_row, 3, metric='chebyshev')}")
print(
    f"Hamming Prediction: {predict_classification(data.iloc[1:], test_row, 3, metric='hamming')}")
print(
    f"Cosine Prediction: {predict_classification(data.iloc[1:], test_row, 3, metric='cosine')}")

Euclidean Prediction: 0.8
Manhattan Prediction: 0.8
Minkowski Prediction: 0.8
Chebyshev Prediction: 0.8
Hamming Prediction: 0.74
Cosine Prediction: 0.8
