# 7

## Task 1

In [None]:
import numpy as np

# Dataset of points: 20 rows, each row is one point with 5 integer coordinates.
# Row order matters: the search below reports the winner by looking rows up by index.
points = np.array([
    [51, 92, 14, 71, 60],
    [20, 82, 86, 74, 70],
    [10, 84, 70, 33, 61],
    [75, 98, 56, 83, 41],
    [56, 92, 48, 37, 80],
    [28, 46, 93, 54, 22],
    [62, 99, 74, 50, 20],
    [21, 84, 77, 96, 19],
    [63, 29, 71, 48, 88],
    [17, 11, 94, 22, 48],
    [93, 66, 58, 54, 10],
    [71, 96, 87, 35, 99],
    [50, 82, 12, 73, 31],
    [83, 64, 50, 72, 19],
    [96, 53, 19, 60, 90],
    [25, 68, 42, 55, 94],
    [47, 81, 99, 72, 63],
    [52, 35, 40, 91, 12],
    [64, 58, 36, 22, 78],
    [89, 46, 68, 94, 21]
])

# Query point: a single 5-coordinate point whose nearest dataset row is searched for.
query_point = np.array([63, 45, 76, 32, 14])

# Define distance functions
def euclidean_distance(p1, p2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    p1, p2 : array_like
        Real-valued coordinates of the two points (NumPy arrays, lists or
        tuples). Their shapes must be broadcast-compatible.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    # Compute in float64: subtracting unsigned integer arrays directly wraps
    # around (uint8 0 - 3 == 253) and squaring narrow integer types can
    # overflow. The result is a float either way, so callers see no change.
    diff = np.asarray(p1, dtype=np.float64) - np.asarray(p2, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))

def manhattan_distance(p1, p2):
    """Return the Manhattan (L1) distance between two points.

    Parameters
    ----------
    p1, p2 : array_like
        Coordinates of the two points (NumPy arrays, lists or tuples). Their
        shapes must be broadcast-compatible.

    Returns
    -------
    numpy scalar
        Sum of the absolute coordinate differences. Integer inputs give an
        integer result, floating-point inputs a floating-point result.
    """
    a, b = np.asarray(p1), np.asarray(p2)
    # Subtract in a dtype that can hold negative values. Unsigned inputs would
    # otherwise wrap around (uint8 0 - 3 == 253) and inflate the distance.
    # Signed integer and float dtypes are left unchanged by this promotion.
    signed = np.result_type(a.dtype, b.dtype, np.int8)
    return np.sum(np.abs(np.subtract(a, b, dtype=signed)))

def chebyshev_distance(p1, p2):
    """Return the Chebyshev (L-infinity) distance between two points.

    Parameters
    ----------
    p1, p2 : array_like
        Coordinates of the two points (NumPy arrays, lists or tuples). Their
        shapes must be broadcast-compatible.

    Returns
    -------
    numpy scalar
        Largest absolute coordinate difference. Integer inputs give an
        integer result, floating-point inputs a floating-point result.
    """
    a, b = np.asarray(p1), np.asarray(p2)
    # Subtract in a dtype that can hold negative values. Unsigned inputs would
    # otherwise wrap around (uint8 0 - 3 == 253) and dominate the maximum.
    # Signed integer and float dtypes are left unchanged by this promotion.
    signed = np.result_type(a.dtype, b.dtype, np.int8)
    return np.max(np.abs(np.subtract(a, b, dtype=signed)))

# Find the nearest point using each distance metric
def find_nearest_point(points, query_point, distance_func):
    """Locate the element of ``points`` that lies closest to ``query_point``.

    ``distance_func(point, query_point)`` is evaluated once for every element
    produced by iterating over ``points``.

    Returns
    -------
    tuple
        ``(index, distance)`` for the smallest distance found. When several
        elements tie, the first one wins (``np.argmin`` semantics).
    """
    scores = []
    for candidate in points:
        scores.append(distance_func(candidate, query_point))
    dist_array = np.array(scores)

    best = np.argmin(dist_array)
    return best, dist_array[best]

# Run the same nearest-point search once per metric; only the distance function changes.
euclidean_idx, euclidean_dist = find_nearest_point(
    points, query_point, distance_func=euclidean_distance
)
manhattan_idx, manhattan_dist = find_nearest_point(
    points, query_point, distance_func=manhattan_distance
)
chebyshev_idx, chebyshev_dist = find_nearest_point(
    points, query_point, distance_func=chebyshev_distance
)

# Report the winning row and its distance (4 decimal places) for every metric.
print(f"Nearest point using Euclidean distance: {points[euclidean_idx]}, Distance: {euclidean_dist:.4f}")
print(f"Nearest point using Manhattan distance: {points[manhattan_idx]}, Distance: {manhattan_dist:.4f}")
print(f"Nearest point using Chebyshev distance: {points[chebyshev_idx]}, Distance: {chebyshev_dist:.4f}")


Nearest point using Euclidean distance: [28 46 93 54 22], Distance: 45.4203
Nearest point using Manhattan distance: [62 99 74 50 20], Distance: 81.0000
Nearest point using Chebyshev distance: [93 66 58 54 10], Distance: 30.0000


## Task 2

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load the dataset
# NOTE(review): hard-coded absolute path (looks like a Colab upload location — confirm);
# the notebook fails here on any machine where this file is not at this exact path.
file_path = '/content/Breast_cancer_data.csv'  # Update with the actual file path if needed
df = pd.read_csv(file_path)

# Separate features and diagnosis label
X = df.drop(columns=['diagnosis']).values  # Features: every column except 'diagnosis'
y = df['diagnosis'].values  # Labels: the 'diagnosis' column (not referenced again in the cells shown here)

# Normalize the data (important for distance-based methods): without scaling,
# columns with large numeric ranges would dominate every distance.
# The scaler is fitted on the full feature matrix X.
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

# Choose a query sample (e.g., the first sample in the dataset)
# Row 0 is itself part of X_normalized, so any search over X_normalized
# returns it as its own closest match at distance 0.
query_sample = X_normalized[0]

# Define distance functions
def euclidean_distance(p1, p2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    p1, p2 : array_like
        Real-valued coordinates of the two points (NumPy arrays, lists or
        tuples). Their shapes must be broadcast-compatible.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    # NOTE(review): a function of the same name is also defined in the Task 1
    # cell; this later definition is the one in effect from here on.
    # Compute in float64: subtracting unsigned integer arrays directly wraps
    # around (uint8 0 - 3 == 253) and squaring narrow integer types can
    # overflow. The result is a float either way, so callers see no change.
    diff = np.asarray(p1, dtype=np.float64) - np.asarray(p2, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))

def manhattan_distance(p1, p2):
    """Return the Manhattan (L1) distance between two points.

    Parameters
    ----------
    p1, p2 : array_like
        Coordinates of the two points (NumPy arrays, lists or tuples). Their
        shapes must be broadcast-compatible.

    Returns
    -------
    numpy scalar
        Sum of the absolute coordinate differences. Integer inputs give an
        integer result, floating-point inputs a floating-point result.
    """
    # NOTE(review): a function of the same name is also defined in the Task 1
    # cell; this later definition is the one in effect from here on.
    a, b = np.asarray(p1), np.asarray(p2)
    # Subtract in a dtype that can hold negative values. Unsigned inputs would
    # otherwise wrap around (uint8 0 - 3 == 253) and inflate the distance.
    # Signed integer and float dtypes are left unchanged by this promotion.
    signed = np.result_type(a.dtype, b.dtype, np.int8)
    return np.sum(np.abs(np.subtract(a, b, dtype=signed)))

def chebyshev_distance(p1, p2):
    """Return the Chebyshev (L-infinity) distance between two points.

    Parameters
    ----------
    p1, p2 : array_like
        Coordinates of the two points (NumPy arrays, lists or tuples). Their
        shapes must be broadcast-compatible.

    Returns
    -------
    numpy scalar
        Largest absolute coordinate difference. Integer inputs give an
        integer result, floating-point inputs a floating-point result.
    """
    # NOTE(review): a function of the same name is also defined in the Task 1
    # cell; this later definition is the one in effect from here on.
    a, b = np.asarray(p1), np.asarray(p2)
    # Subtract in a dtype that can hold negative values. Unsigned inputs would
    # otherwise wrap around (uint8 0 - 3 == 253) and dominate the maximum.
    # Signed integer and float dtypes are left unchanged by this promotion.
    signed = np.result_type(a.dtype, b.dtype, np.int8)
    return np.max(np.abs(np.subtract(a, b, dtype=signed)))

# Distance from the query to every sample, for an arbitrary metric
def calculate_distances(X, query_sample, distance_func):
    """Evaluate ``distance_func(sample, query_sample)`` for each sample in ``X``.

    Returns
    -------
    numpy.ndarray
        One distance per element obtained by iterating over ``X``, in the
        same order.
    """
    def to_query(sample):
        return distance_func(sample, query_sample)

    return np.array(list(map(to_query, X)))

# Find the nearest neighbors using each distance metric
def find_nearest_neighbors(X, query_sample, distance_func, k=5):
    """Return the ``k`` samples of ``X`` closest to ``query_sample``.

    Parameters
    ----------
    X : iterable of samples
        Candidates to search; each element is passed to ``distance_func``.
    query_sample : array_like
        Point the distances are measured from.
    distance_func : callable
        Called as ``distance_func(sample, query_sample)``.
    k : int, default 5
        Number of neighbours to return. If ``k`` exceeds the number of
        samples, all samples are returned.

    Returns
    -------
    tuple of numpy.ndarray
        ``(indices, distances)``, both ordered from nearest to farthest.

    Raises
    ------
    ValueError
        If ``k`` is negative.
    """
    # A negative k would silently slice from the end instead of failing
    # (k=-1 returns every sample except the farthest one).
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    distances = calculate_distances(X, query_sample, distance_func)
    # Stable sort: equidistant samples keep their dataset order, so the result
    # is reproducible when distances tie.
    nearest_indices = np.argsort(distances, kind="stable")[:k]
    return nearest_indices, distances[nearest_indices]

# Number of neighbors to find; passed as `k` to each per-metric search below.
# The query is row 0 of the searched matrix, so the query itself is among the k results.
k = 5




In [None]:
# Euclidean nearest neighbors
euclidean_neighbors, euclidean_distances = find_nearest_neighbors(
    X_normalized, query_sample, distance_func=euclidean_distance, k=k
)
# The query is row 0 of X_normalized, so it is listed as its own closest match (distance 0).
# Rows are shown from the original, un-normalized frame via their positional indices.
print(f"Nearest neighbors using Euclidean distance:\n{df.iloc[euclidean_neighbors]}\nDistances: {euclidean_distances}\n")

Nearest neighbors using Euclidean distance:
    mean_radius  mean_texture  mean_perimeter  mean_area  mean_smoothness  \
0         17.99         10.38           122.8     1001.0          0.11840   
25        17.14         16.40           116.0      912.7          0.11860   
77        18.05         16.15           120.2     1006.0          0.10650   
29        17.57         15.05           115.0      955.1          0.09847   
22        15.34         14.26           102.5      704.4          0.10730   

    diagnosis  
0           0  
25          0  
77          0  
29          0  
22          0  
Distances: [0.         1.67071572 1.78574732 1.9980517  2.00743146]



In [None]:
# Manhattan nearest neighbors
manhattan_neighbors, manhattan_distances = find_nearest_neighbors(
    X_normalized, query_sample, distance_func=manhattan_distance, k=k
)
# The query is row 0 of X_normalized, so it is listed as its own closest match (distance 0).
# Rows are shown from the original, un-normalized frame via their positional indices.
print(f"Nearest neighbors using Manhattan distance:\n{df.iloc[manhattan_neighbors]}\nDistances: {manhattan_distances}\n")

Nearest neighbors using Manhattan distance:
    mean_radius  mean_texture  mean_perimeter  mean_area  mean_smoothness  \
0         17.99         10.38           122.8     1001.0           0.1184   
25        17.14         16.40           116.0      912.7           0.1186   
77        18.05         16.15           120.2     1006.0           0.1065   
53        18.22         18.70           120.3     1033.0           0.1148   
45        18.65         17.60           123.7     1076.0           0.1099   

    diagnosis  
0           0  
25          0  
77          0  
53          0  
45          0  
Distances: [0.         2.43956323 2.58600785 2.76363201 3.03755831]



In [None]:
# Chebyshev nearest neighbors
chebyshev_neighbors, chebyshev_distances = find_nearest_neighbors(
    X_normalized, query_sample, distance_func=chebyshev_distance, k=k
)
# The query is row 0 of X_normalized, so it is listed as its own closest match (distance 0).
# Rows are shown from the original, un-normalized frame via their positional indices.
print(f"Nearest neighbors using Chebyshev distance:\n{df.iloc[chebyshev_neighbors]}\nDistances: {chebyshev_distances}\n")

Nearest neighbors using Chebyshev distance:
    mean_radius  mean_texture  mean_perimeter  mean_area  mean_smoothness  \
0         17.99         10.38          122.80     1001.0           0.1184   
22        15.34         14.26          102.50      704.4           0.1073   
89        14.64         15.24           95.77      651.9           0.1132   
4         20.29         14.34          135.10     1297.0           0.1003   
73        13.80         15.79           90.43      584.1           0.1007   

    diagnosis  
0           0  
22          0  
89          1  
4           0  
73          0  
Distances: [0.         1.03214323 1.2928392  1.37715825 1.43914816]

