In [17]:
import pandas as pd
import numpy as np
from collections import Counter

In [18]:
# Load dataset from CSV file
df = pd.read_csv('houses_data.csv')
df

Unnamed: 0,Rumah,Lat,Long,Lokasi
0,A,11,26,Kota
1,B,15,29,Kota
2,C,19,28,Kota
3,D,18,30,Kota
4,E,16,26,Kota
5,F,23,25,Kabupaten
6,G,25,22,Kabupaten
7,H,21,24,Kabupaten
8,I,23,23,Kabupaten
9,J,29,24,Kabupaten


In [19]:
# Function to calculate Euclidean distance
def euclidean_distance(house1, house2):
    return np.sqrt(np.sum((np.array(house1) - np.array(house2)) ** 2))

# Function to perform k-NN classification
def knn_classification(house_X, houses, locations, k=5):
    # Calculate distances between house X and all other houses
    distances = {house: euclidean_distance(house_X, [info['Lat'], info['Long']]) for house, info in houses.items()}
    
    # Sort houses by distance (ascending order)
    sorted_distances = sorted(distances.items(), key=lambda item: item[1])
    
    # Select the k nearest neighbors
    nearest_neighbors = sorted_distances[:k]
    
    # Get the locations of the nearest neighbors
    neighbor_locations = [locations[house] for house, _ in nearest_neighbors]
    
    # Perform majority voting
    location_counts = Counter(neighbor_locations)
    predicted_location = location_counts.most_common(1)[0][0]
    
    return sorted_distances, nearest_neighbors, predicted_location

In [20]:
# Extract latitude and longitude coordinates as a dictionary
houses = df.set_index('Rumah')[['Lat', 'Long']].to_dict('index')

# Extract the location (Kota/Kabupaten) for each house
locations = df.set_index('Rumah')['Lokasi'].to_dict()

# Coordinates of house X (change if needed)
house_X = [19, 25]

# Set the number of neighbors (k)
k = 5

# Perform k-NN classification
sorted_distances, nearest_neighbors, predicted_location = knn_classification(house_X, houses, locations, k)

# Output the results in a clean format
print("k-NN Classification Result:")
print(f"House X (Lat: {house_X[0]}, Long: {house_X[1]}) is predicted to be in: {predicted_location}")
print("\nNearest Neighbors (sorted by distance):")
for house, distance in nearest_neighbors:
    print(f"House {house}: Distance = {distance:.2f}, Location = {locations[house]}")


k-NN Classification Result:
House X (Lat: 19, Long: 25) is predicted to be in: Kabupaten

Nearest Neighbors (sorted by distance):
House X: Distance = 0.00, Location = ?
House H: Distance = 2.24, Location = Kabupaten
House C: Distance = 3.00, Location = Kota
House E: Distance = 3.16, Location = Kota
House F: Distance = 4.00, Location = Kabupaten
