In [None]:
import numpy as np
import pandas as pd

# Step 1: Transcribe the training dataset
# Each key is a column and each list position is one bus trip (24 trips).
# 'Bus Fare' is deliberately the last column: the KNN helpers further down
# read the class label from the final position of every row (row[-1]).
train_data = {
    # Numeric feature: trip length in kilometres.
    'Distance (km)': [307, 491, 327, 285, 400, 412, 393, 86, 465, 349, 417, 448, 238, 370, 367, 63, 115, 214, 431, 302, 433, 166, 111, 408],
    # Binary text features ('Yes' / 'No').
    'Air Conditioned': ['Yes', 'Yes', 'No', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes'],
    'Television': ['Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes'],
    # Binary text feature ('Available' / 'Not Available').
    'Food Service': ['Available', 'Not Available', 'Available', 'Available', 'Not Available', 'Available', 'Not Available', 'Not Available', 'Not Available', 'Not Available', 'Available', 'Available', 'Available', 'Available', 'Not Available', 'Available', 'Available', 'Not Available', 'Available', 'Available', 'Not Available', 'Available', 'Not Available', 'Available'],
    # Class label with four values: 'Affordable', 'Budget', 'Luxury', 'Premium'.
    'Bus Fare': ['Affordable', 'Affordable', 'Premium', 'Premium', 'Affordable', 'Premium', 'Premium', 'Budget', 'Luxury', 'Budget', 'Premium', 'Budget', 'Premium', 'Budget', 'Luxury', 'Budget', 'Luxury', 'Luxury', 'Luxury', 'Luxury', 'Luxury', 'Budget', 'Premium', 'Budget']
}

# Step 2: Transcribe the test dataset
# Same columns in the same order as the training data (10 trips).
# NOTE(review): test distances run up to 1000 km while the largest training
# distance is 491 km, so the longer test trips lie outside the training range.
test_data = {
    'Distance (km)': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
    'Air Conditioned': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No'],
    'Television': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No'],
    'Food Service': ['Available', 'Not Available', 'Available', 'Not Available', 'Available', 'Not Available', 'Available', 'Not Available', 'Available', 'Not Available'],
    # Expected label for each test trip; used only to score predictions.
    'Bus Fare': ['Affordable', 'Premium', 'Budget', 'Luxury', 'Affordable', 'Premium', 'Budget', 'Luxury', 'Affordable', 'Premium']
}

# Create pandas DataFrames
# Column order follows the dict insertion order above, keeping the label last.
train_df = pd.DataFrame(train_data)
test_df = pd.DataFrame(test_data)

# Step 3: Convert categorical data to numeric form using Label Encoding
# This maps categories like 'Yes' and 'No' to numbers, e.g., 'Yes' -> 1, 'No' -> 0
def encode_features(df):
    """Return a copy of ``df`` with every text column replaced by integer codes.

    Each text column is converted with ``pd.Categorical`` and replaced by
    its ``.codes``: the position of each value in that column's sorted set
    of distinct values (so ``'No'`` -> 0 and ``'Yes'`` -> 1). Other columns
    are copied unchanged and the input frame is not modified.

    Codes are derived from the values present in ``df`` only, so two frames
    encoded separately agree only if they contain the same categories.

    Args:
        df: DataFrame whose text columns should be encoded.

    Returns:
        A new DataFrame with the same columns and index as ``df``.
    """
    df_encoded = df.copy()
    for column in df.columns:
        dtype = df[column].dtype
        # Text may be stored as plain ``object`` or as pandas' dedicated
        # string dtype; comparing against 'object' alone would leave the
        # latter unencoded and break the numeric distance computation.
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
            df_encoded[column] = pd.Categorical(df[column]).codes
    return df_encoded

# Encode the train and test datasets together so that every category receives
# the same integer code in both; encoding them separately would shift the
# codes whenever one dataset lacks a category that the other contains.
combined_encoded = encode_features(pd.concat([train_df, test_df], keys=['train', 'test']))
train_encoded = combined_encoded.loc['train']
test_encoded = combined_encoded.loc['test']

# Step 4: Calculate Euclidean Distance (ignoring label columns for now)
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between the feature parts of two rows.

    The last element of ``row1`` is treated as the class label and is left
    out; the same number of leading elements is taken from ``row2``.

    Args:
        row1: Sequence of numeric features followed by a label.
        row2: Sequence with at least as many leading features as ``row1``.

    Returns:
        The distance as a NumPy float.

    Raises:
        IndexError: If ``row2`` has fewer feature values than ``row1``.
    """
    n_features = max(len(row1) - 1, 0)
    if len(row2) < n_features:
        raise IndexError('row2 has fewer feature values than row1')
    # Work in float: subtracting and squaring in a narrow integer dtype
    # (e.g. int8 category codes) can wrap around and corrupt the distance.
    first = np.asarray(row1[:n_features], dtype=float)
    second = np.asarray(row2[:n_features], dtype=float)
    return np.sqrt(np.sum((first - second) ** 2))

# Step 5: Find Nearest Neighbors
def get_neighbors(train, test_row, num_neighbors):
    """Return the ``num_neighbors`` rows of ``train`` closest to ``test_row``.

    Closeness is measured with ``euclidean_distance``. The result is ordered
    nearest-first, and rows at equal distance keep their order from
    ``train``. Asking for more neighbours than there are training rows
    raises ``IndexError``.
    """
    # Pair every training row with its distance to the query row.
    scored = [
        (candidate, euclidean_distance(test_row, candidate))
        for candidate in train
    ]
    # sorted() is stable, so ties keep their original training order.
    ranked = sorted(scored, key=lambda pair: pair[1])
    return [ranked[position][0] for position in range(num_neighbors)]

# Step 6: Make Predictions
def predict_classification(train, test_row, num_neighbors):
    """Predict the label of ``test_row`` by majority vote of its neighbours.

    The label is read from the last element of each of the
    ``num_neighbors`` nearest training rows. When several labels share the
    highest vote count, the one belonging to the nearest neighbour wins, so
    the result does not depend on set iteration order.

    Args:
        train: Iterable of training rows whose last element is the label.
        test_row: Row to classify (its own last element is ignored by the
            distance computation).
        num_neighbors: Number of neighbours that vote.

    Returns:
        The winning label.

    Raises:
        ValueError: If there are no neighbours to vote (e.g.
            ``num_neighbors`` is 0).
    """
    neighbors = get_neighbors(train, test_row, num_neighbors)
    # Neighbours arrive nearest-first and dicts keep insertion order, so the
    # tally's key order mirrors proximity.
    votes = {}
    for row in neighbors:
        label = row[-1]  # Last column contains the labels
        votes[label] = votes.get(label, 0) + 1
    # max() returns the first key with the top count, i.e. the tied label
    # that was seen closest to the query row.
    return max(votes, key=votes.get)

# Step 7: Evaluate Predictions
def evaluate_algorithm(train, test, num_neighbors):
    """Count how many rows of ``test`` are classified correctly.

    Each test row is classified with ``predict_classification`` against
    ``train``, and the prediction is compared with the row's own last
    element, which holds the true label.

    Returns:
        The number of test rows whose prediction equals their label.
    """
    return sum(
        1
        for sample in test
        if predict_classification(train, sample, num_neighbors) == sample[-1]
    )

# The KNN helpers index rows by position, so hand them plain NumPy matrices
# (one row per trip, label in the last column) instead of DataFrames.
train_data_array = train_encoded.to_numpy()
test_data_array = test_encoded.to_numpy()

# k: how many nearest training rows vote on each prediction
num_neighbors = 3

# Score the classifier on the test set and report the number of hits
correct_predictions = evaluate_algorithm(
    train_data_array,
    test_data_array,
    num_neighbors,
)
print(f'Number of correctly predicted rows: {correct_predictions}')


Number of correctly predicted rows: 2
