In [13]:
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Toy dataset: three parallel lists, aligned by position, holding one
# categorical value per observation. `play` is the label to predict.
weather = ['rainy', 'overcast', 'sunny', 'sunny', 'overcast', 'rainy', 'rainy', 'sunny', 'overcast']
temperature = ['cool', 'hot', 'mild', 'cool', 'mild', 'mild', 'hot', 'cool', 'hot']
play = ['yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'no']

# Encode every column as integer codes, one independent encoder per column so
# each can be inverted separately.
# NOTE(review): LabelEncoder numbers categories in sorted order, so the
# Euclidean metric below treats those arbitrary codes as magnitudes.
# scikit-learn documents LabelEncoder as an encoder for targets, not features;
# consider OneHotEncoder (or OrdinalEncoder with an explicit category order)
# for `weather` / `temperature`. Left as-is here so results stay comparable.
weather_encoder = preprocessing.LabelEncoder()
temperature_encoder = preprocessing.LabelEncoder()
play_encoder = preprocessing.LabelEncoder()

weather_encoded = weather_encoder.fit_transform(weather)
temperature_encoded = temperature_encoder.fit_transform(temperature)
play_encoded = play_encoder.fit_transform(play)

# One (weather_code, temperature_code) pair per observation.
features = list(zip(weather_encoded, temperature_encoded))

# Hold out 20% of the observations for evaluation; random_state fixes the
# shuffle so the split is the same on every run.
features_train, features_test, label_train, label_test = train_test_split(
    features, play_encoded, test_size=0.2, random_state=42
)

# 3-nearest-neighbour classifier using Euclidean distance on the encoded pairs.
model = KNeighborsClassifier(n_neighbors=3, metric='euclidean')
model.fit(features_train, label_train)

# Predicted (encoded) labels for the held-out observations.
predicted = model.predict(features_test)

# Print results
print("Predictions:", predicted)

# Pin the matrix rows/columns to every class the encoder knows. Without
# `labels`, confusion_matrix only covers classes that occur in label_test or
# predicted, so a tiny test split containing a single class would yield a 1x1
# matrix whose row/column meaning is ambiguous.
class_ids = play_encoder.transform(play_encoder.classes_)
conf_mat = confusion_matrix(label_test, predicted, labels=class_ids)
print("Confusion Matrix:")
print(conf_mat)

accuracy = accuracy_score(label_test, predicted)
print("Accuracy:", accuracy)


Predictions: [1 1]
Confusion Matrix:
[[0 1]
 [0 1]]
Accuracy: 0.5


In [15]:
import numpy as np
from collections import Counter

# Labelled training samples, each stored as (x1, x2, class_label).
training_data = [
    (8, 5, 'X'),  # Sample 1
    (3, 1, 'Y'),  # Sample 2
    (6, 4, 'X'),  # Sample 3
    (2, 7, 'Y')   # Sample 4
]

# Point to classify, stored as (x1, x2).
query_instance = (4, 6)

# Number of nearest neighbours that vote on the class.
k = 3
# Fail early with a clear message: k < 1 would otherwise surface as an
# IndexError in the vote below, and k larger than the sample count would be
# silently truncated by the slice.
if not 1 <= k <= len(training_data):
    raise ValueError(
        f"k must be between 1 and {len(training_data)} (number of training samples), got {k}"
    )

# Step 1: Euclidean distance from the query to every training sample.
# float(...) turns the NumPy scalar into a built-in float so the printed lists
# show plain numbers regardless of the installed NumPy version (NumPy >= 2
# prints its scalars as `np.float64(...)`).
distances = []
for x1, x2, label in training_data:
    distance = float(np.sqrt((query_instance[0] - x1) ** 2 + (query_instance[1] - x2) ** 2))
    distances.append((distance, label))

# Step 2: Order the (distance, label) pairs and keep the k closest.
# Tuples compare by distance first; equal distances fall back to the label.
sorted_distances = sorted(distances)
nearest_neighbors = sorted_distances[:k]

# Step 3: Class labels of the selected neighbours, closest first.
neighbor_classes = [label for _, label in nearest_neighbors]

# Step 4: Majority vote. On a tied count, Counter.most_common returns the
# class that was encountered first, i.e. the one with the closest neighbour.
prediction = Counter(neighbor_classes).most_common(1)[0][0]

# Print results (the query point is formatted from `query_instance` so the
# message cannot drift from the value actually classified).
print("Distances to each training sample:", distances)
print("Nearest neighbors:", nearest_neighbors)
print(f"Predicted class for query instance {query_instance}:", prediction)


Distances to each training sample: [(4.123105625617661, 'X'), (5.0990195135927845, 'Y'), (2.8284271247461903, 'X'), (2.23606797749979, 'Y')]
Nearest neighbors: [(2.23606797749979, 'Y'), (2.8284271247461903, 'X'), (4.123105625617661, 'X')]
Predicted class for query instance (4, 6): X
