Q1. Write a Python code to implement the KNN classifier algorithm on load_iris dataset in
sklearn.datasets.

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [None]:
# Load the iris dataset and pull out the feature matrix and the class labels
iris = load_iris()
X, y = iris.data, iris.target

In [None]:
# Split the dataset into training and testing sets: test_size=0.2 holds out
# 20% of the samples, and random_state=42 makes the split reproducible
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
# Feature scaling: fit the scaler on the training split only, then apply the
# same fitted transformation to both splits
scaler = StandardScaler()
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Initialize the KNN classifier; k is the number of neighbours used for each prediction
k=5
knn=KNeighborsClassifier(n_neighbors=k)

In [7]:
# Fit the classifier on the scaled training features and their labels
knn.fit(X=X_train, y=y_train)


In [8]:
# Predict class labels for the held-out test samples
y_pred = knn.predict(X=X_test)

In [9]:
# Evaluate the predictions against the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
class_report = classification_report(y_true=y_test, y_pred=y_pred)

In [10]:

# Print the accuracy, the confusion matrix and the classification report,
# one item per line
print(
    f'Accuracy: {accuracy:.2f}',
    'Confusion Matrix:',
    conf_matrix,
    'Classification Report:',
    class_report,
    sep='\n',
)

Accuracy: 1.00
Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



Q2. Write a Python code to implement the KNN regressor algorithm on load_boston dataset in
sklearn.datasets.

The load_boston dataset used to be available in sklearn.datasets, but it was deprecated in scikit-learn 1.0 and removed in version 1.2 because of ethical concerns about the dataset's origins and potential biases. The load_diabetes dataset, which is also a regression dataset, is used here as a replacement. The Python code below implements the K-Nearest Neighbors (KNN) regressor on the load_diabetes dataset:



In [11]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [12]:
# Load the diabetes dataset and pull out the feature matrix and the regression target
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target


In [13]:
# Split the dataset into training and testing sets: test_size=0.2 holds out
# 20% of the samples, and random_state=42 makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [14]:
# Feature scaling: fit the scaler on the training split only, then apply the
# same fitted transformation to both splits
scaler = StandardScaler()
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.transform(X_test)


In [15]:
# Instantiate the KNN regressor; n_neighbors=5 sets how many neighbours are
# used for each prediction
knn_regressor = KNeighborsRegressor(n_neighbors=5)


In [16]:
# Fit the regressor on the scaled training features and their targets
knn_regressor.fit(X=X_train, y=y_train)


In [17]:
# Predict target values for the held-out test samples
y_pred = knn_regressor.predict(X=X_test)


In [18]:
# Evaluate the model: mean squared error first, then the R^2 score
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse:.2f}')

r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'R^2 Score: {r2:.2f}')


Mean Squared Error: 3047.45
R^2 Score: 0.42


Q3. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using
cross-validation on load_iris dataset in sklearn.datasets.

In [19]:
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

In [20]:
# Load the iris dataset: X holds the features, y the class labels
iris = load_iris()
X = iris.data
y = iris.target

In [21]:
# Candidate K values to try (1 through 30) and the list that will collect
# one score per candidate
k_range = range(1, 31)
k_scores = list()

In [22]:
# Perform 10-fold cross-validation for each candidate K and record the mean
# accuracy, one entry per value in k_range.
# The score list is reset here so that re-running this cell does not append a
# second batch of scores and leave k_scores out of step with k_range.
k_scores = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    k_scores.append(scores.mean())

In [23]:
# Pick the K whose mean cross-validated accuracy is highest; np.argmax gives
# the position of the first maximum, which is then mapped back to k_range
optimal_k = k_range[int(np.argmax(k_scores))]


In [24]:
# Report the K selected by the cross-validation search
print(f"The optimal value of K is: {optimal_k}")


The optimal value of K is: 13


In [25]:
# Optional: show the mean accuracy achieved by the best K
print(f"Accuracy with optimal K: {np.max(k_scores):.4f}")


Accuracy with optimal K: 0.9800


Q4. Implement the KNN regressor algorithm with feature scaling on load_boston dataset in
sklearn.datasets.

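The load_boston dataset has been removed from recent versions of scikit-learn, so this answer uses the California housing dataset (fetch_california_housing from sklearn.datasets) as the regression dataset instead. The following Python code snippet implements the KNN regressor algorithm with feature scaling on it: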



In [27]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Load the California housing dataset
california = fetch_california_housing()
X = california.data
y = california.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling (standardization): fit on the training split only, then
# transform both splits with the same fitted scaler
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the KNN regressor and train it on the scaled features
knn_regressor = KNeighborsRegressor(n_neighbors=5)
knn_regressor.fit(X=X_train_scaled, y=y_train)

# Predict on the scaled test set
y_pred = knn_regressor.predict(X=X_test_scaled)

# Evaluate the model: RMSE is the square root of the mean squared error
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = mse ** 0.5

print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")


Root Mean Squared Error (RMSE): 0.6576


Q5. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on
load_iris dataset in sklearn.datasets.

To implement the KNN classifier algorithm with weighted voting on the load_iris dataset from sklearn.datasets, you can use the following Python code snippet:



In [28]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# KNN classifier with weighted voting: weights='distance' selects
# distance-based weighting of the neighbours' votes
knn_classifier = KNeighborsClassifier(n_neighbors=5, weights='distance')

# Train the model
knn_classifier.fit(X=X_train, y=y_train)

# Predict on the test set and score the predictions
y_pred = knn_classifier.predict(X=X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Accuracy of KNN with weighted voting: {accuracy:.4f}")


Accuracy of KNN with weighted voting: 1.0000


Q6. Implement a function to standardise the features before applying KNN classifier.

To implement a function that standardizes the features before applying the KNN classifier, you can follow this Python code:



In [29]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris

def knn_with_standardization(X, y, n_neighbors=5, test_size=0.2, random_state=42):
    """
    Train and score a KNN classifier on standardized features.

    The data is split into train and test subsets, a StandardScaler is fitted
    on the training subset only and applied to both subsets, and a
    KNeighborsClassifier trained on the scaled training data is scored on the
    scaled test data.

    Parameters:
    - X: Feature matrix
    - y: Target vector
    - n_neighbors: Number of neighbors for KNN
    - test_size: Proportion of the dataset to include in the test split
    - random_state: Seed passed to the train/test split for reproducibility

    Returns:
    - accuracy: Accuracy of the KNN classifier on the test set
    """
    features_train, features_test, labels_train, labels_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training subset only, then reuse it for both subsets
    standardizer = StandardScaler().fit(features_train)

    model = KNeighborsClassifier(n_neighbors=n_neighbors)
    model.fit(standardizer.transform(features_train), labels_train)

    predictions = model.predict(standardizer.transform(features_test))
    return accuracy_score(labels_test, predictions)

# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Run the standardize-then-KNN helper with its default settings
accuracy = knn_with_standardization(X=X, y=y)

print(f"Accuracy of KNN with standardized features: {accuracy:.4f}")


Accuracy of KNN with standardized features: 1.0000


Q7. Write a Python function to calculate the euclidean distance between two points.

You can calculate the Euclidean distance between two points in a multi-dimensional space using the following Python function:



In [30]:
import math

def euclidean_distance(point1, point2):
    """
    Calculate the Euclidean distance between two points in a multi-dimensional space.

    Parameters:
    - point1: A list or tuple representing the coordinates of the first point.
    - point2: A list or tuple representing the coordinates of the second point.

    Returns:
    - distance: The Euclidean distance between the two points, as a float.

    Raises:
    - ValueError: If the two points do not have the same number of dimensions.
    """

    if len(point1) != len(point2):
        raise ValueError("Both points must have the same number of dimensions")

    # math.hypot computes sqrt(sum(d ** 2)) with internal scaling, so very large
    # coordinate differences do not raise OverflowError and very small ones do
    # not underflow to zero, which squaring each difference by hand would do.
    distance = math.hypot(*(x - y for x, y in zip(point1, point2)))

    return distance

# Example usage: distance between two 3-dimensional points
point_a, point_b = (1, 2, 3), (4, 6, 8)
distance = euclidean_distance(point1=point_a, point2=point_b)
print(f"The Euclidean distance between {point_a} and {point_b} is: {distance:.4f}")


The Euclidean distance between (1, 2, 3) and (4, 6, 8) is: 7.0711


Q8. Write a Python function to calculate the manhattan distance between two points.

You can calculate the Manhattan distance between two points in a multi-dimensional space using the following Python function:



In [31]:
def manhattan_distance(point1, point2):
    """
    Compute the Manhattan distance between two points.

    The distance is the sum of the absolute coordinate-wise differences.

    Parameters:
    - point1: A list or tuple representing the coordinates of the first point.
    - point2: A list or tuple representing the coordinates of the second point.

    Returns:
    - distance: The Manhattan distance between the two points.

    Raises:
    - ValueError: If the two points do not have the same number of dimensions.
    """
    if len(point1) == len(point2):
        # Add up |a - b| over each pair of corresponding coordinates
        return sum(abs(a - b) for a, b in zip(point1, point2))

    raise ValueError("Both points must have the same number of dimensions")

# Example usage: distance between two 3-dimensional points
point_a, point_b = (1, 2, 3), (4, 6, 8)
distance = manhattan_distance(point1=point_a, point2=point_b)
print(f"The Manhattan distance between {point_a} and {point_b} is: {distance}")


The Manhattan distance between (1, 2, 3) and (4, 6, 8) is: 12
