In [1]:
'''Q1. Write a Python code to implement the KNN classifier algorithm on load_iris dataset in sklearn.datasets.'''

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Target labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN classifier
knn = KNeighborsClassifier(n_neighbors=5)  

# Train the KNN classifier
knn.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the KNN Classifier: {accuracy}")


Accuracy of the KNN Classifier: 1.0


In [3]:
'''Q2. Write a Python code to implement the KNN regressor algorithm on load_boston dataset in sklearn.datasets.'''
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the boston dataset
boston = load_boston()
X = boston.data  # Features
y = boston.target  # Target variable (housing prices)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN regressor
knn_reg = KNeighborsRegressor(n_neighbors=5)  # You can adjust n_neighbors as needed

# Train the KNN regressor
knn_reg.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn_reg.predict(X_test)

# Calculate evaluation metrics (MSE and R-squared)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared: {r2}")


ImportError: 
`load_boston` has been removed from scikit-learn since version 1.2.

The Boston housing prices dataset has an ethical problem: as
investigated in [1], the authors of this dataset engineered a
non-invertible variable "B" assuming that racial self-segregation had a
positive impact on house prices [2]. Furthermore the goal of the
research that led to the creation of this dataset was to study the
impact of air quality but it did not give adequate demonstration of the
validity of this assumption.

The scikit-learn maintainers therefore strongly discourage the use of
this dataset unless the purpose of the code is to study and educate
about ethical issues in data science and machine learning.

In this special case, you can fetch the dataset from the original
source::

    import pandas as pd
    import numpy as np

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
    data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    target = raw_df.values[1::2, 2]

Alternative datasets include the California housing dataset and the
Ames housing dataset. You can load the datasets as follows::

    from sklearn.datasets import fetch_california_housing
    housing = fetch_california_housing()

for the California housing dataset and::

    from sklearn.datasets import fetch_openml
    housing = fetch_openml(name="house_prices", as_frame=True)

for the Ames housing dataset.

[1] M Carlisle.
"Racist data destruction?"
<https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>

[2] Harrison Jr, David, and Daniel L. Rubinfeld.
"Hedonic housing prices and the demand for clean air."
Journal of environmental economics and management 5.1 (1978): 81-102.
<https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>


In [4]:
'''
Q3. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using cross-validation on load_iris dataset in 
sklearn.datasets.
'''
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Target labels

# Create a KNN classifier
knn = KNeighborsClassifier()

# Define a range of K values to try
param_grid = {'n_neighbors': range(1, 31)}  # Testing K values from 1 to 30

# Perform grid search with cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)

# Get the best K value
best_k = grid_search.best_params_['n_neighbors']
best_score = grid_search.best_score_

print(f"Best K value: {best_k}")
print(f"Best cross-validation accuracy: {best_score:.4f}")


Best K value: 6
Best cross-validation accuracy: 0.9800


In [5]:
'''
Q4. Implement the KNN regressor algorithm with feature scaling on load_boston dataset in sklearn.datasets.
'''
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the Boston dataset
boston = load_boston()
X = boston.data  # Features
y = boston.target  # Target variable (housing prices)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the training data
X_train_scaled = scaler.fit_transform(X_train)

# Transform the testing data using the same scaler
X_test_scaled = scaler.transform(X_test)

# Initialize the KNN regressor
knn_reg = KNeighborsRegressor(n_neighbors=5)  # You can adjust n_neighbors as needed

# Train the KNN regressor on scaled data
knn_reg.fit(X_train_scaled, y_train)

# Make predictions on the scaled test set
y_pred = knn_reg.predict(X_test_scaled)

# Calculate evaluation metrics (MSE and R-squared)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE) with scaled features: {mse}")
print(f"R-squared with scaled features: {r2}")


ImportError: 
`load_boston` has been removed from scikit-learn since version 1.2.

The Boston housing prices dataset has an ethical problem: as
investigated in [1], the authors of this dataset engineered a
non-invertible variable "B" assuming that racial self-segregation had a
positive impact on house prices [2]. Furthermore the goal of the
research that led to the creation of this dataset was to study the
impact of air quality but it did not give adequate demonstration of the
validity of this assumption.

The scikit-learn maintainers therefore strongly discourage the use of
this dataset unless the purpose of the code is to study and educate
about ethical issues in data science and machine learning.

In this special case, you can fetch the dataset from the original
source::

    import pandas as pd
    import numpy as np

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
    data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    target = raw_df.values[1::2, 2]

Alternative datasets include the California housing dataset and the
Ames housing dataset. You can load the datasets as follows::

    from sklearn.datasets import fetch_california_housing
    housing = fetch_california_housing()

for the California housing dataset and::

    from sklearn.datasets import fetch_openml
    housing = fetch_openml(name="house_prices", as_frame=True)

for the Ames housing dataset.

[1] M Carlisle.
"Racist data destruction?"
<https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>

[2] Harrison Jr, David, and Daniel L. Rubinfeld.
"Hedonic housing prices and the demand for clean air."
Journal of environmental economics and management 5.1 (1978): 81-102.
<https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>


In [6]:
'''
Q5. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on load_iris dataset in sklearn.datasets.
'''
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Target labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN classifier with weighted voting
knn_weighted = KNeighborsClassifier(n_neighbors=5, weights='distance')  # Using weighted voting

# Train the KNN classifier
knn_weighted.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn_weighted.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the KNN Classifier with weighted voting: {accuracy}")


Accuracy of the KNN Classifier with weighted voting: 1.0


In [7]:
'''
Q6. Implement a function to standardise the features before applying KNN classifier.
'''
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Function to standardize features and apply KNN classifier
def knn_classifier_with_standardization(X_train, X_test, y_train, y_test, n_neighbors=5):
    # Standardize features using StandardScaler
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Initialize KNN classifier
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)

    # Train KNN classifier on standardized data
    knn.fit(X_train_scaled, y_train)

    # Make predictions on the standardized test set
    y_pred = knn.predict(X_test_scaled)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy of the KNN Classifier with standardized features: {accuracy}")
    return knn  # Return the trained classifier if needed

# Load the iris dataset
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Target labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply the knn_classifier_with_standardization function
knn_model = knn_classifier_with_standardization(X_train, X_test, y_train, y_test, n_neighbors=5)


Accuracy of the KNN Classifier with standardized features: 1.0


In [9]:
'''
Q7. Write a Python function to calculate the euclidean distance between two points.
'''
import math

def euclidean_distance(point1, point2):
    if len(point1) != len(point2):
        raise ValueError("The dimensions of the points must be the same.")
    
    distance = 0.0
    for i in range(len(point1)):
        distance += (point1[i] - point2[i]) ** 2
    
    return math.sqrt(distance)

# Example usage:
point_a = (1, 2)  # Coordinates of point A
point_b = (4, 5)  # Coordinates of point B

# Calculate Euclidean distance between point A and point B
distance = euclidean_distance(point_a, point_b)
print(f"Euclidean distance between point A and point B: {distance}")


Euclidean distance between point A and point B: 4.242640687119285


In [10]:
'''
Q8. Write a Python function to calculate the manhattan distance between two points.
'''
def manhattan_distance(point1, point2):
    if len(point1) != len(point2):
        raise ValueError("The dimensions of the points must be the same.")

    distance = 0.0
    for i in range(len(point1)):
        distance += abs(point1[i] - point2[i])
    
    return distance

# Example usage:
point_x = (1, 2)  # Coordinates of point X
point_y = (4, 5)  # Coordinates of point Y

# Calculate Manhattan distance between point X and point Y
distance = manhattan_distance(point_x, point_y)
print(f"Manhattan distance between point X and point Y: {distance}")


Manhattan distance between point X and point Y: 6.0
