# 22_April_Assignment

### Q1. Write a Python code to implement the KNN classifier algorithm on load_iris dataset in sklearn.datasets.

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
iris = load_iris()

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2)

# Create the KNN classifier with k=3
knn = KNeighborsClassifier(n_neighbors=3)

# Train the KNN classifier on the training data
knn.fit(X_train, y_train)

# Use the trained KNN classifier to predict the test data
y_pred = knn.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9666666666666667


### Q2.  Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using cross-validation on load_iris dataset in sklearn.datasets.

In [7]:
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neighbors import KNeighborsClassifier

# Load the dataset
iris = load_iris()

# Define the range of K values to test
k_range = range(1, 31)

# Create an empty list to store the mean accuracy scores for each value of K
k_scores = []

# Perform cross-validation for each value of K
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    # Define the cross-validation method (here we use 10-fold cross-validation)
    cv = KFold(n_splits=10, shuffle=True, random_state=42)
    # Calculate the mean accuracy score using cross-validation
    scores = cross_val_score(knn, iris.data, iris.target, cv=cv, scoring='accuracy')
    k_scores.append(scores.mean())

# Find the optimal value of K with the highest mean accuracy score
optimal_k = k_range[k_scores.index(max(k_scores))]
print(f"Optimal value of K: {optimal_k}")

Optimal value of K: 10


### Q3. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on load_iris dataset in sklearn.datasets.

In [9]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

# Load the dataset
iris = load_iris()

# Split the dataset into features (X) and target variable (y)
X = iris.data
y = iris.target

# Create the KNN classifier with k=5 and weighted voting
knn = KNeighborsClassifier(n_neighbors=5, weights='distance')

# Train the KNN classifier on the entire dataset
knn.fit(X, y)

# Predict the class labels for new data
new_data = [[6.1, 3.1, 5.1, 1.9], [5.7, 2.8, 4.1, 1.3]]
y_pred = knn.predict(new_data)


### Q4.Implement a function to standardise the features before applying KNN classifier.

In [10]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Load the dataset
iris = load_iris()

# Split the dataset into features (X) and target variable (y)
X = iris.data
y = iris.target

# Define a function to standardize the features
def standardize(X):
    scaler = StandardScaler()
    X_std = scaler.fit_transform(X)
    return X_std

# Standardize the features
X_std = standardize(X)

# Create the KNN classifier with k=5
knn = KNeighborsClassifier(n_neighbors=5)

# Train the KNN classifier on the standardized data
knn.fit(X_std, y)

# Predict the class labels for new data
new_data = [[6.1, 3.1, 5.1, 1.9], [5.7, 2.8, 4.1, 1.3]]
new_data_std = standardize(new_data)
y_pred = knn.predict(new_data_std)

print("Predicted class labels:", y_pred)

Predicted class labels: [2 0]


### Q5.Write a Python function to calculate the euclidean distance between two points.

In [11]:
from sklearn.datasets import load_iris
import math

# Load the iris dataset
iris = load_iris()

# Select two random data points from the dataset
x1 = iris.data[0]
x2 = iris.data[50]

# Define a function to calculate the Euclidean distance
def euclidean_distance(x1, x2):
    distance = 0
    for i in range(len(x1)):
        distance += (x1[i] - x2[i])**2
    distance = math.sqrt(distance)
    return distance

# Calculate the Euclidean distance between the two points
distance = euclidean_distance(x1, x2)

# Print the result
print("Euclidean distance between x1 and x2:", distance)

Euclidean distance between x1 and x2: 4.003748243833521


### Q6. Write a Python function to calculate the manhattan distance between two points.

In [12]:
from sklearn.datasets import load_iris

# Load the iris dataset
iris = load_iris()

# Select two random data points from the dataset
x1 = iris.data[0]
x2 = iris.data[50]

# Define a function to calculate the Manhattan distance
def manhattan_distance(x1, x2):
    distance = 0
    for i in range(len(x1)):
        distance += abs(x1[i] - x2[i])
    return distance

# Calculate the Manhattan distance between the two points
distance = manhattan_distance(x1, x2)

# Print the result
print("Manhattan distance between x1 and x2:", distance)

Manhattan distance between x1 and x2: 6.7
