### Assignment  73: KNN-3  : Kundan Kumar

![image.png](attachment:d1188252-7be9-491d-b69d-e71d0fb867e0.png)

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
iris = load_iris()

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2)

# Create the KNN classifier with k=3
knn = KNeighborsClassifier(n_neighbors=3)

# Train the KNN classifier on the training data
knn.fit(X_train, y_train)

# Use the trained KNN classifier to predict the test data
y_pred = knn.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9666666666666667


![image.png](attachment:fdca732d-00b6-44ea-93cd-f97f3440b358.png)

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Load the dataset using pandas
boston = pd.read_csv('boston.csv')

# Split the dataset into features (X) and target variable (y)
X = boston.drop('MEDV', axis=1)
y = boston['MEDV']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Create the KNN regressor with k=5
knn = KNeighborsRegressor(n_neighbors=5)

# Train the KNN regressor on the training data
knn.fit(X_train, y_train)

# Use the trained KNN regressor to predict the test data
y_pred = knn.predict(X_test)

# Calculate the mean squared error of the regressor
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 7588938600.0


![image.png](attachment:f3f2cec1-4fb2-4861-aa30-2c003d7334f5.png)

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neighbors import KNeighborsClassifier

# Load the dataset
iris = load_iris()

# Define the range of K values to test
k_range = range(1, 31)

# Create an empty list to store the mean accuracy scores for each value of K
k_scores = []

# Perform cross-validation for each value of K
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    # Define the cross-validation method (here we use 10-fold cross-validation)
    cv = KFold(n_splits=10, shuffle=True, random_state=42)
    # Calculate the mean accuracy score using cross-validation
    scores = cross_val_score(knn, iris.data, iris.target, cv=cv, scoring='accuracy')
    k_scores.append(scores.mean())

# Find the optimal value of K with the highest mean accuracy score
optimal_k = k_range[k_scores.index(max(k_scores))]
print(f"Optimal value of K: {optimal_k}")

Optimal value of K: 14


![image.png](attachment:42fcb3f5-f08b-4aa4-b376-23711233c480.png)

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Load the dataset from a CSV file
data = pd.read_csv("boston.csv")

# Split the dataset into features (X) and target variable (y)
X = data.drop('MEDV', axis=1)
y = data['MEDV']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Perform feature scaling on the training and testing data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create the KNN regressor with k=5
knn = KNeighborsRegressor(n_neighbors=5)

# Train the KNN regressor on the training data
knn.fit(X_train, y_train)

# Use the trained KNN regressor to predict the test data
y_pred = knn.predict(X_test)

# Calculate the mean squared error of the regressor
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 4694646600.0


![image.png](attachment:3ecc55f6-cb79-454a-affb-f04ce8d0ba57.png)

In [5]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

# Load the dataset
iris = load_iris()

# Split the dataset into features (X) and target variable (y)
X = iris.data
y = iris.target

# Create the KNN classifier with k=5 and weighted voting
knn = KNeighborsClassifier(n_neighbors=5, weights='distance')

# Train the KNN classifier on the entire dataset
knn.fit(X, y)

# Predict the class labels for new data
new_data = [[6.1, 3.1, 5.1, 1.9], [5.7, 2.8, 4.1, 1.3]]
y_pred = knn.predict(new_data)

print("Predicted class labels:", y_pred)

Predicted class labels: [2 1]


![image.png](attachment:44c22a0b-4e84-4eb2-a287-8a2025263248.png)

In [6]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Load the dataset
iris = load_iris()

# Split the dataset into features (X) and target variable (y)
X = iris.data
y = iris.target

# Define a function to standardize the features
def standardize(X):
    scaler = StandardScaler()
    X_std = scaler.fit_transform(X)
    return X_std

# Standardize the features
X_std = standardize(X)

# Create the KNN classifier with k=5
knn = KNeighborsClassifier(n_neighbors=5)

# Train the KNN classifier on the standardized data
knn.fit(X_std, y)

# Predict the class labels for new data
new_data = [[6.1, 3.1, 5.1, 1.9], [5.7, 2.8, 4.1, 1.3]]
new_data_std = standardize(new_data)
y_pred = knn.predict(new_data_std)

print("Predicted class labels:", y_pred)

Predicted class labels: [2 0]


![image.png](attachment:a68a6658-4be8-4975-9e09-f45b0e114437.png)

In [7]:
from sklearn.datasets import load_iris
import math

# Load the iris dataset
iris = load_iris()

# Select two random data points from the dataset
x1 = iris.data[0]
x2 = iris.data[50]

# Define a function to calculate the Euclidean distance
def euclidean_distance(x1, x2):
    distance = 0
    for i in range(len(x1)):
        distance += (x1[i] - x2[i])**2
    distance = math.sqrt(distance)
    return distance

# Calculate the Euclidean distance between the two points
distance = euclidean_distance(x1, x2)

# Print the result
print("Euclidean distance between x1 and x2:", distance)

Euclidean distance between x1 and x2: 4.003748243833521


![image.png](attachment:5de7f7b0-ef79-44c3-8a69-adbcb0c73734.png)


In [8]:
from sklearn.datasets import load_iris

# Load the iris dataset
iris = load_iris()

# Select two random data points from the dataset
x1 = iris.data[0]
x2 = iris.data[50]

# Define a function to calculate the Manhattan distance
def manhattan_distance(x1, x2):
    distance = 0
    for i in range(len(x1)):
        distance += abs(x1[i] - x2[i])
    return distance

# Calculate the Manhattan distance between the two points
distance = manhattan_distance(x1, x2)

# Print the result
print("Manhattan distance between x1 and x2:", distance)

Manhattan distance between x1 and x2: 6.7
