In [14]:
#importing useful libraries 

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.preprocessing import StandardScaler

In [15]:
# Load the iris dataset and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

In [16]:
# Split the dataset into training, validation, and testing data

# Step 1: keep 70% of the samples for training; the other 30% is held out.
X_train, X_rem, y_train, y_rem = train_test_split(X, y, train_size=0.7, random_state=42)

# Step 2: split the held-out 30% in half, so the validation and test sets
# each receive roughly 15% of the overall data.
# test_size is the fraction of the *remaining* data that goes to the test set.
test_size = 0.5
X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=test_size, random_state=42)


In [17]:
# Standardize the features with StandardScaler: the scaling statistics are
# learned from the training split only and then applied to every split.

scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_val, X_test = (scaler.transform(split) for split in (X_train, X_val, X_test))

In [18]:
# Find the best value of k for the KNN classifier using GridSearchCV

# Candidate neighbourhood sizes: k = 1 .. 30
k_range = list(range(1, 31))
param_grid = dict(n_neighbors=k_range)
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')

# Fit on the training split only. GridSearchCV runs its own 5-fold
# cross-validation on the data it is given, so the validation and test
# splits are not used for choosing k.
grid.fit(X_train, y_train)
best_k = grid.best_params_['n_neighbors']
print("Best k:", best_k)

Best k: 3




In [19]:
# Build the KNN classifier with the selected k and fit it on the training split

knn = KNeighborsClassifier(n_neighbors=best_k).fit(X_train, y_train)
knn  # echo the fitted estimator as the cell output

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [20]:
# Predict class labels for the test split with the fitted KNN model.
# y_pred is reused by the accuracy and confusion-matrix cells below.

y_pred = knn.predict(X_test)

In [21]:
# Accuracy of the fitted model on each split.
# The test score reuses the y_pred predictions computed for X_test.

train_acc = accuracy_score(y_true=y_train, y_pred=knn.predict(X_train))
val_acc = accuracy_score(y_true=y_val, y_pred=knn.predict(X_val))
test_acc = accuracy_score(y_true=y_test, y_pred=y_pred)

for label, acc in (
    ("Training Accuracy:", train_acc),
    ("Validation Accuracy:", val_acc),
    ("Testing Accuracy:", test_acc),
):
    print(label, acc)

Training Accuracy: 0.9428571428571428
Validation Accuracy: 1.0
Testing Accuracy: 1.0


In [22]:
# Confusion matrix for the test split
# (rows are the true classes, columns are the predicted classes)
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[ 6  0  0]
 [ 0 10  0]
 [ 0  0  7]]
