Apply a k-nearest neighbors (KNN) classifier to the scikit-learn breast cancer dataset.

In [12]:
#Import Required Libraries:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [13]:
# Load the breast cancer dataset bundled with scikit-learn and pull out
# the feature matrix (X) and the label vector (y).
data = load_breast_cancer()
X, y = data.data, data.target

In [14]:
# Split first: hold out 20% of the samples for testing. The fixed seed
# makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler learns its statistics from the training
# split only and the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
# Fit a k-nearest-neighbours classifier on the standardised training split.
k_value = 5  # number of neighbours consulted per prediction; change as needed

knn_classifier = KNeighborsClassifier(n_neighbors=k_value)
knn_classifier.fit(X=X_train_scaled, y=y_train)

In [16]:
# Predict labels for the held-out split (scaled with the training-fitted scaler).
y_pred = knn_classifier.predict(X=X_test_scaled)

In [17]:
# Compare the predictions against the true test labels and report the
# accuracy rounded to two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.95


### Hyperparameter tuning
#### GridSearchCV

In [7]:
from sklearn.model_selection import GridSearchCV

In [8]:
# Search space for the KNN grid search. Every combination of the values
# below is evaluated.
param_grid = dict(
    n_neighbors=list(range(3, 12, 2)),  # odd k from 3 to 11; adjust the range as needed
    weights=['uniform', 'distance'],
    p=[1, 2],  # Minkowski power: 1 = Manhattan, 2 = Euclidean
)

In [9]:
# Exhaustive search over `param_grid` with 5-fold cross-validation, scored by
# accuracy.
#
# The template estimator is passed inline instead of being bound to
# `knn_classifier`. GridSearchCV works on clones of the estimator it is given,
# so rebinding `knn_classifier` here would replace the fitted model from the
# training cell with an unfitted one and break any re-run of the prediction cell.
#
# NOTE: X_train_scaled was standardised using the whole training split before
# cross-validation, so every validation fold has contributed to the scaling
# statistics. Treat the cross-validated score as slightly optimistic.
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_scaled, y_train)

In [10]:
# Winning hyperparameter combination and its cross-validated score from the search.
best_params, best_accuracy = grid_search.best_params_, grid_search.best_score_

In [11]:
# Report the outcome of the grid search, one labelled line per result.
for label, value in (
    ("Best Parameters:", best_params),
    ("Best Accuracy:", best_accuracy),
):
    print(label, value)

Best Parameters: {'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}
Best Accuracy: 0.9648351648351647
