In [None]:
# Breast Cancer Classification Using K-Nearest Neighbors (KNN)
## Objective
This notebook demonstrates the application of the K-Nearest Neighbors (KNN) algorithm to classify breast cancer tumors as Malignant (M) or Benign (B). The workflow includes:

1. Loading and exploring the dataset

2. Preprocessing the data

3. Training a KNN model

4. Saving the trained model

5. Loading the saved model for inference

6. Evaluating model performance on new data



In [2]:
!pip install numpy pandas matplotlib seaborn scikit-learn




In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import joblib  # For saving & loading model


# --- Configuration -----------------------------------------------------------
# NOTE(review): this is an absolute, machine-specific path. Point DATA_PATH at
# your local copy of the dataset before running on another machine.
DATA_PATH = "C:\\Users\\SIDDHARTH U\\Downloads\\data.csv"
TARGET_COLUMN = "diagnosis"
LABEL_ENCODING = {"M": 1, "B": 0}  # Malignant -> 1, Benign -> 0
TEST_FRACTION = 0.33
SPLIT_SEED = 10
N_NEIGHBORS = 9

# --- Load and clean ----------------------------------------------------------
# Drop the two non-feature columns without mutating a frame in place.
df = pd.read_csv(DATA_PATH).drop(columns=['Unnamed: 32', 'id'])

# Encode the target with an explicit mapping. Unlike `1 if x == "M" else 0`,
# this refuses to silently treat unknown, misspelled or missing labels as benign.
encoded_labels = df[TARGET_COLUMN].map(LABEL_ENCODING)
unmapped_rows = encoded_labels.isna()
if unmapped_rows.any():
    unexpected = sorted(df.loc[unmapped_rows, TARGET_COLUMN].astype(str).unique())
    raise ValueError(
        f"Unexpected values in '{TARGET_COLUMN}' (expected 'M' or 'B'): {unexpected}"
    )
df[TARGET_COLUMN] = encoded_labels.astype("int64")

# --- Features / target -------------------------------------------------------
# Select features by name so the result does not depend on the target's
# position among the columns.
X = df.drop(columns=TARGET_COLUMN).values
y = df[TARGET_COLUMN].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# --- Train and evaluate ------------------------------------------------------
# NOTE(review): KNN is distance-based and the features are passed in unscaled.
# Consider a StandardScaler + KNeighborsClassifier Pipeline; left unchanged here
# so the reported accuracy remains reproducible.
knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
knn.fit(X_train, y_train)

# Mean accuracy on the held-out test split.
accuracy = knn.score(X_test, y_test)
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 0.94


In [5]:
# Persist the fitted classifier to the working directory so it can be
# reloaded later without retraining.
joblib.dump(value=knn, filename="knn_model.pkl")
print("Model saved as knn_model.pkl")


Model saved as knn_model.pkl


In [6]:
# Restore the persisted classifier from disk.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
knn_loaded = joblib.load("knn_model.pkl")
print("Model Loaded Successfully!")

# Run inference on the first few rows of the held-out test split and show the
# predictions next to the true labels for a quick sanity check.
sample_count = 5
new_data = X_test[:sample_count]
predictions = knn_loaded.predict(new_data)

print("Predictions:", predictions)
print("Actual Labels:", y_test[:sample_count])


Model Loaded Successfully!
Predictions: [1 0 0 1 0]
Actual Labels: [1 0 0 1 0]
