# importing all the important libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

# Load the dataset

In [2]:
cancer_data=load_breast_cancer()

# Convert to dataframe

In [3]:
df=pd.DataFrame(cancer_data.data,columns=cancer_data.feature_names)
df['target']=cancer_data.target

In [4]:
df.head

<bound method NDFrame.head of      mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0          17.99         10.38          122.80     1001.0          0.11840   
1          20.57         17.77          132.90     1326.0          0.08474   
2          19.69         21.25          130.00     1203.0          0.10960   
3          11.42         20.38           77.58      386.1          0.14250   
4          20.29         14.34          135.10     1297.0          0.10030   
..           ...           ...             ...        ...              ...   
564        21.56         22.39          142.00     1479.0          0.11100   
565        20.13         28.25          131.20     1261.0          0.09780   
566        16.60         28.08          108.30      858.1          0.08455   
567        20.60         29.33          140.10     1265.0          0.11780   
568         7.76         24.54           47.92      181.0          0.05263   

     mean compactness  mean conca

# separate the features and target variable

In [5]:
x=df.drop('target',axis=1)
y=df['target']

# Split the data into training and testing data

In [6]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

# Standadizing the features

In [7]:
scaler=StandardScaler()
x_train=scaler.fit_transform(x_train)
x_test=scaler.transform(x_test)

# Train the classifiation model

In [8]:
knn=KNeighborsClassifier(n_neighbors=4)
knn.fit(x_train,y_train)

# Evaluate the model

In [9]:
y_pred=knn.predict(x_test)
print("Confusion matrix: ")
print(confusion_matrix(y_pred,y_test))
print("\nClassification report: ")
print(classification_report(y_pred,y_test))
print("\nAccuracy score: ")
print(accuracy_score(y_pred,y_test))

Confusion matrix: 
[[41  3]
 [ 2 68]]

Classification report: 
              precision    recall  f1-score   support

           0       0.95      0.93      0.94        44
           1       0.96      0.97      0.96        70

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114


Accuracy score: 
0.956140350877193


# Make predictions

In [10]:
sample_data=pd.DataFrame([[17.99, 10.38, 122.8, 1001.0, 0.1184, 0.2776, 0.3001, 0.1471, 0.2419, 0.07871, 
                             1.095, 0.9053, 8.589, 153.4, 0.006399, 0.04904, 0.05373, 0.01587, 0.03003, 0.006193, 
                             25.38, 17.33, 184.6, 2019.0, 0.1622, 0.6656, 0.7119, 0.2654, 0.4601, 0.1189]], 
                            columns=cancer_data.feature_names)
sample_data=scaler.transform(sample_data)
prediction=knn.predict(sample_data)
print("Prediction (0 = malignant, 1 = benign):",prediction)

Prediction (0 = malignant, 1 = benign): [0]
