In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import warnings 

# Silence all library warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Inclusive lower bounds used to derive the labels: a row is labelled
# 'Malignant' as soon as ANY of these features reaches its bound.
# NOTE(review): labels come from these rules, not from the dataset's own
# target column -- confirm that this is intended.
MALIGNANT_THRESHOLDS = {
    'mean radius': 15,
    'mean texture': 20,
    'mean perimeter': 90,
    'mean area': 600,
    'mean compactness': 0.2,
    'mean concave points': 0.05,
}


def label_by_thresholds(frame):
    """Return an array of 'Malignant'/'Benign' labels for the rows of *frame*.

    A row is 'Malignant' when at least one feature listed in
    MALIGNANT_THRESHOLDS is greater than or equal to its bound, otherwise
    'Benign'. *frame* must contain every column named in that mapping.
    """
    flagged = np.zeros(len(frame), dtype=bool)
    for column, bound in MALIGNANT_THRESHOLDS.items():
        flagged |= (frame[column] >= bound).to_numpy()
    return np.where(flagged, 'Malignant', 'Benign')


# Load the dataset once instead of once per attribute access.
dataset = load_breast_cancer()
df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
# Keep only the "mean ..." measurements.
df = df[[name for name in df.columns if 'mean' in name]]

# Fixed seed so the split (and therefore the reported metrics) is repeatable.
x_train = df.sample(n=100, random_state=0)
# Everything that was not sampled; dropping by index is exact even when two
# rows hold identical values, which a value-based merge cannot guarantee.
x_test = df.drop(x_train.index)

y_train = label_by_thresholds(x_train)
y_test = label_by_thresholds(x_test)

# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

# NOTE(review): k is chosen by accuracy on the test split, so the report
# printed below is an optimistic estimate. A separate validation split or
# cross-validation on the training data would avoid that.

# Start from a valid neighbour count so the final fit still works even if no
# candidate scores above zero.
best_k = 1
best_accuracy = 0
# A k-NN query cannot ask for more neighbours than there are training rows,
# so cap the search instead of assuming at least 99 training samples.
max_k = min(99, len(x_train))
for k in range(1, max_k + 1):
    knn = neighbors.KNeighborsClassifier(n_neighbors=k)
    y_pred = knn.fit(x_train, y_train).predict(x_test)
    # Fraction of correct predictions, computed once per candidate rather
    # than building the full classification report twice.
    accuracy = float(np.mean(y_pred == y_test))
    # Strict comparison keeps the smallest k among equally accurate ones.
    if accuracy > best_accuracy:
        best_k, best_accuracy = k, accuracy

# Refit with the selected k and print the per-class report.
knn = neighbors.KNeighborsClassifier(n_neighbors=best_k)
y_pred = knn.fit(x_train, y_train).predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      Benign       0.79      0.94      0.86       176
   Malignant       0.96      0.85      0.90       293

    accuracy                           0.88       469
   macro avg       0.88      0.90      0.88       469
weighted avg       0.90      0.88      0.89       469

