In [None]:
# Imports

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
from sklearn.model_selection import train_test_split,cross_val_score,KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


In [None]:
# Load the breast-cancer dataset that ships with scikit-learn. The returned
# object exposes .data, .target, .feature_names and .target_names, which the
# following cells read.

data = load_breast_cancer()

In [None]:
# Show the feature (attribute) names and the class labels of the dataset

for heading, values in (
    ("Attributes: \n", data.feature_names),
    ("Labels: \n", data.target_names),
):
    print(heading, values)

Attributes: 
 ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Labels: 
 ['malignant' 'benign']


In [None]:
# Split into train and test sets: 30% of the samples are held out for testing,
# with a fixed random_state so the split is the same on every run

X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
# Data pre-processing (standardization): the scaler learns its statistics from
# the training split only and is then applied unchanged to both splits

scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit the classification model: k-nearest neighbours with k = 10,
# trained on the training split

classifier = KNeighborsClassifier(n_neighbors=10).fit(X_train, y_train)
classifier

In [None]:
# Cross validation kfold
#
# KNN is distance based, so it has to be cross-validated on standardized
# features -- the same preprocessing the fitted model above was trained with.
# Passing the raw X straight to the classifier would score an unscaled model.
# Wrapping the scaler and the classifier in a pipeline makes cross_val_score
# re-fit the scaler on the training part of every fold, so each fold is scored
# on scaled data without using statistics from its validation part.
# cross_val_score works on clones of the estimator, so the already fitted
# `classifier` is left untouched.

kfold = KFold(n_splits = 10, random_state = 42, shuffle = True)
cv_model = make_pipeline(StandardScaler(), classifier)
scores = cross_val_score(cv_model,X,y,cv=kfold)

In [None]:
# Show the accuracy of each fold, then the mean and the standard deviation
# ("Desvio padrao" in the printed text is Portuguese for "standard deviation").
for i,score in enumerate(scores,start=1):
  # `.4f` = four decimal places, matching the summary line below. The earlier
  # spec ` 4f` set a minimum width of 4 rather than a precision, so the default
  # six decimals were printed.
  print(f"Fold {i}: {score:.4f}")

print(f"Mean accuracy: {np.mean(scores):.4f} \nDesvio padrao: {np.std(scores): .4f}")

Fold 1:  0.982456
Fold 2:  0.964912
Fold 3:  0.964912
Fold 4:  1.000000
Fold 5:  0.859649
Fold 6:  0.877193
Fold 7:  0.982456
Fold 8:  0.894737
Fold 9:  0.912281
Fold 10:  0.910714
Mean accuracy: 0.9349 
Desvio padrao:  0.0472


In [None]:
# Predict the class of every sample in the held-out test split with the
# fitted classifier. X_test was standardized in the pre-processing cell using
# the scaler fitted on the training split.

y_pred = classifier.predict(X_test)

In [None]:
# Evaluate our model's performance

# Layout of the confusion matrix printed below: each ROW is a real (true)
# label and each COLUMN is a predicted label, both in label order 0, 1.
# In this dataset label 0 is 'malignant' and label 1 is 'benign', so the
# "positive" class in this layout is benign.
#
#                          PREDICTED
#                    Negative (0)     Positive (1)
#   R  Negative (0)  True Negative    False Positive
#   E
#   A  Positive (1)  False Negative   True Positive
#   L
print("Confusion matrix: \n", confusion_matrix(y_test,y_pred))

# Per-class precision, recall and F1, plus the number of test samples per class
print("\nClassification report: \n", classification_report(y_test,y_pred))

# Fraction of test samples whose predicted label equals the real label
print("\n Accuracy: \n", accuracy_score(y_test,y_pred))

Confusion matrix: 
 [[ 60   3]
 [  2 106]]

Classification report: 
               precision    recall  f1-score   support

           0       0.97      0.95      0.96        63
           1       0.97      0.98      0.98       108

    accuracy                           0.97       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.97      0.97      0.97       171


 Accuracy: 
 0.9707602339181286
