# Evaluating Classifiers

In [2]:
from pandas import read_csv
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler

## Here we read the data 
breast_cancer = read_csv('wisconsin_breast_cancer.csv')

## Here we select the variables of interest
breast_cancer = breast_cancer[['diagnosis', 'area_worst', 'radius_worst',  
                               'compactness_worst']]

## Here we recode diagnosis
breast_cancer['diagnosis'] = breast_cancer.diagnosis.map({'B':0, 'M':1})

## Here we define the predictors and target 
X = breast_cancer[['area_worst', 'radius_worst', 'compactness_worst']]
Y = breast_cancer[['diagnosis']]

## Here we create the MinMaxScaler object
scaler = MinMaxScaler()

## Here we normalize the predictor variables
X = scaler.fit_transform(X)

## Here we split the data into training and testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, 
                                                          random_state = 8)

## Here we fit a three nearest neighbor to the data 
three_nearest_neighbors = KNeighborsClassifier(n_neighbors = 3).fit(X_train, 
                                                                    Y_train)

## Here we predict on the testing data 
three_nearest_neighbors_preds = three_nearest_neighbors.predict(X_test)  

## Here we evaluate the performance of the model
print('The confusion matrix is:')
print(confusion_matrix(three_nearest_neighbors_preds, Y_test))
print('The accuracy is: ', accuracy_score(three_nearest_neighbors_preds, Y_test))
print('Classification report:')
print(classification_report(three_nearest_neighbors_preds, Y_test))

The confusion matrix is:
[[100   9]
 [  5  57]]
The accuracy is:  0.9181286549707602
Classification report:
              precision    recall  f1-score   support

           0       0.95      0.92      0.93       109
           1       0.86      0.92      0.89        62

    accuracy                           0.92       171
   macro avg       0.91      0.92      0.91       171
weighted avg       0.92      0.92      0.92       171



  return self._fit(X, y)
