# Evaluating k-NN Classifier

In [2]:
from pandas import read_csv
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler

## Here we read the data 
breast_cancer = read_csv('wisconsin_breast_cancer.csv')

## Here we select the variables of interest
breast_cancer = breast_cancer[['diagnosis', 'area_worst', 'radius_worst',  
                               'compactness_worst']]

## Here we recode diagnosis
breast_cancer['diagnosis'] = breast_cancer.diagnosis.map({'B':0, 'M':1})

## Here we define the predictors and target 
X = breast_cancer[['area_worst', 'radius_worst', 'compactness_worst']]
Y = breast_cancer[['diagnosis']]

## Here we create the MinMaxScaler object
scaler = MinMaxScaler()

## Here we normalize the predictor variables
X = scaler.fit_transform(X)

## Here we split the data into training and testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, 
                                                          random_state = 8)

## Here we fit a three nearest neighbor to the data 
three_nearest_neighbors = KNeighborsClassifier(n_neighbors = 3).fit(X_train, 
                                                                    Y_train)

## Here we predict on the testing data 
three_nearest_neighbors_preds = three_nearest_neighbors.predict(X_test)  

## Here we evaluate the performance of the model
print('The confusion matrix is:')
print(confusion_matrix(three_nearest_neighbors_preds, Y_test))
print('The accuracy is: ', accuracy_score(three_nearest_neighbors_preds, Y_test))
print('Classification report:')
print(classification_report(three_nearest_neighbors_preds, Y_test))

The confusion matrix is:
[[100   9]
 [  5  57]]
The accuracy is:  0.9181286549707602
Classification report:
              precision    recall  f1-score   support

           0       0.95      0.92      0.93       109
           1       0.86      0.92      0.89        62

    accuracy                           0.92       171
   macro avg       0.91      0.92      0.91       171
weighted avg       0.92      0.92      0.92       171



  return self._fit(X, y)


# Evaluating Random Forest Classifier

In [3]:
from sklearn.ensemble import RandomForestClassifier

## Here we create the random forest object
random_forest = RandomForestClassifier()

## Here we fit the random forest to the training set
rf_md = random_forest.fit(X_train, Y_train)

## Here we use the random forest to predict on test set
rf_preds = rf_md.predict(X_test)

## Here we evaluate the performance of the model
print('The confusion matrix is:')
print(confusion_matrix(rf_preds, Y_test))
print('The accuracy is: ', accuracy_score(rf_preds, Y_test))
print('Classification report:')
print(classification_report(rf_preds, Y_test))



The confusion matrix is:
[[100   7]
 [  5  59]]
The accuracy is:  0.9298245614035088
Classification report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       107
           1       0.89      0.92      0.91        64

    accuracy                           0.93       171
   macro avg       0.92      0.93      0.93       171
weighted avg       0.93      0.93      0.93       171



# Evaluating Gradient Boosting Classifier

In [4]:
from sklearn.ensemble import GradientBoostingClassifier

## Here we create the gradient boost model object
boosting_classifier = GradientBoostingClassifier(n_estimators = 500, 
                                                 max_depth = 3)

## Here we fit the model to the training set 
gbm_md = boosting_classifier.fit(X_train, Y_train)

## Here we predict on the testing data
gbm_preds = gbm_md.predict(X_test)

## Here we evaluate the performance of the model
print('The confusion matrix is:')
print(confusion_matrix(gbm_preds, Y_test))
print('The accuracy is: ', accuracy_score(gbm_preds, Y_test))
print('Classification report:')
print(classification_report(gbm_preds, Y_test))

  return f(*args, **kwargs)


The confusion matrix is:
[[101   7]
 [  4  59]]
The accuracy is:  0.935672514619883
Classification report:
              precision    recall  f1-score   support

           0       0.96      0.94      0.95       108
           1       0.89      0.94      0.91        63

    accuracy                           0.94       171
   macro avg       0.93      0.94      0.93       171
weighted avg       0.94      0.94      0.94       171



# Evaluating k-NN Regressor

In [5]:
from sklearn.neighbors import KNeighborsRegressor
from numpy import sqrt
from numpy import mean

## Here we read the data 
auto = read_csv('auto.csv')

## Here we define the predictors and target
X = auto[['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration']]
Y = auto[['mpg']]
Y = Y.values.reshape(-1, 1)

## Here we create the MinMaxScaler object
scaler = MinMaxScaler()

## Here normalize the predictors
X = scaler.fit_transform(X)

## Here we split the data into training (70%) and testing (30%)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, 
                                                    random_state = 7)

## Here we fit a three nearest neighbor to the data 
three_nearest_neighbors = KNeighborsRegressor(n_neighbors = 3).fit(X_train, 
                                                                   Y_train)

## Here we predict on the testing data 
three_nearest_neighbors_preds = three_nearest_neighbors.predict(X_test)

## Here we compute the RMSE
RMSE_three_nearest_neighbors_preds = sqrt(mean((three_nearest_neighbors_preds - 
                                                Y_test)**2))
print('The RMSE of the 3-NN is: ', RMSE_three_nearest_neighbors_preds)

## Here we compute the MAE
MAE_three_nearest_neighbors_preds = mean(abs(three_nearest_neighbors_preds - 
                                             Y_test))
print('The RMSE of the 3-NN is: ', MAE_three_nearest_neighbors_preds)

The RMSE of the 3-NN is:  4.18565616291213
The RMSE of the 3-NN is:  3.1
