# Evaluation of models

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib as jb

from yellowbrick.classifier import ConfusionMatrix, PrecisionRecallCurve, ROCAUC
from sklearn.metrics import (confusion_matrix, 
                             classification_report, 
                             roc_auc_score, 
                             accuracy_score,
                             precision_recall_curve)

import xgboost as xgb

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the test feature matrices for the Alpha model group:
# the full matrix and the feature-selected one.
X_test_alpha = np.loadtxt('../data/processed/X_data/X_test.csv', delimiter=',')
X_test_alpha_feature = np.loadtxt('../data/processed/X_data/X_test_feature.csv', delimiter=',')

# Load the test feature matrices for the Beta model group.
X_test_beta = np.loadtxt('../data/processed/X_data2/X_test2.csv', delimiter=',')
X_test_beta_feature = np.loadtxt('../data/processed/X_data2/X_test2_features.csv', delimiter=',')

# Load the test target that every model is scored against.
y_test = np.loadtxt('../data/processed/y_test.csv')

# Fail here, with a readable message, if any feature matrix is not row-aligned
# with the target, instead of failing later inside the metric functions.
for _matrix_name, _matrix in (
    ('X_test_alpha', X_test_alpha),
    ('X_test_alpha_feature', X_test_alpha_feature),
    ('X_test_beta', X_test_beta),
    ('X_test_beta_feature', X_test_beta_feature),
):
    assert _matrix.shape[0] == y_test.shape[0], (
        f'{_matrix_name} has {_matrix.shape[0]} rows but y_test has {y_test.shape[0]}'
    )

# Show the shape of each matrix as a quick sanity check.
print(
    f' Shape X_test_alpha:{X_test_alpha.shape}\n',
    f'Shape X_teste_alpha_feature:{X_test_alpha_feature.shape}\n\n',
    f'Shape X_test_beta:{X_test_beta.shape}\n',
    f'Shape X_test_beta_feature{X_test_beta_feature.shape}'
)

 Shape X_test_alpha:(114, 30)
 Shape X_teste_alpha_feature:(114, 9)

 Shape X_test_beta:(114, 24)
 Shape X_test_beta_feature(114, 7)


In [3]:
# Load every serialized model (*.pkl) found in the models directory into
# models_dict, keyed by the file name without its extension.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so this
# directory must only contain trusted files.
model_dir = '../models/'
models_dict = {}

for model_file in os.listdir(model_dir):
    if model_file.endswith('.pkl'):
        model_path = os.path.join(model_dir, model_file)
        model = jb.load(model_path)
        # Strip only the final extension: split('.')[0] would cut a name such
        # as 'model.v2.pkl' down to 'model' and let two files share one key.
        model_name = os.path.splitext(model_file)[0]
        models_dict[model_name] = model
        print(f'Load model name: {model_name}')

Load model name: model_alpha_rf_feature
Load model name: model_alpha_rf
Load model name: model_beta_rf
Load model name: model_alpha_svc
Load model name: model_beta_xgb
Load model name: model_beta_svc
Load model name: model_beta_svc_feature
Load model name: model_alpha_xgb_feature
Load model name: model_beta_rf_feature
Load model name: model_alpha_xgb
Load model name: model_alpha_svc_feature
Load model name: model_beta_xgb_feature


In [4]:
# Accumulator for AUC scores: maps a model name to a one-element list
# holding that model's rounded AUC.
auc_results = dict()

In [5]:
# Helper that reports, records and plots the test-set results of one model.
def summary_classification(model_name:str, X_test):
    """Print test-set metrics for one loaded model and save its confusion matrix.

    Looks the model up in the notebook-level ``models_dict``, scores it
    against the notebook-level ``y_test``, prints the classification report
    and the AUC, stores the rounded AUC in ``auc_results`` and writes the
    confusion-matrix heatmap to ``../reports/figures/models/``.

    Parameters
    ----------
    model_name : str
        Key of the model in ``models_dict``; also used in the plot title and
        in the name of the saved figure.
    X_test : array-like
        Feature matrix passed to ``model.predict``.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``model_name`` is not a key of ``models_dict``.
    """
    print(model_name+':')
    model = models_dict[model_name]

    # Predict once and reuse the result; every metric below needs the same
    # labels, so repeating the call only repeats the inference cost.
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))

    # NOTE(review): the AUC is computed from hard class predictions, not from
    # predict_proba / decision_function scores, so the ROC curve has a single
    # operating point. Confirm this is the intended metric.
    auc = round(roc_auc_score(y_test, y_pred), 3)
    print(f'AUC:{auc}')

    # Save the AUC value; it is wrapped in a list so the dict can be passed
    # straight to pd.DataFrame.
    auc_results[model_name] = [auc]

    # Plot the confusion matrix as a heatmap on its own figure, so nothing
    # left open on the implicit pyplot "current figure" can leak into it.
    conf_mat = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots()
    sns.heatmap(conf_mat, annot=True, cmap='Blues', fmt='g', ax=ax)
    ax.set_title('Confusion Matrix - Model: '+model_name)
    ax.set_xlabel('Predict')
    ax.set_ylabel('Real')

    # Save the figure, then close it so figures do not pile up in memory.
    fig.savefig('../reports/figures/models/confusion_matrix_'+model_name+'.png')
    plt.close(fig)

## Summary classification (Precision, recall and F1-score)

### Random Forest 

#### Group model Alpha

In [6]:
# Random Forest, Alpha group - all features:
summary_classification(model_name='model_alpha_rf', X_test=X_test_alpha)

model_alpha_rf:
              precision    recall  f1-score   support

         0.0       0.99      0.89      0.94        75
         1.0       0.83      0.97      0.89        39

    accuracy                           0.92       114
   macro avg       0.91      0.93      0.92       114
weighted avg       0.93      0.92      0.92       114

AUC:0.934


In [7]:
# Random Forest, Alpha group - selected features:
summary_classification(model_name='model_alpha_rf_feature', X_test=X_test_alpha_feature)

model_alpha_rf_feature:
              precision    recall  f1-score   support

         0.0       0.98      0.87      0.92        75
         1.0       0.79      0.97      0.87        39

    accuracy                           0.90       114
   macro avg       0.89      0.92      0.90       114
weighted avg       0.92      0.90      0.91       114

AUC:0.921


#### Group Model Beta

In [8]:
# Random Forest, Beta group - all features:
summary_classification(model_name='model_beta_rf', X_test=X_test_beta)

model_beta_rf:
              precision    recall  f1-score   support

         0.0       0.99      0.91      0.94        75
         1.0       0.84      0.97      0.90        39

    accuracy                           0.93       114
   macro avg       0.91      0.94      0.92       114
weighted avg       0.94      0.93      0.93       114

AUC:0.941


In [9]:
# Random Forest, Beta group - selected features:
summary_classification(model_name='model_beta_rf_feature', X_test=X_test_beta_feature)

model_beta_rf_feature:
              precision    recall  f1-score   support

         0.0       0.99      0.88      0.93        75
         1.0       0.81      0.97      0.88        39

    accuracy                           0.91       114
   macro avg       0.90      0.93      0.91       114
weighted avg       0.92      0.91      0.91       114

AUC:0.927


### XGBoost Classifier

#### Group model Alpha

In [10]:
# XGBoost, Alpha group - all features:
summary_classification(model_name='model_alpha_xgb', X_test=X_test_alpha)

model_alpha_xgb:
              precision    recall  f1-score   support

         0.0       0.99      0.91      0.94        75
         1.0       0.84      0.97      0.90        39

    accuracy                           0.93       114
   macro avg       0.91      0.94      0.92       114
weighted avg       0.94      0.93      0.93       114

AUC:0.941


In [11]:
# XGBoost, Alpha group - selected features:
summary_classification(model_name='model_alpha_xgb_feature', X_test=X_test_alpha_feature)

model_alpha_xgb_feature:
              precision    recall  f1-score   support

         0.0       0.99      0.88      0.93        75
         1.0       0.81      0.97      0.88        39

    accuracy                           0.91       114
   macro avg       0.90      0.93      0.91       114
weighted avg       0.92      0.91      0.91       114

AUC:0.927


#### Group Model Beta

In [12]:
# XGBoost, Beta group - all features:
summary_classification(model_name='model_beta_xgb', X_test=X_test_beta)

model_beta_xgb:
              precision    recall  f1-score   support

         0.0       0.99      0.92      0.95        75
         1.0       0.86      0.97      0.92        39

    accuracy                           0.94       114
   macro avg       0.92      0.95      0.93       114
weighted avg       0.94      0.94      0.94       114

AUC:0.947


In [13]:
# XGBoost, Beta group - selected features:
summary_classification(model_name='model_beta_xgb_feature', X_test=X_test_beta_feature)

model_beta_xgb_feature:
              precision    recall  f1-score   support

         0.0       0.98      0.87      0.92        75
         1.0       0.79      0.97      0.87        39

    accuracy                           0.90       114
   macro avg       0.89      0.92      0.90       114
weighted avg       0.92      0.90      0.91       114

AUC:0.921


### SVM Classifier

#### Group model Alpha

In [14]:
# SVM, Alpha group - all features:
summary_classification(model_name='model_alpha_svc', X_test=X_test_alpha)

model_alpha_svc:
              precision    recall  f1-score   support

         0.0       0.99      0.92      0.95        75
         1.0       0.86      0.97      0.92        39

    accuracy                           0.94       114
   macro avg       0.92      0.95      0.93       114
weighted avg       0.94      0.94      0.94       114

AUC:0.947


In [15]:
# SVM, Alpha group - selected features:
summary_classification(model_name='model_alpha_svc_feature', X_test=X_test_alpha_feature)

model_alpha_svc_feature:
              precision    recall  f1-score   support

         0.0       0.99      0.89      0.94        75
         1.0       0.83      0.97      0.89        39

    accuracy                           0.92       114
   macro avg       0.91      0.93      0.92       114
weighted avg       0.93      0.92      0.92       114

AUC:0.934


#### Group model Beta

In [16]:
# SVM, Beta group - all features:
summary_classification(model_name='model_beta_svc', X_test=X_test_beta)

model_beta_svc:
              precision    recall  f1-score   support

         0.0       0.99      0.91      0.94        75
         1.0       0.84      0.97      0.90        39

    accuracy                           0.93       114
   macro avg       0.91      0.94      0.92       114
weighted avg       0.94      0.93      0.93       114

AUC:0.941


In [17]:
# SVM, Beta group - selected features:
summary_classification(model_name='model_beta_svc_feature', X_test=X_test_beta_feature)

model_beta_svc_feature:
              precision    recall  f1-score   support

         0.0       0.99      0.88      0.93        75
         1.0       0.81      0.97      0.88        39

    accuracy                           0.91       114
   macro avg       0.90      0.93      0.91       114
weighted avg       0.92      0.91      0.91       114

AUC:0.927


In [18]:
# Build a one-column table of the collected AUC scores (one row per model)
# and display it ranked from best to worst.
smr = pd.DataFrame(auc_results).transpose()
smr.columns = ['AUC']
smr.sort_values('AUC', ascending=False)

Unnamed: 0,AUC
model_beta_xgb,0.947
model_alpha_svc,0.947
model_beta_rf,0.941
model_alpha_xgb,0.941
model_beta_svc,0.941
model_alpha_rf,0.934
model_alpha_svc_feature,0.934
model_beta_rf_feature,0.927
model_alpha_xgb_feature,0.927
model_beta_svc_feature,0.927




* Notes: 

    - `model_beta_xgb` and `model_alpha_svc` tie for the best AUC score (0.947). Between the two, I'll choose `model_beta_xgb` because it uses fewer features (24 vs. 30).
    
    - In this problem, the worst error the model can make is predicting a cell as benign when it is actually malignant (a false negative). To reduce this error, I'll focus on improving the recall for malignant cells (class 1 in our case). For `model_beta_xgb`, the current recall for class 1 is 97%, compared to a precision of 86%.

# End