In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV, cross_validate
from sklearn.metrics import  confusion_matrix, plot_roc_curve

import warnings


def warn(*args, **kwargs):
    '''
    No-op stand-in for warnings.warn.
    ---------------------------------
    Accepts any positional and keyword arguments, ignores them and returns None.
    '''
    return None


# Swap the module-level warnings.warn for the no-op above, so code that looks up
# warnings.warn at call time no longer emits anything for the rest of the session.
warnings.warn = warn



### Presentation Functions

In [None]:
def print_scores(cv_results, best_idx=None):
    '''
    Prints the average test/train accuracy, f1 and roc_auc from cross_validate or GridSearchCV results.
    ---------------------------------------------------------------------------------------------------
    cv_results: Dict of scoring metrics. If it contains the key 'mean_test_accuracy' it is treated as
                GridSearchCV results ('mean_test_<metric>' / 'mean_train_<metric>' keys); otherwise it is
                treated as cross_validate output ('test_<metric>' / 'train_<metric>' keys).
    best_idx: Index of the candidate whose scores are reported. Required for GridSearchCV results,
              ignored for cross_validate results.

    Raises ValueError when GridSearchCV results are passed without best_idx.
    '''
    metrics = ('accuracy', 'f1', 'roc_auc')

    if 'mean_test_accuracy' in cv_results:
        # GridSearchCV results hold one entry per candidate, so a concrete index is needed.
        # Indexing an array with None would add an axis and print every candidate's score
        # under an "Average" label, so fail loudly instead.
        if best_idx is None:
            raise ValueError(
                "best_idx is required when cv_results comes from GridSearchCV "
                "(e.g. pass gs.best_index_)"
            )
        averages = {
            metric: (cv_results[f'mean_test_{metric}'][best_idx],
                     cv_results[f'mean_train_{metric}'][best_idx])
            for metric in metrics
        }
    else:
        # cross_validate returns one score per fold; average them for overall performance.
        averages = {
            metric: (np.mean(cv_results[f'test_{metric}']),
                     np.mean(cv_results[f'train_{metric}']))
            for metric in metrics
        }

    test_accuracy, train_accuracy = averages['accuracy']
    test_f1, train_f1 = averages['f1']
    test_auc, train_auc = averages['roc_auc']

    print(f"Average accuracy-- test: {test_accuracy}, train: {train_accuracy}")
    print(f"Average f1-- test: {test_f1}, train: {train_f1}")
    print(f"Average roc_auc-- test roc_auc: {test_auc}, train: {train_auc}")

def create_plots(model, X, y_true):
    '''
    Plots the confusion matrix and ROC curve side by side on one figure.
    --------------------------------------------------------------------
    model: Fitted model; must provide predict(X)
    X: Values to make predictions from
    y_true: Actual y values

    Returns nothing; the plots are drawn on a newly created matplotlib figure.
    '''
    _, (ax_matrix, ax_roc) = plt.subplots(ncols=2, figsize=(10, 4))

    y_pred = model.predict(X)
    conf_matrix = confusion_matrix(y_true, y_pred)
    # fmt='d' prints the cell counts as plain integers. seaborn's default annotation
    # format ('.2g') would show counts of 100 or more rounded in scientific notation
    # (e.g. 150 -> 1.5e+02).
    sns.heatmap(conf_matrix, annot=True, fmt='d', ax=ax_matrix)

    # NOTE(review): plot_roc_curve was deprecated in scikit-learn 1.0 and removed in 1.2;
    # newer versions need RocCurveDisplay.from_estimator instead (the file-level import
    # would have to change too).
    plot_roc_curve(model, X, y_true, ax=ax_roc)

### Cross-Validation Functions

In [None]:
def grid_search_cv(model, grid, X, y, cv=5):
    '''
    Runs a grid search, prints the best candidate's scores, plots its results and returns the best parameters.
    ---------------------------------------------------------------------------------------------------------
    model: Estimator to tune
    grid: Parameter grid passed to GridSearchCV as param_grid
    X: Feature values used to fit the search
    y: Target values used to fit the search
    cv: Cross-validation setting forwarded to GridSearchCV (default 5)

    Returns the best_params_ of the fitted search.
    '''
    search = GridSearchCV(
        estimator=model,
        param_grid=grid,
        scoring=['accuracy', 'f1', 'roc_auc'],
        # With several scoring metrics, refit names the one used to pick the best candidate.
        refit='f1',
        cv=cv,
        return_train_score=True,
    )
    search.fit(X, y)

    # Report the scores of the best candidate, then plot using the fitted search object
    # on the same X, y that were used for fitting.
    print_scores(search.cv_results_, search.best_index_)
    create_plots(search, X, y)

    return search.best_params_

def evaluate_model(model, X, y, cv=4):
    '''
    Cross-validates a model, prints its average scores, then fits it on all of X, y and plots the results.
    -----------------------------------------------------------------------------------------------------
    model: Estimator to evaluate; it is fitted in place by this function
    X: Feature values
    y: Target values
    cv: Cross-validation setting forwarded to cross_validate (default 4)
    '''
    fold_scores = cross_validate(
        model,
        X,
        y,
        scoring=['accuracy', 'f1', 'roc_auc'],
        cv=cv,
        return_train_score=True,
    )
    print_scores(fold_scores)

    # Fit on the full data so create_plots receives a fitted model.
    model.fit(X, y)
    create_plots(model, X, y)