In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, cross_validate, train_test_split
from sklearn.metrics import ConfusionMatrixDisplay, RocCurveDisplay
from matplotlib import pyplot as plt

In [None]:
def one_hot_encode(attributes):
    """Return ``pd.get_dummies(attributes)``.

    The input is handed to pandas unchanged; which columns get expanded
    into indicator columns is decided by ``pd.get_dummies`` defaults.
    """
    return pd.get_dummies(attributes)


def train(
    model,
    attributes,
    labels,
    n_folds,
    GS_grid=None):
    """Fit ``model`` with cross-validation, optionally via a grid search.

    Args:
        model: Estimator passed to ``GridSearchCV`` / ``cross_validate``.
        attributes: Feature container exposing ``to_numpy()``.
        labels: Target container exposing ``to_numpy()``.
        n_folds: Value forwarded as ``cv`` to the scikit-learn helpers.
        GS_grid: Optional parameter grid. When truthy a grid search is run;
            otherwise (``None`` or empty) plain cross-validation is used.

    Returns:
        A tuple ``(fitted, X, y)`` where ``X`` and ``y`` are the NumPy
        versions of ``attributes`` and ``labels``. ``fitted`` is the fitted
        ``GridSearchCV`` object when a grid was given; otherwise it is the
        estimator fitted on the *last* cross-validation fold only (it has
        not been refit on the full data).
    """
    X = attributes.to_numpy()
    y = labels.to_numpy()

    if GS_grid:
        print("Grid search...")
        search = GridSearchCV(model, GS_grid, cv=n_folds, scoring="accuracy")
        search.fit(X, y)
        # Real newlines here; the message previously contained a literal "/n".
        print(
            f"Best model: {search.best_estimator_}\n"
            f"Best params: {search.best_params_}\n"
            f"Accuracy: {search.best_score_}"
        )
        return search, X, y

    cv = cross_validate(
        model, X, y, cv=n_folds,
        return_estimator=True,
        scoring="accuracy")

    # The "accuracy" scorer yields fractions in [0, 1]; the percent format
    # spec scales them by 100 so the "%" sign in the message is truthful.
    fold_scores = cv["test_score"]
    print(f"Accuracy: {fold_scores.mean():.2%} +- {fold_scores.std():.2%}")

    return cv["estimator"][-1], X, y
    

def get_confusion_matrix(trained_model, X, y):
    """Draw a confusion matrix for ``trained_model`` on a random 20% split.

    A fresh, unseeded ``train_test_split`` is drawn from ``X`` / ``y`` on
    every call and only the test portion is used. Nothing is returned; the
    plot is produced by ``ConfusionMatrixDisplay.from_estimator``.

    NOTE(review): if ``trained_model`` was fitted on these same ``X`` / ``y``
    the held-out rows are not unseen data - confirm with the caller.
    """
    # Only the held-out part of the split is needed for the plot.
    _, held_out_X, _, held_out_y = train_test_split(X, y, test_size=0.2)
    ConfusionMatrixDisplay.from_estimator(trained_model, held_out_X, held_out_y)


def get_roc_auc(trained_models, Xs, ys):
    """Plot the ROC curves of several fitted models on one shared axes.

    Args:
        trained_models: Iterable of fitted estimators.
        Xs: Iterable of feature arrays, one per model.
        ys: Iterable of label arrays, one per model.

    The three iterables are walked in lockstep with ``zip``, so extra items
    in the longer ones are ignored. For each model a fresh, unseeded 20%
    test split is drawn from its ``X`` / ``y`` and used for the curve.

    NOTE(review): if a model was fitted on the same ``X`` / ``y`` the
    held-out rows are not unseen data - confirm with the caller.
    """
    shared_ax = None
    for trained_model, X, y in zip(trained_models, Xs, ys):
        _, X_test, _, y_test = train_test_split(X, y, test_size=0.2)
        # ``ax`` must be a matplotlib Axes (or None to create a new figure).
        # Reuse the axes of the first display so later curves overlay it,
        # rather than passing the RocCurveDisplay object itself.
        display = RocCurveDisplay.from_estimator(
            trained_model, X_test, y_test, ax=shared_ax)
        shared_ax = display.ax_
    
    