In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV

In [2]:
# This class is adapted from code written by my GA colleague Patrick Wales-Dinan
class GridSearchContainer:
    """Run several grid searches on one training set and collect their results.

    Attributes:
        model_params: dict mapping ``"<mod_name>_<count>"`` to that search's
            best parameters plus its best score (stored under the scoring name).
        best_models: list of ``(best_estimator, best_score)`` tuples, one per
            call to ``search``, in call order.
        model_df: DataFrame view of ``model_params`` (one row per search).
        count: running counter used to build the row keys.
    """

    def __init__(self, past_gsc_df_path=None):
        """Create a container, optionally resuming from a CSV written by ``save_grid``.

        Arguments:
            past_gsc_df_path = path to a previously saved results CSV, or None
                to start with an empty results table.
        """
        self.model_params = {}
        self.best_models = []
        self.count = 0
        if past_gsc_df_path:
            self.model_df = pd.read_csv(past_gsc_df_path, index_col="Unnamed: 0")
            # search() rebuilds model_df from model_params, so the loaded rows
            # must be copied into model_params or the first search drops them.
            unique_rows = self.model_df[~self.model_df.index.duplicated(keep="last")]
            self.model_params = unique_rows.to_dict(orient="index")
            # Continue numbering after the loaded rows so new keys do not
            # overwrite old ones.
            self.count = len(self.model_params)
        else:
            # An instance, not the DataFrame class itself, so save_grid() works
            # even before the first search.
            self.model_df = pd.DataFrame()

    def set_data(self, X_train, y_train):
        """Store the training features and targets used by every later search."""
        self.X_train, self.y_train = X_train, y_train

    def search(self, estimator, params, cv=5, mod_name='model', evaluator='accuracy'):
        '''
        GridSearchContainer Search Method
            Arguments:
                estimator = model to tune
                params = parameter grid passed to GridSearchCV as param_grid
                cv = cross-validation setting passed to GridSearchCV
                mod_name = model name for display (prefix of the result row key)
                evaluator = scoring name, e.g. 'accuracy' or 'precision' or 'recall'
            Raises:
                AttributeError if set_data() has not been called yet.
        '''
        if getattr(self, "X_train", None) is None or getattr(self, "y_train", None) is None:
            raise AttributeError("No training data: call set_data(X_train, y_train) before search().")

        gs = GridSearchCV(estimator,
                          param_grid=params,
                          cv=cv,
                          scoring=evaluator)
        gs.fit(self.X_train, self.y_train)

        # Copy before adding the score so the fitted search object's own
        # best_params_ dict is left untouched.
        record = dict(gs.best_params_)
        record[evaluator] = gs.best_score_
        self.model_params[f'{mod_name}_{self.count}'] = record

        self.model_df = (
            pd.DataFrame.from_dict(self.model_params, orient='index')
            .sort_values(by=evaluator, ascending=False)
        )

        self.best_models.append((gs.best_estimator_, gs.best_score_))
        self.count += 1

    def save_grid(self, path="./grid_searches.csv"):
        """Write the current results table to ``path`` as CSV."""
        self.model_df.to_csv(path)

In [2]:
# This function is adapted from code written by my GA colleague Tony Lucci
def report_model_evalution(model, X_test, y_test, savefig=False, figname="confusion_matrix", path="../images/"):
    """Plot a binary confusion matrix for ``model`` and print summary metrics.

    The heatmap rows are the predicted class and the columns the actual class
    (True first, then False). Accuracy, specificity, sensitivity and precision
    are printed as percentages rounded to four decimals; a metric whose
    denominator is zero is printed as ``nan``.

    Args:
        model: fitted estimator exposing ``predict(X)``.
        X_test: features passed to ``model.predict``.
        y_test: true labels compared against the predictions.
        savefig: when True, save the heatmap as a PNG.
        figname: file name (without extension) used when saving.
        path: prefix prepended to ``figname`` by plain string concatenation,
            so a directory must include its trailing separator.

    Raises:
        ValueError: if the confusion matrix is not 2x2 (for example when only
            one class occurs in both ``y_test`` and the predictions).
    """
    y_preds = model.predict(X_test)

    cm = confusion_matrix(y_test, y_preds)
    if cm.shape != (2, 2):
        raise ValueError(
            "report_model_evalution expects a binary problem with a 2x2 confusion "
            f"matrix, but got shape {cm.shape}; check that both classes appear in "
            "y_test or the predictions."
        )
    tn, fp, fn, tp = cm.ravel()

    def _pct(numerator, denominator):
        # Percentage rounded to 4 decimals; NaN instead of a numpy
        # divide-by-zero warning when the denominator is empty.
        if denominator == 0:
            return float("nan")
        return round(numerator / denominator * 100, 4)

    # Apply the seaborn theme before the figure exists so the title and axis
    # labels get the scaled fonts on the first call as well as later ones.
    sns.set(font_scale=1.4)
    fig, ax = plt.subplots(figsize=(10, 7))

    df_cm = pd.DataFrame([[tp, fp],
                          [fn, tn]],
                         index=[True, False], columns=[True, False])
    # Draw on the axes created above rather than whatever axes is current.
    sns.heatmap(df_cm, annot=True, annot_kws={"size": 16}, cmap="Blues", fmt="g", ax=ax)
    ax.set_title("Confusion Matrix of Protein-Protein Interactions", pad=20)
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")

    if savefig:
        fig.savefig(path + figname + ".png")

    print('False'.center(40, "-"))
    print(f'(0,0) True Negative - {tn}')
    print(f'(0,1) False Positive - {fp}')
    print('True'.center(40, "-"))
    print(f'(1,0) False Negative - {fn}')
    print(f'(1,1) True Positive - {tp}')
    print()

    separator = '---------------------------------------------------------------------------------------'

    # Accuracy: share of all predictions that were correct
    print(f'Accuracy: {_pct(tn + tp, tn + tp + fn + fp)}% - (1) Correct Predictions / ALL predictions')
    print(separator)

    # Specificity: share of actual negatives predicted negative
    print(f'Specificity: {_pct(tn, tn + fp)}% - (0) Correctly Predicted NO INTERACTIONS / ALL NO INTERACTIONS protein pairs')
    print(separator)

    # Sensitivity: share of actual positives predicted positive
    print(f'Sensitivity: {_pct(tp, tp + fn)}% - (1) Correctly Predicted INTERACTIONS / ALL INTERACTIONS protein pairs')
    print(separator)

    # Precision: share of predicted positives that are actual positives
    print(f'Precision: {_pct(tp, tp + fp)}% - (1) Correctly Predicted INTERACTIONS / Predicted INTERACTIONS protein pairs')
    print(separator)