 Here we define some handy visualization functions. We'll import these from the other notebook, which keeps things less cluttered.

In [1]:
#@title Visualization Functions
import math

import matplotlib.pyplot as plt
import mplhep
import numpy as np
import seaborn as sb
from sklearn.metrics import roc_curve, auc, confusion_matrix
# Module-level side effect: select the "CMS" mplhep plotting style when this
# file is imported/executed, before any of the functions below draw anything.
mplhep.style.use("CMS")

def _correlation_heatmap(df, training_vars, tag):
    """Show the lower-triangle correlation heatmap of ``df[training_vars]``.

    ``tag`` is only used in the "Created Plot" message printed afterwards.
    """
    corr_matrix = df[training_vars].corr()
    # Mask the upper triangle (diagonal included); it mirrors the lower one.
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))

    plt.figure(figsize=(8, 8))
    sb.heatmap(corr_matrix, square=True, mask=mask, annot=False, fmt=".1f",
               cmap='vlag', vmin=-1, vmax=1)
    plt.show()
    print(f'Created Plot: CorrelationMatrix_{tag}')
    # Close instead of clear so finished figures do not accumulate.
    plt.close()


def variablePlots( modelname, sig_df, bkg_df, training_vars, bins=50, hist_range=(-5, 5)):
    """Plot signal-vs-background distributions and correlation matrices.

    Draws one weighted step histogram per training variable (background in
    red, signal in blue) on a two-column grid, then one correlation heatmap
    for the background frame and one for the signal frame.

    Args:
        modelname: Currently unused; kept so existing callers keep working
            (it previously only appeared in commented-out save paths).
        sig_df: Signal table. Must provide every column in ``training_vars``
            plus an ``'evtwt'`` column used as histogram weights.
            Presumably a pandas DataFrame (``.corr()`` is called on it).
        bkg_df: Background table with the same requirements as ``sig_df``.
        training_vars: Names of the columns to plot.
        bins: Number of histogram bins (default 50).
        hist_range: ``(low, high)`` histogram range (default ``(-5, 5)``).

    Returns:
        None. Figures are shown with ``plt.show()`` and then closed.
    """
    training_vars = list(training_vars)

    plt.figure(figsize=(24, 32))
    plt.subplots_adjust(wspace=0.3, hspace=0.3)

    # Two panels per row; round up so an odd number of variables still fits.
    nrows = math.ceil(len(training_vars) / 2)

    for nplt, var in enumerate(training_vars, start=1):
        ax = plt.subplot(nrows, 2, nplt)
        ax.hist(bkg_df[var], weights=bkg_df['evtwt'], histtype='step',
                color='red', label='Background', bins=bins, range=hist_range)
        ax.hist(sig_df[var], weights=sig_df['evtwt'], histtype='step',
                color='blue', label='Signal', bins=bins, range=hist_range)

        ax.set_xlabel(var)
        ax.set_ylabel('Events/Bin')
        ax.legend(loc='upper right')
    plt.show()
    print('Created Plot: varplots')
    plt.close()

    # Selecting training_vars directly is sufficient: dropping bookkeeping
    # columns first is redundant and fails when a frame lacks one of them.
    _correlation_heatmap(bkg_df, training_vars, 'Background')
    _correlation_heatmap(sig_df, training_vars, 'Signal')

def variablePlots_single( name, df, training_vars, bins=50, hist_range=(-5, 5)):
    """Plot the weighted distribution of each training variable for one sample.

    Args:
        name: Legend label for the sample.
        df: Table providing every column in ``training_vars`` plus an
            ``'evtwt'`` column used as histogram weights.
        training_vars: Names of the columns to plot, one panel each on a
            two-column grid.
        bins: Number of histogram bins (default 50).
        hist_range: ``(low, high)`` histogram range (default ``(-5, 5)``).

    Returns:
        None. The figure is shown with ``plt.show()`` and then cleared.
    """
    training_vars = list(training_vars)

    plt.figure(figsize=(12, 16))
    plt.subplots_adjust(wspace=0.3, hspace=0.3)

    # Two panels per row; round up so an odd number of variables still fits.
    nrows = math.ceil(len(training_vars) / 2)

    for nplt, var in enumerate(training_vars, start=1):
        ax = plt.subplot(nrows, 2, nplt)
        ax.hist(df[var], weights=df['evtwt'], histtype='step', color='black',
                label=name, bins=bins, range=hist_range)
        ax.set_xlabel(var)
        ax.set_ylabel('Events/Bin')
        # The histogram carries a label, so actually display it.
        ax.legend(loc='upper right')
    plt.show()
    print('Created Plot: varplots')
    plt.clf()

def compare_ROC_curve( name, model, data_1, labels_1, title_1, data_2, labels_2, title_2):
    """Overlay the ROC curves of ``model`` on two datasets.

    Draws onto the current matplotlib figure; it neither creates a new figure
    nor calls ``plt.show()``, so the caller controls both.

    Args:
        name: Currently unused; kept for interface compatibility (it only
            appeared in the commented-out output path).
        model: Object with a ``predict(data)`` method whose result is indexed
            as ``[:, 0]`` to obtain the score per event.
        data_1, labels_1, title_1: Inputs, true labels and legend title of the
            first dataset (drawn in red).
        data_2, labels_2, title_2: Same for the second dataset (drawn in blue).

    Returns:
        None.
    """
    samples = (
        (data_1, labels_1, title_1, 'red'),
        (data_2, labels_2, title_2, 'blue'),
    )
    for data, labels, title, color in samples:
        scores = model.predict(data)
        fpr, tpr, _ = roc_curve(labels, scores[:, 0])
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=2, color=color, label=f'{title} auc = {roc_auc:.3f}')

    # Beautify
    plt.plot([0, 1], [0, 1], linestyle='--', lw=2,
             color='k', label='random chance')
    plt.xlim([0, 1.0])
    plt.ylim([0, 1.0])
    plt.xlabel('false positive rate')
    plt.ylabel('true positive rate')
    plt.title('receiver operating curve')
    plt.legend(loc="lower right")
    plt.grid()

    # The message must be an f-string, otherwise the placeholders are printed
    # literally instead of the dataset titles.
    tag_1 = title_1.replace(" ", "_")
    tag_2 = title_2.replace(" ", "_")
    print(f'Created Plot: ROC_curve_{tag_1}_VS_{tag_2}')

def output_ROC_curve( name, model, output_data, output_labels):
    """Draw the ROC curve of ``model`` on a single dataset.

    Creates a new 12x8 figure and draws on it; ``plt.show()`` is left to the
    caller.

    Args:
        name: Model name, used only in the printed message.
        model: Object with a ``predict(data)`` method whose result is indexed
            as ``[:, 0]`` to obtain the score per event.
        output_data: Inputs passed to ``model.predict``.
        output_labels: True labels, one per prediction.

    Returns:
        None. If the number of predictions differs from the number of labels,
        a diagnostic is printed and nothing is drawn.
    """
    label_predict = model.predict(output_data)

    # Validate before creating the figure so a mismatch leaves no empty
    # figure behind.
    if len(label_predict) != len(output_labels):
        print('Lengths don\'t match!')
        print(f'outputs: {len(label_predict)}')
        print(f'labels: {len(output_labels)}')
        return

    plt.figure(figsize=(12, 8))

    fpr, tpr, _ = roc_curve(output_labels, label_predict[:, 0])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, color='red', label='Output NN auc = %.3f' % (roc_auc))

    # Beautify
    plt.plot([0, 1], [0, 1], linestyle='--', lw=2,
             color='k', label='random chance')
    plt.xlim([0, 1.0])
    plt.ylim([0, 1.0])
    plt.xlabel('false positive rate')
    plt.ylabel('true positive rate')
    plt.title('receiver operating curve')
    plt.legend(loc="lower right")
    plt.grid()
    # NOTE(review): the message names a file path, but saving is currently
    # disabled (no plt.savefig call) -- confirm whether it should be restored.
    print(f'Created Plot: plots/{name}/output_ROC_curve_{name}.png')

def ROC_curve( name, model, training_data, training_labels, testing_data, testing_labels):
    """Show train and test ROC curves of ``model`` on one figure.

    ``name`` is accepted for interface compatibility and is not used.
    ``model.predict`` is called on each dataset converted with ``np.array``,
    and column 0 of the result is taken as the score. The figure is shown
    and then cleared.
    """
    plt.figure(figsize=(12, 8))

    # (inputs, true labels, line colour, legend tag) -- training first.
    curves = (
        (training_data, training_labels, 'red', 'Train'),
        (testing_data, testing_labels, 'blue', 'Test'),
    )
    for inputs, truth, line_color, tag in curves:
        scores = model.predict(np.array(inputs))
        false_pos, true_pos, _ = roc_curve(truth, scores[:, 0])
        area = auc(false_pos, true_pos)
        plt.plot(false_pos, true_pos, lw=2, color=line_color,
                 label=f'{tag} NN auc = {area:.3f}')

    # Diagonal reference line plus axis cosmetics.
    plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='k', label='random chance')
    plt.xlim([0, 1.0])
    plt.ylim([0, 1.0])
    plt.xlabel('false positive rate')
    plt.ylabel('true positive rate')
    plt.title('receiver operating curve')
    plt.legend(loc="lower right")
    plt.grid()

    plt.show()
    print('Created Plot: ROC_curve')
    plt.clf()

def ROC_curve_single( name, model, data, label, color):
    """Draw one ROC curve of ``model`` onto the current figure.

    The legend tag is 'Train' when ``color`` is ``'red'`` and 'Test' for any
    other colour. ``name`` is accepted for interface compatibility and is not
    used. No figure is created and ``plt.show()`` is not called here.
    """
    tag = 'Train' if color == 'red' else 'Test'

    # Column 0 of the prediction is used as the score per event.
    scores = model.predict(data)
    false_pos, true_pos, _ = roc_curve(label, scores[:, 0])
    area = auc(false_pos, true_pos)

    # Reference diagonal first, then the curve itself.
    plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='k', label='random chance')
    plt.plot(false_pos, true_pos, lw=2, color=color, label=f'{tag} NN auc = {area:.3f}')

    plt.xlim([0, 1.0])
    plt.ylim([0, 1.0])
    plt.xlabel('false positive rate')
    plt.ylabel('true positive rate')
    plt.title('receiver operating curve')
    plt.legend(loc="lower right")
    plt.grid()
    print('Created Plot: ROC_curve')


def trainingPlots_Single( name, history, log=False):
    """Show loss and accuracy versus epoch as two separate figures.

    ``history.history`` is read as a mapping with the keys ``'loss'``,
    ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``. ``name`` is
    accepted for interface compatibility and is not used. With ``log=True``
    both y axes are switched to a logarithmic scale.
    """
    records = history.history

    # --- loss vs epoch ---
    plt.figure(figsize=(10, 5))
    plt.title('History')
    for key in ('loss', 'val_loss'):
        plt.plot(records[key], label=key)
    plt.grid(True)

    # Lower y limit: smallest loss rounded to one decimal minus a 0.1 margin,
    # falling back to 0.1 when that rounded minimum is not above 0.2.
    lowest = round(min(np.min(records['loss']), np.min(records['val_loss'])), 1)
    if lowest > 0.2:
        y_floor = lowest - 0.1
    else:
        y_floor = 0.1

    plt.legend(loc="upper right")
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.ylim([y_floor, 1.0])
    if log:
        plt.yscale('log')
    plt.show()
    print('Created Plot: historyLoss')
    plt.clf()

    # --- accuracy vs epoch ---
    plt.figure(figsize=(10, 5))
    plt.title('History')
    for key in ('accuracy', 'val_accuracy'):
        plt.plot(records[key], label=key)
    plt.grid(True)
    plt.legend(loc="lower right")
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.ylim([0.5, 1.0])
    if log:
        plt.yscale('log')
    plt.show()
    print('Created Plot: historyAccuracy')
    plt.clf()

def trainingPlots( name, history, log=False):
    """Show loss and accuracy versus epoch as two stacked panels.

    Args:
        name: Currently unused; kept for interface compatibility (it only
            appeared in the commented-out save path).
        history: Object whose ``history`` attribute maps ``'loss'``,
            ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'`` to
            per-epoch sequences.
        log: If true, use a logarithmic y axis on both panels.

    Returns:
        None. The figure is shown with ``plt.show()`` and then cleared.
    """
    plt.figure(figsize=(12, 16))
    plt.subplots_adjust(wspace=0.5, hspace=0.3)

    records = history.history

    # plot loss vs epoch
    ax = plt.subplot(2, 1, 1)
    # Title the top panel itself. Calling plt.title() before any subplot
    # exists would implicitly create an extra full-figure axes to hold it.
    ax.set_title('History')
    ax.plot(records['loss'], label='loss')
    ax.plot(records['val_loss'], label='val_loss')
    ax.legend(loc="upper right")
    ax.set_xlabel('epoch')
    ax.set_ylabel('loss')
    ax.set_ylim([0.3, 0.7])
    if log:
        ax.set_yscale('log')

    # plot accuracy vs epoch
    ax = plt.subplot(2, 1, 2)
    ax.plot(records['accuracy'], label='accuracy')
    ax.plot(records['val_accuracy'], label='val_accuracy')
    ax.legend(loc="lower right")
    ax.set_xlabel('epoch')
    ax.set_ylabel('accuracy')
    ax.set_ylim([0.7, 1.0])
    if log:
        ax.set_yscale('log')

    plt.show()
    print('Created Plot: history')
    plt.clf()

def _draw_disc_panel(train_bkg_pred, train_sig_pred, bin_centers, test_bkg, test_sig, train_bins, log):
    """Draw and show one train-vs-test discriminant figure.

    Training predictions are drawn as filled, density-normalised histograms;
    test densities are drawn as points at ``bin_centers``. Each class uses
    the same colour in both (background red, signal blue).
    """
    plt.figure(figsize=(12, 8))
    plt.title('NN Discriminant')
    plt.xlabel('NN Disc.')
    plt.ylabel('Events/Bin')
    if log:
        plt.ylim([0.03, 50])
        plt.yscale('log')

    plt.hist(train_bkg_pred, histtype='stepfilled', color='red', label='Background Train',
             bins=train_bins, range=(0, 1), density=True, alpha=0.5)
    plt.hist(train_sig_pred, histtype='stepfilled', color='blue', label='Signal Train',
             bins=train_bins, range=(0, 1), density=True, alpha=0.5)

    # NOTE(review): the error bars are 10% of sqrt(density), kept from the
    # original code; this is not a statistical uncertainty on the counts.
    plt.errorbar(x=bin_centers, y=test_bkg, yerr=np.sqrt(test_bkg) * .1,
                 fmt='o', color='red', label='Background Test')
    plt.errorbar(x=bin_centers, y=test_sig, yerr=np.sqrt(test_sig) * .1,
                 fmt='o', color='blue', label='Signal Test')

    plt.legend(loc='upper center')
    plt.show()
    # Close instead of clear so finished figures do not accumulate.
    plt.close()


def discPlot(name, model, train_sig, train_bkg, test_sig, test_bkg, norm, log=False):
    """Compare the model output for train and test signal/background samples.

    Always shows a linear-scale figure; with ``log=True`` a log-scale figure
    is shown first. Each figure gets its own 12x8 canvas.

    Args:
        name: Currently unused; kept for interface compatibility.
        model: Object with a ``predict(data)`` method returning the
            discriminant per event (values are histogrammed over [0, 1]).
        train_sig, train_bkg: Training inputs for signal and background.
        test_sig, test_bkg: Testing inputs for signal and background.
        norm: Currently unused; kept for interface compatibility.
        log: If true, additionally show a log-scale version (50 training
            bins instead of 25).

    Returns:
        None.
    """
    train_sig_pred = model.predict(train_sig)
    train_bkg_pred = model.predict(train_bkg)
    test_sig_pred = model.predict(test_sig)
    test_bkg_pred = model.predict(test_bkg)

    # Bin the test predictions numerically instead of drawing throwaway
    # histograms that would have to be cleared again.
    nb, edges = np.histogram(np.ravel(test_bkg_pred), bins=25, range=(0, 1), density=True)
    ns, _ = np.histogram(np.ravel(test_sig_pred), bins=25, range=(0, 1), density=True)
    bin_centers = 0.5 * (edges[1:] + edges[:-1])

    if log:
        _draw_disc_panel(train_bkg_pred, train_sig_pred, bin_centers, nb, ns,
                         train_bins=50, log=True)
        print('Created Plot: NN_disc_log')

    # Now linear...
    _draw_disc_panel(train_bkg_pred, train_sig_pred, bin_centers, nb, ns,
                     train_bins=25, log=False)
    print('Created Plot: NN_disc')

def confusionMatrix( name, model, data, labels, threshold=0.5):
    """Show and print the row-normalised confusion matrix of ``model``.

    Args:
        name: Currently unused; kept for interface compatibility (it only
            appeared in the commented-out save path).
        model: Object with a ``predict(data)`` method returning a score per
            event.
        data: Inputs passed to ``model.predict``.
        labels: True labels. Row/column 0 of the matrix is reported as
            background and row/column 1 as signal.
        threshold: Scores strictly above this value count as predicted
            signal (default 0.5).

    Returns:
        None. The heatmap is shown, then the four rates and the number of
        signal events are printed.
    """
    predictions = model.predict(data)
    decisions = predictions > threshold

    # Rates (each true-label row sums to 1) for display, raw counts for totals.
    cm = confusion_matrix(labels, decisions, normalize='true')
    counts = confusion_matrix(labels, decisions)

    plt.figure(figsize=(10,10))
    sb.heatmap(cm, annot=True, fmt="f")
    plt.title('Confusion matrix @{:.2f}'.format(threshold))
    plt.ylabel('Actual label', loc='center')
    plt.xlabel('Predicted label', loc='center')
    plt.show()
    print(f'Created Plot: confusionMatrix_{str(threshold).replace(".", "p")}')
    plt.clf()

    print('>>> Confusion Matrix...')
    print(f'\tBackground Events Correctly Detected (True Negatives): {cm[0][0]:.3f}')
    print(f'\tBackground faking Signal (False Positives): {cm[0][1]:.3f}')
    print(f'\tSignal faking Background (False Negatives): {cm[1][0]:.3f}')
    print(f'\tSignal Events Correctly Detected (True Positives): {cm[1][1]:.3f}')
    # Summing a row of the normalised matrix always gives 1.0, so take the
    # signal total from the raw counts instead.
    print(f'\tTotal Signal Events: {np.sum(counts[1])}')
