In [None]:
import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import toolbox_02450 as tb

In [None]:
class ensemble:
    """Helpers for binary confusion matrices, derived statistics and ROC plots.

    None of the methods read or write instance state; the class only groups
    the helpers under one name.
    """

    def plot_confusion_matrix(self, cm, title="Confusion matrix", cmap="Blues"):
        """Draw a 2x2 confusion matrix as an annotated heat map and show it.

        Args:
            cm: 2x2 array-like laid out as ``[[TP, FN], [FP, TN]]`` (rows are
                the true label, columns the predicted label).
            title: Title placed above the plot.
            cmap: Colormap name or ``Colormap`` object forwarded to
                ``plt.imshow``. A name is used as the default so that no
                Matplotlib call happens while the class is being defined.
        """
        cm = np.asarray(cm)
        plt.imshow(cm, interpolation="nearest", cmap=cmap)
        plt.title(title)
        plt.colorbar()
        tick_marks = np.arange(2)
        labels = ["positive", "negative"]
        plt.xticks(tick_marks, labels)
        plt.yticks(tick_marks, labels)
        plt.ylabel("True label")
        plt.xlabel("Predicted label")

        # Integer counts are printed as-is; anything else (e.g. a normalised
        # matrix) cannot be formatted with "d", so fall back to two decimals.
        fmt = "d" if np.issubdtype(cm.dtype, np.integer) else ".2f"
        # Cells darker than half the maximum get white text for contrast.
        thresh = cm.max() / 2.0
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(
                j,
                i,
                format(cm[i, j], fmt),
                horizontalalignment="center",
                color="white" if cm[i, j] > thresh else "black",
            )
        # Apply the layout once every artist has been added.
        plt.tight_layout()
        plt.show()

    def conf_matrix_stats(self, TP, FN, FP, TN, stats):
        """Compute statistics from confusion-matrix counts and print/plot them.

        Args:
            TP: Number of true positives.
            FN: Number of false negatives.
            FP: Number of false positives.
            TN: Number of true negatives.
            stats: A single option string or a collection of option strings
                selecting what to output:

                * ``"p"``      - precision
                * ``"r"``      - recall
                * ``"acc"``    - accuracy
                * ``"err"``    - error rate
                * ``"tpr"``    - true positive rate
                * ``"fpr"``    - false positive rate
                * ``"show"``   - plot the confusion matrix
                * ``"roc"``    - plot the (FPR, TPR) operating point
                * ``"f_meas"`` - F-measure
                * ``"all"``    - print a table with every numeric statistic

        A statistic whose denominator is zero (for example precision when
        nothing was predicted positive) is reported as NaN.
        """
        # A bare string must be treated as one option; otherwise ``in`` does
        # substring matching and e.g. "err" would also select "r".
        if isinstance(stats, str):
            stats = [stats]

        def ratio(numerator, denominator):
            # Division that yields NaN instead of raising or warning on 0/0.
            with np.errstate(divide="ignore", invalid="ignore"):
                return np.true_divide(numerator, denominator)

        # calculations
        cm = np.array([[TP, FN], [FP, TN]])
        N = TP + FN + FP + TN
        p = ratio(TP, TP + FP)
        r = ratio(TP, TP + FN)
        acc = ratio(TP + TN, N)
        err = ratio(FN + FP, N)
        TPR = ratio(TP, TP + FN)
        FPR = ratio(FP, TN + FP)
        F = ratio(2 * p * r, p + r)

        scalar_reports = [
            ("p", "The precision is {}", p),
            ("r", "The recall is {}", r),
            ("acc", "The accuracy is {}", acc),
            ("err", "The error is {}", err),
            ("tpr", "The true positive rate is {}", TPR),
            ("fpr", "The false positive rate is {}", FPR),
        ]
        for key, template, value in scalar_reports:
            if key in stats:
                print(template.format(value))

        if "show" in stats:
            self.plot_confusion_matrix(cm)

        if "roc" in stats:
            # A single (FPR, TPR) pair is invisible as a bare line plot, so
            # draw it with markers, joined to the (0, 0) and (1, 1) corners.
            plt.plot([0, FPR, 1], [0, TPR, 1], marker="o")
            plt.title("Receiver operating characteristic")
            plt.ylabel("TPR")
            plt.xlabel("FPR")
            plt.show()

        if "f_meas" in stats:
            print("The F measure is {}".format(F))

        if "all" in stats:
            table = pd.DataFrame(
                {
                    "Stat": [
                        "Precision",
                        "Recall",
                        "Accuracy",
                        "Error",
                        "TPR",
                        "FPR",
                        "F-measure",
                    ],
                    "Value": [p, r, acc, err, TPR, FPR, F],
                }
            )
            print(table)

    def make_conf_matrix(self, true_val, pred_val, show_conf_matrix=False):
        """Count confusion-matrix cells for binary labels and print the stats.

        Args:
            true_val: Sequence of correct labels, binarised so that 1 is the
                positive class and 0 the negative class.
            pred_val: Sequence of predicted labels, binarised the same way.
            show_conf_matrix: When true, the confusion matrix is plotted in
                addition to the statistics table.

        Returns:
            Tuple ``(TP, FP, FN, TN)`` with the number of true positives,
            false positives, false negatives and true negatives.

        The full statistics table is always printed as a side effect.
        """
        true_val = np.asarray(true_val)
        pred_val = np.asarray(pred_val)

        # True labels of the samples predicted positive / negative.
        pred_pos = true_val[pred_val == 1]
        pred_neg = true_val[pred_val == 0]

        TP = np.sum(pred_pos == 1)
        FP = np.sum(pred_pos == 0)
        FN = np.sum(pred_neg == 1)
        TN = np.sum(pred_neg == 0)

        stats = ["all", "show"] if show_conf_matrix else ["all"]
        self.conf_matrix_stats(TP=TP, FP=FP, FN=FN, TN=TN, stats=stats)

        return TP, FP, FN, TN

    def plot_roc(self, true_val, pred_val):
        """Plot a ROC curve for the given labels and return its AUC.

        To compare the plot with a set of candidate curves, look at where the
        curve has its elbow.

        Args:
            true_val: Sequence of correct labels, binarised so that 1 is the
                positive class and 0 the negative class.
            pred_val: Sequence of predicted labels, binarised the same way;
                passed to ``metrics.roc_curve`` as the score argument.

        Returns:
            The area under the ROC curve as computed by ``metrics.auc``.
        """
        fpr, tpr, _ = metrics.roc_curve(true_val, pred_val)
        roc_auc = metrics.auc(fpr, tpr)

        plt.title("Receiver Operating Characteristic")
        plt.plot(fpr, tpr, "b", label="AUC = %0.2f" % roc_auc)
        plt.legend(loc="lower right")
        # Dashed diagonal marks the chance-level reference line.
        plt.plot([0, 1], [0, 1], "r--")
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel("True Positive Rate")
        plt.xlabel("False Positive Rate")
        plt.show()

        return roc_auc

    def plot_roc_pred(self, truth, probabilities):
        """Plot a ROC curve from true labels and predicted class probabilities.

        Delegates to ``tb.rocplot``, which is called with the probabilities
        first and the labels second.

        Args:
            truth (list): True class labels.
            probabilities (list): Predicted class probabilities.
        """
        plt.figure(1)
        tb.rocplot(probabilities, truth)

        plt.show()