In [3]:
import numpy as np
from my_evaluation import my_evaluation
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from collections import Counter
from pprint import pprint
from copy import deepcopy
from pdb import set_trace

In [1]:
class my_evaluation:
    """Evaluate binary or multi-class classification results.

    Each data point belongs to exactly one class.  Metrics are derived from
    a per-class one-vs-rest confusion matrix that is computed lazily the
    first time any metric is requested.
    """

    def __init__(self, predictions, actuals, pred_proba=None):
        """Store the inputs and work out the list of classes.

        Args:
            predictions: list of predicted classes.
            actuals: list of ground-truth classes (same length as
                ``predictions``).
            pred_proba: optional pd.DataFrame of prediction probabilities,
                one column per class.  When given, its keys define
                ``classes_``; otherwise ``classes_`` is the set of labels
                seen in ``predictions`` and ``actuals``.

        Raises:
            ValueError: if ``predictions`` and ``actuals`` differ in shape.
        """
        self.predictions = np.array(predictions)
        self.actuals = np.array(actuals)
        # Fail early with a clear message instead of an obscure error later.
        if self.predictions.shape != self.actuals.shape:
            raise ValueError(
                "predictions and actuals must have the same shape, got "
                "%s and %s" % (self.predictions.shape, self.actuals.shape)
            )
        self.pred_proba = pred_proba
        if self.pred_proba is not None:
            self.classes_ = list(self.pred_proba.keys())
        else:
            self.classes_ = list(set(list(self.predictions) + list(self.actuals)))
        self.confusion_matrix = None
        self.acc = None

    def confusion(self):
        """Compute accuracy and the one-vs-rest confusion matrix.

        Sets ``self.acc`` (fraction of correct predictions, 0.0 when there
        are no samples) and
        ``self.confusion_matrix = {label: {"TP", "TN", "FP", "FN"}}`` with an
        entry for every label in ``self.classes_``.  Prints the matrix and
        returns nothing.
        """
        pairs = list(zip(self.predictions, self.actuals))
        n_samples = len(pairs)
        n_correct = sum(1 for predicted, actual in pairs if predicted == actual)
        self.acc = float(n_correct) / n_samples if n_samples else 0.0

        matrix = {}
        for label in self.classes_:
            tp = fp = fn = tn = 0
            for predicted, actual in pairs:
                predicted_is_label = predicted == label
                actual_is_label = actual == label
                if predicted_is_label and actual_is_label:
                    tp += 1
                elif predicted_is_label:
                    fp += 1
                elif actual_is_label:
                    fn += 1
                else:
                    tn += 1
            # Stored once per label, outside the sample loop, so every class
            # gets an entry even when there are no samples at all.
            matrix[label] = {"TP": tp, "TN": tn, "FP": fp, "FN": fn}
        self.confusion_matrix = matrix

        print("Confusion Matrix:")
        print(self.confusion_matrix)
        print()
        return

    def accuracy(self):
        """Return the fraction of predictions equal to the ground truth."""
        if self.confusion_matrix is None:
            self.confusion()
        return self.acc

    @staticmethod
    def _safe_ratio(numerator, denominator):
        # Plain division that yields 0.0 instead of dividing by zero.
        return float(numerator) / denominator if denominator else 0.0

    def _class_precision(self, label):
        # TP / (TP + FP) for a single class.
        counts = self.confusion_matrix[label]
        return self._safe_ratio(counts["TP"], counts["TP"] + counts["FP"])

    def _class_recall(self, label):
        # TP / (TP + FN) for a single class.
        counts = self.confusion_matrix[label]
        return self._safe_ratio(counts["TP"], counts["TP"] + counts["FN"])

    def _class_f1(self, label):
        # Harmonic mean of the single-class precision and recall.
        prec = self._class_precision(label)
        rec = self._class_recall(label)
        if prec + rec == 0:
            return 0.0
        return 2 * ((prec * rec) / (prec + rec))

    def _average_over_classes(self, score_of, average, unknown_message):
        # Combine per-class scores with "macro" (equal) or "weighted"
        # (proportional to the number of true samples of the class) weights.
        n_samples = len(self.actuals)
        total = 0
        for label in self.classes_:
            if average == "macro":
                weight = 1.0 / len(self.classes_)
            elif average == "weighted":
                counts = self.confusion_matrix[label]
                # TP + FN is the number of samples whose true class is label.
                weight = self._safe_ratio(counts["TP"] + counts["FN"], n_samples)
            else:
                raise Exception(unknown_message)
            total += score_of(label) * weight
        return total

    def precision(self, target=None, average="macro"):
        """Compute precision.

        Args:
            target: target class.  If it is one of ``classes_``, the
                precision of that class is returned; otherwise the average
                precision is returned.
            average: {"macro", "micro", "weighted"}, used when averaging.

        Returns:
            float precision; a class with no predicted samples scores 0.

        Raises:
            Exception: if averaging is requested with an unknown ``average``.
        """
        if self.confusion_matrix is None:
            self.confusion()

        if target in self.classes_:
            return self._class_precision(target)
        if average == "micro":
            # With one label per sample, micro precision equals accuracy.
            return self.accuracy()
        return self._average_over_classes(
            self._class_precision, average, "Unknown type of average."
        )

    def recall(self, target=None, average="macro"):
        """Compute recall.

        Args:
            target: target class.  If it is one of ``classes_``, the recall
                of that class is returned; otherwise the average recall is
                returned.
            average: {"macro", "micro", "weighted"}, used when averaging.

        Returns:
            float recall; a class with no true samples scores 0.

        Raises:
            Exception: if averaging is requested with an unknown ``average``.
        """
        if self.confusion_matrix is None:
            self.confusion()

        if target in self.classes_:
            return self._class_recall(target)
        if average == "micro":
            # With one label per sample, micro recall equals accuracy.
            return self.accuracy()
        # Weighted averaging divides class support by the number of samples
        # (not the number of classes), so the weights sum to 1.
        return self._average_over_classes(
            self._class_recall, average, "Unknown type of average."
        )

    def f1(self, target=None, average="macro"):
        """Compute the F1 score.

        Args:
            target: target class.  If it is one of ``classes_``, the F1 of
                that class is returned; otherwise the average F1 is returned.
            average: {"macro", "micro", "weighted"}, used when averaging.

        Returns:
            float F1; a class whose precision and recall are both 0 scores 0.

        Raises:
            Exception: if averaging is requested with an unknown ``average``.
        """
        if self.confusion_matrix is None:
            self.confusion()

        if target in self.classes_:
            return self._class_f1(target)
        if average == "micro":
            # With one label per sample, micro F1 equals accuracy.
            return self.accuracy()
        return self._average_over_classes(
            self._class_f1, average, "unknown type of average"
        )

    def auc(self, target):
        """Compute the area under the ROC curve for one class.

        Samples are visited in order of decreasing predicted probability of
        ``target``; every negative sample advances the false-positive rate
        and adds a rectangle of height equal to the current true-positive
        rate.

        NOTE: samples with equal probability are visited one at a time in
        argsort order, so the result can depend on how ties are ordered.

        Args:
            target: the class treated as positive.

        Returns:
            The AUC for ``target`` as a number, or None when no
            ``pred_proba`` was supplied.

        Raises:
            Exception: if ``target`` is not one of ``classes_``.
        """
        if self.pred_proba is None:
            return None
        if target not in self.classes_:
            raise Exception("Unknown target class.")

        order = np.argsort(np.asarray(self.pred_proba[target]))[::-1]
        # Class totals are loop-invariant, so they are counted once up front.
        n_positive = Counter(self.actuals)[target]
        n_negative = len(self.actuals) - n_positive

        tp = 0
        fp = 0
        tpr = 0
        fpr = 0
        auc_target = 0
        for i in order:
            if self.actuals[i] == target:
                # Positive sample: the curve moves up.
                tp += 1
                tpr = tp / n_positive if n_positive else 0
            else:
                # Negative sample: the curve moves right; add the strip.
                fp += 1
                pre_fpr = fpr
                fpr = fp / n_negative if n_negative else 0
                auc_target += tpr * (fpr - pre_fpr)
        return auc_target




In [7]:
# Read the Iris training set from disk
data_train = pd.read_csv("../data/Iris_train.csv")
# Feature columns go into X, the species label into y
independent = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]
X, y = data_train[independent], data_train["Species"]
# Train a depth-2 decision tree that splits on entropy
clf = DecisionTreeClassifier(criterion="entropy", max_depth=2)
clf.fit(X, y)
# Class predictions for the same rows the tree was fitted on
predictions = clf.predict(X)
print("Predictions:")
print(predictions)
print()
# Per-class probabilities, arranged as one DataFrame column per class label
probs = clf.predict_proba(X)
probs = pd.DataFrame(dict(zip(clf.classes_, probs.T)))
# Score the predictions class by class
metrics = my_evaluation(predictions, y, probs)
result = {}
for target in clf.classes_:
    result[target] = {
        "prec": metrics.precision(target),
        "recall": metrics.recall(target),
        "f1": metrics.f1(target),
        "auc": metrics.auc(target),
    }
print()
print("Results:")
print(result)
print()
# Averaged F1 under each supported averaging scheme
f1 = {}
for average in ["macro", "micro", "weighted"]:
    f1[average] = metrics.f1(target=None, average=average)
print("Average F1 scores: ")
print(f1)

Predictions:
['Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'I