In [3]:
import numpy as np

In [14]:
class Box2D:
    """Axis-aligned 2D bounding box given by its two corner points.

    :param xmin: x coordinate of the first corner
    :param ymin: y coordinate of the first corner
    :param xmax: x coordinate of the opposite corner
    :param ymax: y coordinate of the opposite corner
    :param label: optional label stored with the box; it is not used by
        any computation in this class
    """

    def __init__(self, xmin, ymin, xmax, ymax, label=None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.label = label

    def calc_area(self):
        """Return the box area, computed as (xmax - xmin) * (ymax - ymin)."""
        w = self.xmax - self.xmin
        h = self.ymax - self.ymin
        return w*h

    def calc_iou(self, other):
        """Return the intersection-over-union ratio between this box and ``other``.

        :param other: another ``Box2D``
        :return: intersection area divided by union area, or ``0.0`` when
            the union area is not positive (e.g. both boxes are degenerate)
        """
        # Corners of the intersection rectangle.
        x1 = max(self.xmin, other.xmin)
        x2 = min(self.xmax, other.xmax)
        y1 = max(self.ymin, other.ymin)
        y2 = min(self.ymax, other.ymax)

        # Clamp each extent at zero so disjoint boxes yield an empty intersection
        # instead of a spurious positive product of two negative extents.
        inter_area = max(x2 - x1, 0.0) * max(y2 - y1, 0.0)

        union_area = self.calc_area() + other.calc_area() - inter_area
        if union_area <= 0:
            # Guard against dividing by zero for zero-area boxes.
            return 0.0

        return inter_area / union_area
    
def bb_intersection_over_union(boxA: tuple[float, float, float, float],
                               boxB: tuple[float, float, float, float]) -> float:
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is indexed as ``(xmin, ymin, xmax, ymax)``; any indexable
    sequence of four numbers (tuple, list, ...) works.

    :param boxA: first box
    :param boxB: second box
    :return: intersection area divided by union area as a float; ``0.0``
        when the boxes do not overlap
    """
    # Corners of the intersection rectangle.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp each extent at zero: for disjoint boxes both extents can be
    # negative, and their raw product would look like a positive overlap.
    interArea = max(xB - xA, 0) * max(yB - yA, 0)
    if interArea == 0:
        # Early exit also avoids a zero division for degenerate boxes.
        return 0.0

    # Areas of the two input rectangles.
    boxAArea = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
    boxBArea = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))

    # Intersection over union: the union is the sum of both areas minus
    # the intersection, which would otherwise be counted twice.
    return interArea / float(boxAArea + boxBArea - interArea)
    

In [103]:
# Pointing out a wrong IoU implementation in https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
# Each case: (headline, box A, box B, value quoted for the published
# function, value quoted for the ptyshevs correction).
for headline, boxA, boxB, published_value, corrected_value in (
    ('Two boxes with overlap',
     [0., 0., 10., 10.], [1., 1., 11., 11.],
     '0.704225352113', '0.680672268908'),
    ('Two boxes with no overlap',
     [0., 0., 10., 10.], [12., 12., 22., 22.],
     '0.0', '0.0204081632653'),
):
    print(headline)
    box2DA = Box2D(*boxA)
    box2DB = Box2D(*boxB)

    # Both implementations must agree on every case.
    correct = bb_intersection_over_union(boxA, boxB)
    iou = box2DA.calc_iou(box2DB)
    assert(iou == correct), "IOU calc results are different!"

    report = ('Correct solution - also analytical: {0}\n'
              'Solution by published function: {1}\n'
              'Solution by correction (ptyshevs): {2}')
    print(report.format(correct, published_value, corrected_value))

Two boxes with overlap
Correct solution - also analytical: 0.680672268907563
Solution by published function: 0.704225352113
Solution by correction (ptyshevs): 0.680672268908
Two boxes with no overlap
Correct solution - also analytical: 0
Solution by published function: 0.0
Solution by correction (ptyshevs): 0.0204081632653


In [175]:
# Receiver operating characteristic (ROC) graph
# https://towardsdatascience.com/roc-curve-and-auc-from-scratch-in-numpy-visualized-2612bb9459ab
class ROCAUC:
    """ROC curve and area under it for binary scores at a set of thresholds.

    A sample is predicted positive when its score is ``>= threshold``;
    a ground-truth value equal to ``1`` is the positive class and any
    other value is treated as negative by the pure-Python implementation.

    :param gts: ground-truth labels, same length as ``ests``
    :param ests: estimated scores, one per ground-truth label
    :param thresholds: iterable of decision thresholds to evaluate
    """

    def __init__(self, gts, ests, thresholds):
        assert(len(gts) == len(ests)), "gts and ests must have the same length!"
        self.gts = gts
        self.ests = ests
        self.thresholds = thresholds

    @staticmethod
    def _rate(hits, total):
        """Return ``hits / total`` as a float, or NaN when ``total`` is zero."""
        return hits / total if total else float("nan")

    @staticmethod
    def _auc_from_points(fpr_list, tpr_list):
        """Integrate TPR over FPR with the trapezoidal rule."""
        fpr = np.asarray(fpr_list, dtype=float)
        tpr = np.asarray(tpr_list, dtype=float)
        # Thresholds usually ascend, which makes FPR descend; order the
        # points by FPR (ties broken by TPR) so the integral is positive.
        order = np.lexsort((tpr, fpr))
        fpr, tpr = fpr[order], tpr[order]
        return float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0))

    def calc_tpr_fpr(self):
        """Calculate TPR and FPR for every threshold with plain Python loops.

        :return: ``(tpr_list, fpr_list)``, one entry per threshold in the
            order the thresholds were given; an entry is NaN when the
            data holds no positives (TPR) or no negatives (FPR)
        """
        tpr_list = []
        fpr_list = []
        for thres in self.thresholds:
            tp, fp, tn, fn = 0, 0, 0, 0
            for gt, est in zip(self.gts, self.ests):
                if gt == 1:
                    if est >= thres:
                        tp += 1
                    else:
                        fn += 1
                else:
                    if est >= thres:
                        fp += 1
                    else:
                        tn += 1
            tpr_list.append(self._rate(tp, tp + fn))
            fpr_list.append(self._rate(fp, fp + tn))
        return tpr_list, fpr_list

    def true_false_positive(self, threshold_vector, y_test):
        """Calculate TPR and FPR from 0/1 predictions and 0/1 labels.

        :param threshold_vector: predictions, 1 for positive and 0 for negative
        :param y_test: ground-truth labels, 1 for positive and 0 for negative
        :return: ``(tpr, fpr)``; a rate is NaN when its denominator is zero
        """
        true_positive = np.equal(threshold_vector, 1) & np.equal(y_test, 1)
        true_negative = np.equal(threshold_vector, 0) & np.equal(y_test, 0)
        false_positive = np.equal(threshold_vector, 1) & np.equal(y_test, 0)
        false_negative = np.equal(threshold_vector, 0) & np.equal(y_test, 1)

        tp = int(true_positive.sum())
        tn = int(true_negative.sum())
        fp = int(false_positive.sum())
        fn = int(false_negative.sum())

        return self._rate(tp, tp + fn), self._rate(fp, fp + tn)

    def calc_tpr_fpr_np(self):
        """NumPy counterpart of ``calc_tpr_fpr``.

        :return: ``(tpr_list, fpr_list)``, one entry per threshold
        """
        tpr_list = []
        fpr_list = []
        for thres in self.thresholds:
            threshold_vector = np.greater_equal(self.ests, thres).astype(int)
            tpr, fpr = self.true_false_positive(threshold_vector, self.gts)
            tpr_list.append(tpr)
            fpr_list.append(fpr)
        return tpr_list, fpr_list

    def calc_auc(self):
        """Return the area under the ROC curve sampled at ``self.thresholds``.

        The value is a trapezoidal approximation, so it only covers the
        FPR range actually reached by the supplied thresholds.
        """
        tpr_list, fpr_list = self.calc_tpr_fpr()
        return self._auc_from_points(fpr_list, tpr_list)

    def plot(self):
        """Plot the ROC curve with a dashed diagonal and print the AUC."""
        # Imported here so the class does not depend on another cell
        # having imported pyplot first.
        import matplotlib.pyplot as plt

        tpr_list, fpr_list = self.calc_tpr_fpr()
        tpr_list_np, fpr_list_np = self.calc_tpr_fpr_np()
        # The two implementations must agree (NaN entries compare equal).
        assert np.allclose(tpr_list, tpr_list_np, equal_nan=True)
        assert np.allclose(fpr_list, fpr_list_np, equal_nan=True)

        thresholds = self.thresholds
        plt.plot(fpr_list, tpr_list)
        plt.plot(thresholds,thresholds,'--')
        plt.xlabel("False Positive Rate (FPR)--->")
        plt.ylabel("True Positive Rate (TPR)--->")
        plt.title("ROC Curve")
        plt.show()
        # Integrate TPR over FPR; integrating the FPR samples over an
        # arbitrary fixed step does not measure the area under the curve.
        area = self._auc_from_points(fpr_list, tpr_list)
        print("AUC:Area under the ROC curve is", area)

In [2]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

# Synthetic binary classification problem. Every random step is seeded so
# the ROC curve and the AUC are the same on each run of this cell.
X, y = make_classification(n_samples=1000, n_informative=10, n_features=20, flip_y=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Column 1 of predict_proba is used as the score for the positive class.
prob_vector = model.predict_proba(X_test)[:, 1]

gts = y_test
ests = prob_vector
thresholds = np.linspace(0, 1, num=100)
roc_auc = ROCAUC(gts, ests, thresholds)
roc_auc.plot()

NameError: name 'ROCAUC' is not defined

In [113]:
# Three ways of adding an axis to a 1-D array, printed with their shapes.
p = np.arange(3)
newp = p[np.newaxis, :]       # leading axis  -> shape (1, 3)
newp2 = p[:, np.newaxis]      # trailing axis -> shape (3, 1)
newp3 = p.reshape((-1, 1))    # same shape as newp2, via reshape
print("\n".join("{}, {}".format(arr, arr.shape) for arr in (p, newp, newp2, newp3)))

[0 1 2], (3,)
[[0 1 2]], (1, 3)
[[0]
 [1]
 [2]], (3, 1)
[[0]
 [1]
 [2]], (3, 1)


In [100]:
class ConfusionMatrix:
    """Confusion matrix for paired estimated and ground-truth labels.

    Rows are indexed by ground-truth label and columns by estimated
    label, both in the sorted order stored in ``self.label``.

    :param ests: estimated labels
    :param gts: ground-truth labels, same length as ``ests``
    """

    def __init__(self, ests, gts):
        assert(len(ests) == len(gts)), "ests and gts should have the same length."
        self.ests = ests
        self.gts = gts
        # Include labels that only ever appear as estimates; otherwise
        # counting them would fail with a KeyError.
        self.label = sorted(set(gts) | set(ests))
        self.matrix = [[0]*len(self.label) for _ in self.label]

    def calc_cm(self):
        """Count every (ground truth, estimate) pair and return the matrix.

        The counts are rebuilt from scratch on each call, so calling this
        method repeatedly returns the same result.

        :return: list of rows, where ``matrix[i][j]`` is the number of
            samples with ground truth ``self.label[i]`` and estimate
            ``self.label[j]``
        """
        index_map = {key: i for i, key in enumerate(self.label)}
        matrix = [[0]*len(self.label) for _ in self.label]
        # Use the instance's own data, not module-level variables that
        # merely happen to share these names.
        for est, gt in zip(self.ests, self.gts):
            matrix[index_map[gt]][index_map[est]] += 1
        self.matrix = matrix
        return self.matrix

In [101]:
# Toy three-class example: estimated labels and the matching ground truth.
ests = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
gts = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]

# Build the matrix object, fill in the counts, then show them.
conf_matrix = ConfusionMatrix(ests=ests, gts=gts)
cm = conf_matrix.calc_cm()
print("cm: {}".format(cm))

cm: [[3, 0, 0], [0, 1, 2], [2, 1, 3]]
