In [1]:
import numpy as np

def euclidean_distance(vec1, vec2):
    """Return the Euclidean (L2) distance between two vectors.

    Parameters
    ----------
    vec1, vec2 : array-like
        Numeric sequences or NumPy arrays with broadcast-compatible shapes.
        Plain Python lists are accepted.

    Returns
    -------
    float
        ``np.linalg.norm`` of the element-wise difference.
    """
    a = np.asarray(vec1)
    b = np.asarray(vec2)
    # Subtract in a dtype that can hold negative values: with unsigned inputs
    # (e.g. uint8 pixel data) a plain `a - b` wraps around (1 - 2 -> 255).
    # Mixing in int8 promotes unsigned types to a wider signed/float type and
    # leaves signed-int, float and complex inputs unchanged.
    work_dtype = np.result_type(a.dtype, b.dtype, np.int8)
    difference = np.subtract(a, b, dtype=work_dtype)
    return np.linalg.norm(difference)

In [2]:
def manhattan_distance(vec1, vec2):
    """Return the Manhattan (L1) distance between two vectors.

    Parameters
    ----------
    vec1, vec2 : array-like
        Numeric sequences or NumPy arrays with broadcast-compatible shapes.
        Plain Python lists are accepted.

    Returns
    -------
    numpy scalar
        Sum of the absolute element-wise differences.
    """
    a = np.asarray(vec1)
    b = np.asarray(vec2)
    # Subtract in a dtype that can hold negative values: with unsigned inputs
    # (e.g. uint8 pixel data) a plain `a - b` wraps around (1 - 4 -> 253).
    # Mixing in int8 promotes unsigned types to a wider signed/float type and
    # leaves signed-int and float inputs unchanged.
    work_dtype = np.result_type(a.dtype, b.dtype, np.int8)
    difference = np.subtract(a, b, dtype=work_dtype)
    return np.sum(np.abs(difference))

In [3]:
def accuracy_generalization_error(predicted, actual):
    """Return ``(accuracy, generalization_error)`` for a set of predictions.

    Parameters
    ----------
    predicted, actual : array-like
        Predicted and true labels; must have the same shape.

    Returns
    -------
    tuple of float
        ``(accuracy, 1 - accuracy)`` where accuracy is the fraction of
        positions at which ``predicted`` equals ``actual``.

    Raises
    ------
    ValueError
        If the inputs differ in shape or are empty.
    """
    # Convert first: comparing two plain lists with `==` yields a single bool
    # instead of an element-wise result, which silently breaks the count.
    predicted_arr = np.asarray(predicted)
    actual_arr = np.asarray(actual)
    if predicted_arr.shape != actual_arr.shape:
        raise ValueError(
            "predicted and actual must have the same shape, got "
            f"{predicted_arr.shape} and {actual_arr.shape}"
        )
    if actual_arr.size == 0:
        raise ValueError("cannot compute accuracy of empty inputs")

    n_correct = np.count_nonzero(predicted_arr == actual_arr)
    accuracy = n_correct / actual_arr.size
    return accuracy, 1 - accuracy

In [4]:
def precision(true_pos, false_pos):
    """Return precision, ``TP / (TP + FP)``.

    Parameters
    ----------
    true_pos, false_pos : number or array-like
        True-positive and false-positive counts (broadcast-compatible).

    Returns
    -------
    float or numpy.ndarray
        A Python float for scalar inputs, an array for array inputs. Where
        ``TP + FP`` is zero (nothing was predicted positive) the result is
        0.0 rather than a division error or NaN.
    """
    tp = np.asarray(true_pos, dtype=float)
    predicted_pos = tp + np.asarray(false_pos, dtype=float)
    # `where` skips the division for zero denominators; those entries keep
    # the 0.0 they were initialised with in `out`.
    result = np.divide(
        tp,
        predicted_pos,
        out=np.zeros_like(predicted_pos),
        where=predicted_pos != 0,
    )
    return result.item() if result.ndim == 0 else result

def recall(true_pos, false_neg):
    """Return recall, ``TP / (TP + FN)``.

    Parameters
    ----------
    true_pos, false_neg : number or array-like
        True-positive and false-negative counts (broadcast-compatible).

    Returns
    -------
    float or numpy.ndarray
        A Python float for scalar inputs, an array for array inputs. Where
        ``TP + FN`` is zero (there are no actual positives) the result is
        0.0 rather than a division error or NaN.
    """
    tp = np.asarray(true_pos, dtype=float)
    actual_pos = tp + np.asarray(false_neg, dtype=float)
    # `where` skips the division for zero denominators; those entries keep
    # the 0.0 they were initialised with in `out`.
    result = np.divide(
        tp,
        actual_pos,
        out=np.zeros_like(actual_pos),
        where=actual_pos != 0,
    )
    return result.item() if result.ndim == 0 else result

def f1_score(prec, rec):
    """Return the F1 score, the harmonic mean of precision and recall.

    Parameters
    ----------
    prec, rec : number or array-like
        Precision and recall values (broadcast-compatible).

    Returns
    -------
    float or numpy.ndarray
        ``2 * prec * rec / (prec + rec)``; a Python float for scalar inputs,
        an array for array inputs. Where ``prec + rec`` is zero the result is
        0.0 rather than a division error or NaN.
    """
    p = np.asarray(prec, dtype=float)
    r = np.asarray(rec, dtype=float)
    total = p + r
    # `where` skips the division for zero denominators; those entries keep
    # the 0.0 they were initialised with in `out`.
    ratio = np.divide(p * r, total, out=np.zeros_like(total), where=total != 0)
    score = 2 * ratio
    return score.item() if score.ndim == 0 else score

In [5]:
def confusion_matrix(predicted, actual):
    """Build the 2x2 confusion matrix for binary labels encoded as 0 and 1.

    Parameters
    ----------
    predicted, actual : array-like
        Predicted and true labels with the same number of elements. Inputs
        are converted to flat NumPy arrays, so elements are paired by
        position (not by index label, e.g. for a pandas Series).

    Returns
    -------
    numpy.ndarray
        ``[[tn, fp], [fn, tp]]``. Samples whose true label is neither 0 nor 1
        are not counted in any cell.

    Raises
    ------
    ValueError
        If the inputs contain a different number of elements.
    """
    predicted_arr = np.asarray(predicted).ravel()
    actual_arr = np.asarray(actual).ravel()
    if predicted_arr.size != actual_arr.size:
        raise ValueError(
            "predicted and actual must have the same number of elements, got "
            f"{predicted_arr.size} and {actual_arr.size}"
        )

    correct = predicted_arr == actual_arr
    actual_neg = actual_arr == 0
    actual_pos = actual_arr == 1

    # A sample is classified by its true label and by whether the prediction
    # matched it: any wrong prediction on a true 0 is a false positive, any
    # wrong prediction on a true 1 is a false negative.
    tn = np.count_nonzero(correct & actual_neg)
    tp = np.count_nonzero(correct & actual_pos)
    fp = np.count_nonzero(~correct & actual_neg)
    fn = np.count_nonzero(~correct & actual_pos)

    return np.array([[tn, fp], [fn, tp]])

In [3]:
import numpy as np
def roc_curve(true_classes, class_probabilities, num_thresholds=100):
    """Compute false- and true-positive rates over a grid of thresholds.

    For each step ``i`` in ``range(num_thresholds)`` a sample is predicted
    positive when its probability is strictly greater than
    ``i / num_thresholds``. With the default of 100 the thresholds are
    0.00, 0.01, ..., 0.99.

    Parameters
    ----------
    true_classes : array-like
        True labels, expected to be encoded as 0 (negative) and 1 (positive).
    class_probabilities : array-like
        Predicted probability of the positive class, one per sample.
    num_thresholds : int, optional
        Number of evenly spaced thresholds to evaluate (default 100).

    Returns
    -------
    (list, list)
        ``(fpr_values, tpr_values)``, ordered from the lowest threshold to
        the highest. A rate is NaN when its class is absent from
        ``true_classes`` (no negatives for FPR, no positives for TPR).

    Raises
    ------
    ValueError
        If the inputs differ in length or ``num_thresholds`` is below 1.
    """
    if num_thresholds < 1:
        raise ValueError("num_thresholds must be at least 1")

    labels = np.asarray(true_classes).ravel()
    scores = np.asarray(class_probabilities).ravel()
    if labels.size != scores.size:
        raise ValueError(
            "true_classes and class_probabilities must have the same number "
            f"of elements, got {labels.size} and {scores.size}"
        )

    # These do not depend on the threshold, so compute them once.
    is_positive = labels == 1
    n_positive = np.count_nonzero(is_positive)
    n_negative = np.count_nonzero(labels == 0)
    undefined = float("nan")

    fpr_values = []
    tpr_values = []
    for step in range(num_thresholds):
        flagged = scores > step / num_thresholds
        true_pos = np.count_nonzero(flagged & is_positive)
        false_pos = np.count_nonzero(flagged & ~is_positive)
        fpr_values.append(false_pos / n_negative if n_negative else undefined)
        tpr_values.append(true_pos / n_positive if n_positive else undefined)
    return fpr_values, tpr_values

In [None]:
def auc(true_classes, class_probabilities):
    """Return the area under the ROC curve produced by ``roc_curve``.

    Parameters
    ----------
    true_classes : array-like
        True labels, passed through to ``roc_curve``.
    class_probabilities : array-like
        Predicted positive-class probabilities, passed through to
        ``roc_curve``.

    Returns
    -------
    float
        Trapezoidal area under the (FPR, TPR) curve.
    """
    fpr_values, tpr_values = roc_curve(true_classes, class_probabilities)

    # roc_curve only evaluates thresholds below 1 with a strict `>`, so the
    # sampled curve may stop short of its theoretical endpoints (e.g. scores
    # of exactly 0.0 / 1.0 give the single point (0, 1) and an area of 0).
    # Anchor it at (1, 1) and (0, 0); the points run from high FPR to low FPR.
    fpr = np.concatenate(([1.0], fpr_values, [0.0]))
    tpr = np.concatenate(([1.0], tpr_values, [0.0]))

    # np.trapz was renamed to np.trapezoid in NumPy 2.0; prefer the new name
    # and fall back for older installations.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    # FPR is decreasing along the curve, so the signed area is negative.
    return abs(integrate(tpr, fpr))

In [23]:
def pr_curve(true_classes, class_probabilities, num_thresholds=100):
    """Compute precision and recall over a grid of thresholds.

    For each step ``i`` in ``range(num_thresholds)`` a sample is predicted
    positive when its probability is strictly greater than
    ``i / num_thresholds``. With the default of 100 the thresholds are
    0.00, 0.01, ..., 0.99.

    Parameters
    ----------
    true_classes : array-like
        True labels, expected to be encoded as 0 (negative) and 1 (positive).
    class_probabilities : array-like
        Predicted probability of the positive class, one per sample.
    num_thresholds : int, optional
        Number of evenly spaced thresholds to evaluate (default 100).

    Returns
    -------
    (list, list, list)
        ``(p_values, r_values, thresholds)``. ``thresholds`` holds the
        integer step index ``i`` (0, 1, ..., num_thresholds - 1), not the
        fractional cut-off; the cut-off applied is ``i / num_thresholds``.
        Precision is 1.0 at thresholds where no sample is predicted positive
        (the usual PR-curve convention for the zero-recall end). Recall is
        NaN when ``true_classes`` contains no positives.

    Raises
    ------
    ValueError
        If the inputs differ in length or ``num_thresholds`` is below 1.
    """
    if num_thresholds < 1:
        raise ValueError("num_thresholds must be at least 1")

    labels = np.asarray(true_classes).ravel()
    scores = np.asarray(class_probabilities).ravel()
    if labels.size != scores.size:
        raise ValueError(
            "true_classes and class_probabilities must have the same number "
            f"of elements, got {labels.size} and {scores.size}"
        )

    # These do not depend on the threshold, so compute them once.
    is_positive = labels == 1
    n_positive = np.count_nonzero(is_positive)

    p_values = []
    r_values = []
    thresholds = []
    for step in range(num_thresholds):
        flagged = scores > step / num_thresholds
        n_flagged = np.count_nonzero(flagged)
        true_pos = np.count_nonzero(flagged & is_positive)
        # With nothing predicted positive, precision is 0/0; use 1.0 instead
        # of emitting NaN and a RuntimeWarning.
        p_values.append(true_pos / n_flagged if n_flagged else 1.0)
        r_values.append(true_pos / n_positive if n_positive else float("nan"))
        # Kept as the integer step index for backward compatibility.
        thresholds.append(step)
    return p_values, r_values, thresholds
