# ML Fundamentals

## 1. Softmax + Cross Entropy


In [None]:
import math
from typing import List

def softmax(logits: List[float]) -> List[float]:
    # stable softmax
    max_logit = max(logits)
    exps = [math.exp(x - max_logit) for x in logits]
    print(exps)
    s = sum(exps)
    return [e / s for e in exps]

def cross_entropy_from_probs(probs: List[float], true_class: int) -> float:
    """
    probs: output of softmax
    true_class: index of correct class
    """
    # add small epsilon for numerical stability
    eps = 1e-12
    p = max(min(probs[true_class], 1 - eps), eps)
    return -math.log(p)

def cross_entropy_from_logits(logits: List[float], true_class: int) -> float:
    """
    Convenience: CE directly from logits
    """
    probs = softmax(logits)
    return cross_entropy_from_probs(probs, true_class)

# example
logits = [2.0, 1.0, 0.1]
probs = softmax(logits)
print(logits)
print(probs)
loss_cel = cross_entropy_from_logits(logits, true_class=0)
loss_cep = cross_entropy_from_probs(probs, true_class=0)
print(loss_cel)
print(loss_cep)


[1.0, 0.36787944117144233, 0.14956861922263506]
[2.0, 1.0, 0.1]
[0.6590011388859679, 0.24243297070471392, 0.09856589040931818]


In [None]:

def softmax_manual(logits: List[float]) -> List[float]:
    max_logit = max(logits)
    exps = []
    softs = []
    for i in logits:
        exps.append(math.exp(i - max_logit))
    sum_probs = sum(exps)
    for exp_i in exps:
        softs.append(exp_i/sum_probs)
    return softs

logits = [2.0, 1.0, 0.1]
probs = softmax_manual(logits)
print(logits)
print(probs)

[2.0, 1.0, 0.1]
[0.6590011388859679, 0.24243297070471392, 0.09856589040931818]


In [11]:
def cross_entropy_from_probs(probs: List[float], true_class: int) -> float:
    return -math.log(probs[true_class])

def cross_entropy_from_logits(probs: List[float], true_class: int) -> float:
    probs = softmax_manual(probs)
    return cross_entropy_from_probs(probs, true_class)


logits = [2.0, 1.0, 0.1]
probs = softmax_manual(logits)
loss_cel = cross_entropy_from_logits(logits, true_class=0)
loss_cep = cross_entropy_from_probs(probs, true_class=0)
print(loss_cel)
print(loss_cep)

0.4170300162778335
0.4170300162778335


## 2. F1, IoU, TPR, FPR (binary classification)

In [13]:
from typing import Tuple

def confusion_counts(y_true: List[int], y_pred: List[int]) -> Tuple[int, int, int, int]:
    """
    Assumes 0 = negative, 1 = positive.
    Returns TP, FP, FN, TN
    """
    tp = fp = fn = tn = 0
    for t, p in zip(y_true, y_pred):
        if t == 1 and p == 1:
            tp += 1
        elif t == 0 and p == 1:
            fp += 1
        elif t == 1 and p == 0:
            fn += 1
        else:  # t == 0 and p == 0
            tn += 1
    return tp, fp, fn, tn

def precision_recall_f1(y_true: List[int], y_pred: List[int]) -> Tuple[float, float, float]:
    tp, fp, fn, tn = confusion_counts(y_true, y_pred)
    precision = tp / (tp + fp + 1e-12)
    recall    = tp / (tp + fn + 1e-12)
    f1 = 2 * precision * recall / (precision + recall + 1e-12)
    return precision, recall, f1

def iou_score(y_true: List[int], y_pred: List[int]) -> float:
    tp, fp, fn, tn = confusion_counts(y_true, y_pred)
    intersection = tp
    union = tp + fp + fn
    return intersection / (union + 1e-12)

def tpr_fpr(y_true: List[int], y_pred: List[int]) -> Tuple[float, float]:
    """
    TPR = recall, FPR = FP / (FP + TN)
    """
    tp, fp, fn, tn = confusion_counts(y_true, y_pred)
    tpr = tp / (tp + fn + 1e-12)
    fpr = fp / (fp + tn + 1e-12)
    return tpr, fpr

# example
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
p, r, f1 = precision_recall_f1(y_true, y_pred)
iou = iou_score(y_true, y_pred)
tpr, fpr = tpr_fpr(y_true, y_pred)
print(f"p: {p}, r: {r}, f1: {f1}")
print(f"iou: {iou}")
print(f"tpr: {tpr}, fpr: {fpr}")


p: 0.9999999999995, r: 0.6666666666664445, f1: 0.7999999999991999
iou: 0.6666666666664445
tpr: 0.6666666666664445, fpr: 0.0


In [16]:

def confusion_counts_manual(y_true: List[int], y_pred: List[int]):
    tp = fp = tn = fn = 0
    for t, p in zip(y_true, y_pred):
        if t == 1 and p == 1:
            tp += 1
        elif t == 1 and p == 0:
            fn += 1
        elif t == 0 and p == 1:
            fp += 1
        else:
            tn += 1
    return tp, tn, fp, fn

def precision_recall_f1_manual(y_true: List[int], y_pred: List[int]):
    tp, tn, fp, fn = confusion_counts_manual(y_true, y_pred)
    precision = tp / (tp + fp + 1e-13)
    recall = tp / (tp + fn + 1e-13)
    f1 = 2 * precision * recall / (precision + recall + 1e-13)
    return precision, recall, f1

def iou_manual(y_true: List[int], y_pred: List[int]):
    tp, tn, fp, fn = confusion_counts_manual(y_true, y_pred)
    iou = tp / (tp + fp + fn + 1e-13)
    return iou


def tpr_fpr_manual(y_true: List[int], y_pred: List[int]):
    tp, tn, fp, fn = confusion_counts_manual(y_true, y_pred)

    tpr = tp / (tp + fn + 1e-13)
    fpr = fp / (fp + tn + 1e-13)

    return tpr, fpr


# example
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
p, r, f1 = precision_recall_f1_manual(y_true, y_pred)
iou = iou_manual(y_true, y_pred)
tpr, fpr = tpr_fpr_manual(y_true, y_pred)
print(f"p: {p}, r: {r}, f1: {f1}")
print(f"iou: {iou}")
print(f"tpr: {tpr}, fpr: {fpr}")

p: 0.99999999999995, r: 0.6666666666666444, f1: 0.79999999999992
iou: 0.6666666666666444
tpr: 0.6666666666666444, fpr: 0.0


## 3. Normalization (standard score)

In [23]:
def standardize(xs: List[float]):
    n = len(xs)
    mean_xs = sum(xs)/n
    var = sum((xs_i - mean_xs)**2 for xs_i in xs)/n
    std = math.sqrt(var)
    xs_stand = [(x - mean_xs)/std for x in xs]
    return xs_stand


values = [1.0, 2.0, 3.0, 4.0]
z = standardize(values)
print(z)



[-1.3416407864998738, -0.4472135954999579, 0.4472135954999579, 1.3416407864998738]


## 4. One Gradient Step for Linear Regression

Model:
```y^=ùë§ùë•+ùëè```

Loss: mean squared error

In [25]:
from decimal import DivisionImpossible


def linear_regression_step(xs: List[float], ys: List[float], w: float, b: float, lr: float):
    dw = 0
    db = 0
    for (x, y) in zip(xs, y_true):
        y_pred = x*w + b
        e = - y + y_pred
        dw += x*e
        db += e
    dw *= 2/len(xs)
    dw *= 2/len(xs)

    w_new = w - lr*dw
    b_new = b - lr*db

    return w_new, b_new

# example
xs = [1, 2, 3]
ys = [2, 4, 6]
w, b = 0.0, 0.0
w, b = linear_regression_step(xs, ys, w, b, lr=0.01)

print(f"w = {w}, b = {b}")

w = 0.02222222222222222, b = 0.02
