In [5]:
# %load ./dsfs/ml.py
import random

from typing import TypeVar, List, Tuple

X = TypeVar('X')
Y = TypeVar('Y')


def split_data(data: List[X], prob: float, shuffle: bool = True) -> Tuple[List[X], List[X]]:
    """Split data into two pieces sized roughly [prob, 1 - prob].

    The split is performed on a shallow copy, so the caller's list is never
    reordered.

    Args:
        data: The items to split.
        prob: Fraction of the items that goes into the first piece; the cut
            point is int(len(data) * prob).
        shuffle: When true (the default) the copy is shuffled with the
            module-level random generator before cutting; when false the
            original order is kept.

    Returns:
        A (first, rest) tuple of lists.
    """
    items = data[:]  # shallow copy; the caller's list stays untouched
    if shuffle:
        random.shuffle(items)

    boundary = int(len(items) * prob)
    first, rest = items[:boundary], items[boundary:]
    return first, rest


def train_test_split(xs: List[X], ys: List[Y], test_pct: float) -> Tuple[List[X], List[X], List[Y], List[Y]]:
    """Split paired inputs and outputs into train and test sets.

    The positions 0..len(xs)-1 are split once with split_data, and the same
    positions are then used to pick from both xs and ys, so every x stays
    aligned with its y.

    Args:
        xs: Input values.
        ys: Output values, indexed in parallel with xs.
        test_pct: Fraction of the positions assigned to the test set.

    Returns:
        (x_train, x_test, y_train, y_test).
    """
    positions = list(range(len(xs)))
    train_pos, test_pos = split_data(positions, 1 - test_pct)

    x_train = [xs[k] for k in train_pos]
    x_test = [xs[k] for k in test_pos]
    y_train = [ys[k] for k in train_pos]
    y_test = [ys[k] for k in test_pos]
    return x_train, x_test, y_train, y_test


In [6]:
# Sanity check for split_data: a 75/25 split of 1000 items.
data = list(range(1000))
train, test = split_data(data, 0.75)
assert len(train) == 750  # int(1000 * 0.75) items go to the first piece
assert len(test) == 250
# The two pieces together hold exactly the original items (nothing lost or
# duplicated), and `data` itself is still in its original sorted order.
assert sorted(train + test) == data

In [7]:
# Sanity check for train_test_split using pairs with a known relation y = 2x.
xs = list(range(1000))
ys = [2 * x for x in xs]
x_train, x_test, y_train, y_test = train_test_split(xs, ys, 0.25)
# 25% held out for testing, so 750 training pairs and 250 test pairs.
assert len(x_train) == len(y_train) == 750
assert len(y_test) == len(x_test) == 250

# Each x must still be paired with its own y after the split.
assert all(y == 2 * x for x, y in zip(x_train, y_train))
assert all(y == 2 * x for x, y in zip(x_test, y_test))

In [9]:
# %load ./dsfs/scoring.py

# tp -> true positive
# fp -> false positive
# fn -> false negative
# tn -> true negative

def accuracy(tp: int, fp: int, fn: int, tn: int) -> float:
    """Return the proportion of correct predictions.

    Correct predictions are the true positives plus the true negatives; the
    denominator is the sum of all four counts. Raises ZeroDivisionError when
    that sum is zero.
    """
    return (tp + tn) / (tp + fp + fn + tn)


def precision(tp: int, fp: int, fn: int, tn: int) -> float:
    """Return the accuracy of the positive predictions: tp / (tp + fp).

    fn and tn are accepted so that every scoring function shares the same
    signature, but they are not used here. Raises ZeroDivisionError when
    tp + fp is zero.
    """
    predicted_positive = tp + fp
    return tp / predicted_positive


def recall(tp: int, fp: int, fn: int, tn: int) -> float:
    """Return the proportion of the positives that were identified: tp / (tp + fn).

    fp and tn are accepted so that every scoring function shares the same
    signature, but they are not used here. Raises ZeroDivisionError when
    tp + fn is zero.
    """
    actual_positive = tp + fn
    return tp / actual_positive


def f1_score(tp: int, fp: int, fn: int, tn: int) -> float:
    """Return the F1 score: the harmonic mean of precision and recall.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        fn: Number of false negatives.
        tn: Number of true negatives.

    Returns:
        2 * p * r / (p + r), where p is the precision and r is the recall.
        When both are zero (no true positives) the score is 0.0.

    Raises:
        ZeroDivisionError: Propagated from precision() or recall() when
            their own denominator (tp + fp, or tp + fn) is zero.
    """
    p = precision(tp, fp, fn, tn)
    r = recall(tp, fp, fn, tn)
    if p + r == 0:
        # Both are non-negative, so this means p == r == 0; the harmonic
        # mean is 0 here rather than an undefined 0 / 0.
        return 0.0
    return 2 * p * r / (p + r)


|            | Leukemia | No Leukemia | Total   |
|------------|----------|-------------|---------|
| "Luke"     | 70       |      4930   |  5000   |
| Not "Luke" | 13930    |      981070 |  995000 |
| Total      | 14000    |      986000 | 1000000 |

In [14]:
# Confusion-matrix counts taken from the table above:
# tp = "Luke" with leukemia, fp = "Luke" without leukemia,
# fn = not "Luke" with leukemia, tn = not "Luke" without leukemia.
tp, fp, fn, tn = 70, 4930, 13930, 981070

# The second operand of each assert is the message shown on failure, i.e. the
# value that was actually computed.
assert accuracy(tp, fp, fn, tn) == 0.98114, accuracy(tp, fp, fn, tn) # High Accuracy
assert precision(tp, fp, fn, tn) == 0.014, precision(tp, fp, fn, tn) # Low precision
assert recall(tp, fp, fn, tn) == 0.005, recall(tp, fp, fn, tn) # Low recall

# Last expression of the cell, so its value is displayed as the cell output.
f1_score(tp, fp, fn, tn) # Really bad f1 score

0.00736842105263158