# 05. Statistics

## Imports

In [15]:
import math

from collections import Counter

In [22]:
# Import functions from another notebook

%run ../04_linear_algebra/main.ipynb

## Central Tendencies

Arithmetic mean

In [3]:
def mean(xs: list[float]) -> float:
    """Return the arithmetic mean of ``xs``: the sum divided by the element count."""
    total = sum(xs)
    count = len(xs)
    return total / count

Median - the middle value of the sorted data (the average of the two middle values when the count is even)

In [2]:
def _median_odd(xs: list[float]) -> float:
    """Return the middle element of ``xs`` once sorted (meant for odd-length input)."""
    ordered = sorted(xs)
    middle = len(ordered) // 2
    return ordered[middle]

In [4]:
def _median_even(xs: list[float]) -> float:
    """Return the average of the two middle elements of ``xs`` once sorted.

    Meant for even-length input, where there is no single middle element.
    """
    ordered = sorted(xs)
    upper = len(xs) // 2
    lower = upper - 1
    return (ordered[lower] + ordered[upper]) / 2

In [6]:
def median(v: list[float]) -> float:
    """Return the median of ``v``, dispatching on whether its length is odd or even."""
    has_odd_length = len(v) % 2 == 1
    if has_odd_length:
        return _median_odd(v)
    return _median_even(v)


# Sanity checks: one odd-length and one even-length input.
assert median([1, 10, 2, 9, 5]) == 5
assert median([1, 9, 2, 10]) == (2 + 9) / 2

Quantile - the value below which a given fraction of the data lies (a generalization of the median)

In [7]:
def quantile(xs: list[float], p: float) -> float:
    """Return the value of ``xs`` found at fraction ``p`` of the sorted data.

    Args:
        xs: the data points; they do not need to be sorted.
        p: the fraction of the data to look up, between 0 and 1 inclusive.

    Returns:
        The element at index ``int(p * len(xs))`` of the sorted data, with
        ``p == 1`` mapped to the largest element.

    Raises:
        ValueError: if ``xs`` is empty or ``p`` is outside ``[0, 1]``.
    """
    if not xs:
        raise ValueError("[-] Quantile requires at least one element!")
    if not 0 <= p <= 1:
        raise ValueError("[-] P must be between 0 and 1!")

    # int() truncates, so p == 1 would point one past the end; clamp it to
    # the last valid index instead.
    p_index = min(int(p * len(xs)), len(xs) - 1)
    return sorted(xs)[p_index]

Mode - most common values

In [10]:
def mode(xs: list[float]) -> list[float]:
    """Return every value that occurs most often in ``xs``.

    A list is returned because several values can tie for the highest count.
    """
    frequencies = Counter(xs)
    highest = max(frequencies.values())

    modes = []
    for value, occurrences in frequencies.items():
        if occurrences == highest:
            modes.append(value)
    return modes

## Variation

Range - difference between maximum and minimum values

In [11]:
def data_range(xs: list[float]) -> float:
    """Return the spread of ``xs``: its largest value minus its smallest."""
    largest = max(xs)
    smallest = min(xs)
    return largest - smallest

De-mean - subtract the mean from every value, so that the result has mean 0

In [13]:
def de_mean(xs: list[float]) -> list[float]:
    """Return the deviations of ``xs``: each value with the mean of ``xs`` subtracted."""
    center = mean(xs)

    deviations = []
    for value in xs:
        deviations.append(value - center)
    return deviations

Variance - the sum of squared deviations from the mean, divided by n - 1

In [23]:
def variance(xs: list[float]) -> float:
    """Return the variance of ``xs``: squared deviations from the mean over ``n - 1``.

    Asserts that ``xs`` holds at least two elements, since the divisor
    ``n - 1`` would otherwise be zero.
    """
    count = len(xs)
    assert count >= 2, "[-] Variance requires at least two elements!"

    squared_total = sum_of_squares(de_mean(xs))
    return squared_total / (count - 1)

Standard deviation - the square root of the variance

In [16]:
def standard_deviation(xs: list[float]) -> float:
    """Return the standard deviation of ``xs``, i.e. the square root of its variance."""
    spread = variance(xs)
    return math.sqrt(spread)

Interquartile range - the difference between the 75th and 25th percentile values

In [17]:
def interquartile_range(xs: list[float]) -> float:
    """Return the gap between the 0.75 quantile and the 0.25 quantile of ``xs``."""
    upper_quartile = quantile(xs, 0.75)
    lower_quartile = quantile(xs, 0.25)
    return upper_quartile - lower_quartile

## Correlation

Covariance - measures how two variables vary together around their means

In [21]:
def covariance(xs: list[float], ys: list[float]) -> float:
    """Return the covariance of ``xs`` and ``ys``.

    Computed as the dot product of the two de-meaned series divided by
    ``len(xs) - 1``. Asserts that both inputs have the same length.
    """
    assert len(xs) == len(ys), "[-] XS and YS must have same number of elements!"

    x_deviations = de_mean(xs)
    y_deviations = de_mean(ys)
    return dot(x_deviations, y_deviations) / (len(xs) - 1)

Correlation - dependence of one variable on another

In [19]:
def correlation(xs: list[float], ys: list[float]) -> float:
    """Return the covariance of ``xs`` and ``ys`` scaled by both standard deviations.

    Returns 0 when either standard deviation is not strictly positive,
    which avoids dividing by zero.
    """
    stdev_x = standard_deviation(xs)
    stdev_y = standard_deviation(ys)

    # Guard clause: without a positive spread in both series there is
    # nothing to divide by.
    if not (stdev_x > 0 and stdev_y > 0):
        return 0

    return covariance(xs, ys) / stdev_x / stdev_y