In [1]:
from __future__ import annotations

import math
from dataclasses import dataclass
from typing import Iterable

import numpy as np


In [25]:
@dataclass()
class RunningStat:
    """Streaming mean/variance accumulator using Welford's online update.

    Values are absorbed one at a time (``update``), in bulk (``update_many``)
    or by folding in another accumulator (``merge``).  Only three numbers are
    stored, so the data itself never has to be kept in memory.
    """
    # Observation count; stored as a float, so it prints as e.g. ``4.0``.
    n: float = 0.0
    # Running arithmetic mean of everything seen so far.
    mean: float = 0.0
    # Running sum of squared deviations from the current mean.
    M2: float = 0.0

    # --------------- dunder methods -----------------------
    def __repr__(self) -> str:
        """Compact summary with count, mean and both variance flavours.

        Defined explicitly so it is used instead of the dataclass-generated
        repr.  A variance that is not finite is rendered as ``nan``.
        """
        v0 = self.var(0)
        v1 = self.var(1)
        return (f"RunningStat(n={self.n}, mean={self.mean:.6g}, "
                f"var_pop={v0 if math.isfinite(v0) else float('nan'):.6g}, "
                f"var_samp={v1 if math.isfinite(v1) else float('nan'):.6g})")

    # --------------- logic -----------------------
    def update(self, x: float) -> None:
        """Absorb a single observation ``x`` (one Welford step)."""
        self.n += 1
        delta = x - self.mean            # deviation from the OLD mean
        self.mean += delta / self.n
        delta_2 = x - self.mean          # deviation from the NEW mean
        self.M2 += delta * delta_2

    def update_many(self, xs: Iterable[float]) -> None:
        """Absorb every value produced by ``xs``, in order.

        ``xs`` may be any iterable (list, generator, NumPy array, ...); it is
        traversed exactly once.
        """
        for v in xs:
            self.update(v)

    def var(self, ddof: int = 0) -> float:
        """Return the variance ``M2 / (n - ddof)``.

        Use ``ddof=0`` for the population variance and ``ddof=1`` for the
        sample variance.  Returns ``nan`` when ``n - ddof <= 0``.
        """
        if self.n - ddof <= 0:
            return float('nan')
        return self.M2 / (self.n - ddof)

    def std(self, ddof: int = 0) -> float:
        """Return the square root of ``var(ddof)``.

        A non-finite variance (e.g. ``nan`` for too few observations) is
        returned unchanged instead of being passed to ``math.sqrt``.
        """
        v = self.var(ddof)
        return math.sqrt(v) if math.isfinite(v) else v

    def reset(self) -> None:
        """Reset ``n``, ``mean`` and ``M2`` to their initial values (0.0)."""
        self.n, self.mean, self.M2 = 0.0, 0.0, 0.0

    def merge(self, other: "RunningStat") -> None:
        """Fold the observations summarised by ``other`` into this accumulator.

        After the call ``self`` describes the union of both data sets;
        ``other`` is left untouched.  Merging an empty accumulator is a no-op,
        and merging into an empty accumulator copies ``other``'s state.
        """
        if other.n == 0:
            return
        if self.n == 0:
            self.n, self.mean, self.M2 = other.n, other.mean, other.M2
            return
        total = self.n + other.n
        delta = other.mean - self.mean
        # Pairwise combination of two partial (n, mean, M2) summaries.
        # self.n must still hold the pre-merge count while M2 is updated,
        # so it is reassigned last.
        self.mean += delta * other.n / total
        self.M2 += other.M2 + delta * delta * self.n * other.n / total
        self.n = total
        
        

In [26]:
# Demo: stream the odd integers 1, 3, 5, 7, 9 through the accumulator in one
# batch.  `np` comes from the import cell at the top of the notebook.
xs = np.arange(1, 10, 2)
print(xs)
rs = RunningStat()
rs.update_many(xs)
rs  # bare last expression: the notebook displays repr(rs)

[1 3 5 7 9]


RunningStat(n=5.0, mean=5, var_pop=8, var_samp=10)

In [27]:
# --- tiny tests for RunningStat (merge is not covered) ---
import math

def approx(a, b, tol=1e-12):
    """Absolute-tolerance comparison: True when |a - b| is at most ``tol``."""
    gap = abs(a - b)
    return gap <= tol

# 1) Batch update on a small array
xs = [1, 2, 3, 4]
rs = RunningStat()
rs.update_many(xs)

assert rs.n == 4
assert approx(rs.mean, 2.5)
assert approx(rs.var(ddof=0), 1.25)                    # population variance
assert approx(rs.var(ddof=1), 1.6666666666666667)      # sample variance
assert approx(rs.std(ddof=0), math.sqrt(1.25))
assert approx(rs.std(ddof=1), math.sqrt(1.6666666666666667))

# 2) Incremental updates sanity check
#    Each tuple is (n, mean, population variance) after feeding xs[:n].
rs2 = RunningStat()
expected = [
    (1, 1.0, 0.0),                 # after 1
    (2, 1.5, 0.25),                # after 2
    (3, 2.0, 2.0/3.0),             # after 3
    (4, 2.5, 1.25),                # after 4
]
for x, (n, mean, var_pop) in zip(xs, expected):
    rs2.update(x)
    assert rs2.n == n
    assert approx(rs2.mean, mean)
    assert approx(rs2.var(ddof=0), var_pop)

# 3) Edge cases
rs3 = RunningStat()
assert math.isnan(rs3.var(ddof=0))          # n=0
assert math.isnan(rs3.var(ddof=1))          # n=0
assert math.isnan(rs3.std(ddof=0))          # std passes the nan through
rs3.update(42)
assert approx(rs3.var(ddof=0), 0.0)         # pop var with n=1 → 0.0
assert math.isnan(rs3.var(ddof=1))          # sample var needs n>=2
assert approx(rs3.std(ddof=0), 0.0)         # sqrt(0.0)
assert math.isnan(rs3.std(ddof=1))          # nan variance → nan std

# 4) reset() puts the accumulator back to its initial state
rs3.reset()
assert (rs3.n, rs3.mean, rs3.M2) == (0.0, 0.0, 0.0)
assert math.isnan(rs3.var(ddof=0))

# 5) update_many works with any iterable, not only lists
rs4 = RunningStat()
rs4.update_many(iter(xs))
assert rs4.n == rs.n
assert approx(rs4.mean, rs.mean)
assert approx(rs4.M2, rs.M2)


In [29]:
# Accumulate the values 1..4 and print each statistic on its own line.
xs = [1, 2, 3, 4]
rs = RunningStat()
rs.update_many(xs)
print(rs.n)                 # 4.0  (the count is stored as a float)
print(rs.mean)              # 2.5
print(rs.var(ddof=0))       # 1.25                (population variance)
print(rs.var(ddof=1))       # 1.6666666666666667  (sample variance)
print(rs.std(ddof=0))       # 1.118033988749895   (population std)


4.0
2.5
1.25
1.6666666666666667
1.118033988749895


In [30]:
# cool --- didn't understand this much, but yeah