In [3]:
import numpy as np
from tqdm import tqdm

In [4]:
data = [2,4,2,4,6,9,3,2]

# Variance, for-cycle, iteration over list

In [29]:
def calculate_var(data, is_sample=False):
    n = len(data)
    mean = sum(data)/n
    sum_square_dist = 0
    for value in data:
        square_dist = (value-mean)**2
        sum_square_dist += square_dist
    denominator = n+1 if is_sample else n
    var = sum_square_dist/denominator
    return var
        

In [30]:
calculate_var(data)

5.25

In [31]:
np.array(data).var()

np.float64(5.25)

# Variance using generator expression

In [10]:
def calculate_var(data, is_sample=False):
    n = len(data)
    mean = sum(data)/n
    #sum of square diff from the mean
    sum_square_diff = sum((x - mean)**2 for x in data)
    denominator = n - 1 if is_sample else n
    variance = sum_square_diff/denominator
    return variance

# Standard deviation(std)

In [15]:
def calculate_std(data):
    var = calculate_var(data)
    std = var**0.5
    return std

# Standard error of the mean(SEM)

In [16]:
def calculate_sem(data):
    std = calculate_std(data)
    n  = len(data)
    sem = std/n**0.5
    return sem
    


# Bootstrapping standard error of the mean(SEM)

In [17]:
def bootstrap_sem(data, n_bootstrap=10000):
    n = len(data)
    bootstrap_means = []
    for i in tqdm(range(n_bootstrap)):
        # Resample with replacement
        bootstrap_sample = np.random.choice(data, size=n, replace=True)
        bootstrap_means.append(np.mean(bootstrap_sample))
    sem = calculate_std(bootstrap_means)
    
    return sem

In [18]:
bootstrap_sem(data)

100%|██████████| 10000/10000 [00:00<00:00, 11133.14it/s]


np.float64(0.8049117649624667)

# Vectorized bootstraping SEM

In [21]:
def vectorized_bootstrap_sem(data, n_bootstraps = 10_000):
    n = len(data)
    boot_sample = np.random.choice(data, size=(n_bootstraps, n), replace=True)
    boots_means = np.mean(boot_sample, axis=1)
    boot_sem = calculate_sem(boots_means)
    return boot_sem

In [22]:
vectorized_bootstrap_sem(data)

np.float64(0.008083134351073192)

In [24]:
bootstrap_sem(data)

100%|██████████| 10000/10000 [00:00<00:00, 12223.21it/s]


np.float64(0.8034187652424728)

In [25]:
calculate_sem(data)

0.8100925873009824