In [24]:
from typing import Callable, Sequence

import numpy as np
import scipy.stats
import csv

In [25]:
# Base directory for generated artifacts: the experiment cell writes its
# results table to f"{ABSOLUTE_PATH}/dist/table.csv".
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
ABSOLUTE_PATH = "C:/Users/arman/it/prog/AI"
# Seed passed as `random_state` to every scipy `rvs` call that draws a sample.
SEED = 3

In [26]:
class Sample:
    """A named recipe for drawing a random sample of a requested size."""

    def __init__(self, name: str, sample_func: Callable[[int], np.ndarray]):
        """Store the label and the generator.

        Args:
            name: Human-readable label of the distribution; used in the
                printed progress lines and in the results table.
            sample_func: Callable that receives the sample size and returns
                the drawn values.
        """
        self.name = name
        self.sample_func = sample_func

# Distributions used in the experiment. Every generator passes the fixed
# integer `random_state=SEED`, so calling the same generator again with the
# same size is expected to reproduce the same draw.
all_samples = [
    # Continuous uniform with loc=5, scale=10 (scipy: support [loc, loc + scale]).
    Sample(
        name="uniform", 
        sample_func=lambda size: scipy.stats.uniform.rvs(loc=5, scale=10, size=size, random_state=SEED), 
    ),
    # Bernoulli trials with success probability 0.7.
    Sample(
        name="bernoulli", 
        sample_func=lambda size: scipy.stats.bernoulli.rvs(p=0.7, size=size, random_state=SEED),
    ),
    # Binomial with 20 trials and success probability 0.6.
    # NOTE(review): "binominal" is a misspelling of "binomial"; left as is
    # because this label is written to the CSV and to the printed output.
    Sample(
        name="binominal", 
        sample_func=lambda size: scipy.stats.binom.rvs(n=20, p=0.6, size=size, random_state=SEED),
    ),
    # Normal with loc=10 (mean) and scale=2 (standard deviation).
    Sample(
        name="normal",
        sample_func=lambda size: scipy.stats.norm.rvs(loc=10, scale=2, size=size, random_state=SEED),
    ),
]

In [27]:
class Estimator:
    """A named statistic that can be evaluated on a sample."""

    def __init__(self, name: str, estimator_func: Callable[[np.ndarray], float]):
        """Store the label and the statistic.

        Args:
            name: Label of the statistic; used in the printed progress lines
                and in the results table.
            estimator_func: Callable that receives a sample array and returns
                the value of the statistic.
        """
        self.name = name
        self.estimator_func = estimator_func


# Statistics to bootstrap. The experiment loop applies every statistic to
# every distribution in `all_samples`.
all_estimators = [
    # Sample minimum.
    Estimator(name="loc", estimator_func=lambda sample: np.min(sample)),
    # Sample range: maximum minus minimum.
    Estimator(name="scale", estimator_func=lambda sample: np.max(sample) - np.min(sample)),
    # Sample mean.
    Estimator(name="p", estimator_func=lambda sample: np.mean(sample)),
    # Sample standard deviation with ddof=1.
    Estimator(name="std", estimator_func=lambda sample: np.std(sample, ddof=1)),
]

In [28]:
def get_true_value(
    sample_array: Sequence[float],
    estimator: Callable[[np.ndarray], float],
    n_resamples: int = 1000
) -> float:
    """Approximate the "true" value of a statistic from a single sample.

    The underlying population is unknown, so it is imitated by pooling
    ``n_resamples`` resamples of the input (drawn with replacement, each as
    large as the input) and evaluating the statistic once on that pool.

    Args:
        sample_array: Observed one-dimensional sample.
        estimator: Statistic to evaluate; receives the pooled array.
        n_resamples: Number of input-sized resamples that make up the pool.
            Values below zero are treated as zero, matching the empty loop
            such values used to produce.

    Returns:
        Whatever ``estimator`` returns for the pooled array.

    Notes:
        Draws from the global ``np.random`` state; seed it beforehand for
        repeatable results.
    """
    sample = np.array(sample_array)
    pooled_size = len(sample) * max(n_resamples, 0)

    # A single draw of the whole pool replaces a loop that re-allocated and
    # copied an ever-growing array with np.concatenate on each iteration.
    pooled = np.random.choice(sample, size=pooled_size, replace=True)

    # The pool used to be accumulated into a float64 array, which promoted
    # integer samples to float; keep that promotion so the estimator still
    # receives the same dtype as before.
    pooled = pooled.astype(np.result_type(np.float64, pooled.dtype), copy=False)
    return estimator(pooled)

In [29]:
def my_bootstrap(
    sample_array: Sequence[float],
    estimator: Callable[[np.ndarray], float],
    confidence_level: float = 0.95,
    n_resamples: int = 1000
) -> tuple[float, float]:
    """Percentile bootstrap confidence interval for a statistic.

    The sample is resampled with replacement ``n_resamples`` times, the
    statistic is evaluated on each resample, and the interval bounds are the
    ``alpha / 2`` and ``1 - alpha / 2`` percentiles of those values, where
    ``alpha = 1 - confidence_level``.

    Args:
        sample_array: Observed one-dimensional sample.
        estimator: Statistic to evaluate on each resample.
        confidence_level: Coverage of the interval, within ``[0, 1]``.
        n_resamples: Number of bootstrap resamples.

    Returns:
        ``(lower_bound, upper_bound)`` of the interval.

    Raises:
        ValueError: If ``confidence_level`` is outside ``[0, 1]``.

    Notes:
        Draws from the global ``np.random`` state; seed it beforehand for
        repeatable results.
    """
    # Reject a bad level before spending n_resamples estimator calls; the
    # percentile computation would raise the same exception type afterwards.
    if not 0 <= confidence_level <= 1:
        raise ValueError(
            f"confidence_level must be within [0, 1], got {confidence_level!r}"
        )

    sample = np.array(sample_array)
    n = len(sample)

    bootstrap_estimates = [
        estimator(np.random.choice(sample, size=n, replace=True))
        for _ in range(n_resamples)
    ]

    alpha = 1 - confidence_level
    # Both bounds come from a single pass over the bootstrap distribution.
    lower_bound, upper_bound = np.percentile(
        bootstrap_estimates,
        [alpha / 2 * 100, (1 - alpha / 2) * 100],
    )
    return lower_bound, upper_bound

In [30]:
# get_true_value and my_bootstrap resample through the global numpy RNG.
# Seeding it here makes the whole table repeatable under "Restart & Run All",
# not just the generated samples.
np.random.seed(SEED)

data = []
for size in [100, 1000]:
    for sample in all_samples:
        # Draw once per (distribution, size). The generators are seeded, so
        # this is the same array the repeated calls used to rebuild for every
        # method and estimator, and all three methods see one shared sample.
        observations = sample.sample_func(size)
        sample_label = f"{sample.name} {size}"

        for estimator in all_estimators:
            true_value = get_true_value(observations, estimator.estimator_func)
            my_lower_bound, my_upper_bound = my_bootstrap(
                observations,
                estimator.estimator_func
            )
            # NOTE(review): scipy defaults to method="BCa" and 9999 resamples,
            # whereas my_bootstrap is a percentile interval over 1000; pass
            # method="percentile" here for a like-for-like comparison.
            scipy_result = scipy.stats.bootstrap(
                (observations,),
                estimator.estimator_func,
                random_state=SEED,
            )
            scipy_lower_bound = scipy_result.confidence_interval.low
            scipy_upper_bound = scipy_result.confidence_interval.high

            data.append(dict(
                sample_name=sample_label,
                estimator=estimator.name,
                true_value=true_value,
                my_lower_bound=my_lower_bound,
                my_upper_bound=my_upper_bound,
                scipy_lower_bound=scipy_lower_bound,
                scipy_upper_bound=scipy_upper_bound
            ))
            # Progress marker: the scipy bootstrap call can take a while.
            print(sample_label, estimator.name)

# One row per (sample, estimator) pair; columns follow the dict key order.
with open(f'{ABSOLUTE_PATH}/dist/table.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=data[0].keys())

    writer.writeheader()
    writer.writerows(data)

uniform 100 loc
uniform 100 scale
uniform 100 p
uniform 100 std
bernoulli 100 loc
bernoulli 100 scale
bernoulli 100 p
bernoulli 100 std
binominal 100 loc
binominal 100 scale
binominal 100 p
binominal 100 std
normal 100 loc
normal 100 scale
normal 100 p
normal 100 std
uniform 1000 loc
uniform 1000 scale
uniform 1000 p
uniform 1000 std
bernoulli 1000 loc
bernoulli 1000 scale
bernoulli 1000 p
bernoulli 1000 std
binominal 1000 loc
binominal 1000 scale
binominal 1000 p
binominal 1000 std
normal 1000 loc
normal 1000 scale
normal 1000 p
normal 1000 std
