In [12]:
# Basic Imports
from statshelper import StatisticsInfo
import numpy as np
import pandas as pd

# 1. Utilities Examples

## 1.1 `get_statistics`

In [13]:
# Ten observations summarised twice: once as a population, once as a sample
data = [10, 12, 15, 18, 20, 22, 25, 28, 30, 35]

# Population view (is_sample=False)
stats_pop = StatisticsInfo.get_statistics(data, is_sample=False)
print("Population Statistics:", stats_pop, sep="\n")

# Sample view (is_sample=True), additionally requesting per-value z-scores
stats_sample = StatisticsInfo.get_statistics(
    data,
    is_sample=True,
    return_z_score=True,
)
print("\nSample Statistics with Z-scores:", stats_sample, sep="\n")

Population Statistics:
{'mean': 21.5, 'std': 7.671375365604267, 'count': 10, 'variance': 58.85, 'cv': 0.35680815653973336, 'min': 10.0, 'max': 35.0, 'q1': 15.75, 'q2': 21.0, 'q3': 27.25, 'iqr': 11.5}

Sample Statistics with Z-scores:
{'mean': 21.5, 'std': 8.086339647138802, 'count': 10, 'variance': 65.38888888888889, 'cv': 0.3761088207971536, 'min': 10.0, 'max': 35.0, 'q1': 15.75, 'q2': 21.0, 'q3': 27.25, 'iqr': 11.5, 'z_score': [-1.4221514927423382, -1.1748207983523664, -0.8038247567674086, -0.4328287151824508, -0.1854980207924789, 0.06183267359749297, 0.4328287151824508, 0.8038247567674086, 1.0511554511573804, 1.6694821871323102]}


## 1.2 `get_z_critical` and `get_t_critical`

In [14]:
# Critical values from the normal (z) and the t distribution for one confidence level
confidence = 0.95
df = 15  # degrees of freedom for the t-distribution (data_size - 1)

z_crit = StatisticsInfo.get_z_critical(confidence)
t_crit = StatisticsInfo.get_t_critical(confidence, df)

# Both results are reported with four decimals, one per line
print(
    f"Z-critical for {confidence*100}% confidence: {z_crit:.4f}",
    f"T-critical for {confidence*100}% confidence with {df} df: {t_crit:.4f}",
    sep="\n",
)

Z-critical for 95.0% confidence: 1.9600
T-critical for 95.0% confidence with 15 df: 2.1314


## 1.3 `calculate_bin_width`

In [15]:
# Histogram binning (Sturges' rule) for the 50 measurements below
data: list[float] = [
    4.7, 4.9, 5.1, 5.4, 5.7, 6.0, 6.3, 6.8, 7.3, 8.9,
    4.8, 4.9, 5.2, 5.5, 5.7, 6.2, 6.4, 6.9, 8.2, 9.1,
    4.8, 5.0, 5.3, 5.6, 5.7, 6.2, 6.5, 7.0, 8.2, 9.9,
    4.9, 5.0, 5.4, 5.6, 5.9, 6.2, 6.7, 7.1, 8.3, 14.1,
    4.9, 5.0, 5.4, 5.7, 6.0, 6.3, 6.8, 7.3, 8.4, 15.2
]

# The helper returns a pair: number of bins first, bin width second
bin_result = StatisticsInfo.calculate_bin_width(data)
num_bins, bin_width = bin_result
print(f"Recommended {num_bins} bins with {bin_width} width")

Recommended 7 bins with 1.5 width


# 2. Mean Related Examples

## 2.2 `calculate_mean_error`

In [16]:
# Both calls share the same inputs; only the kind of standard deviation differs
mean_error_args = dict(confidence=0.95, data_size=30, std_dev=5)

# std_dev is the known population value -> z-distribution
error_z = StatisticsInfo.calculate_mean_error(**mean_error_args, is_sample_std=False)
print(f"Margin of error (z): {error_z:.4f}")

# std_dev is estimated from the sample -> t-distribution
error_t = StatisticsInfo.calculate_mean_error(**mean_error_args, is_sample_std=True)
print(f"Margin of error (t): {error_t:.4f}")

Margin of error (z): 1.7892
Margin of error (t): 1.8670


## 2.3 `calculate_mean_pvalue`

In [17]:
# calculate_mean_pvalue returns the p-value; with print_evaluation=True it also
# prints the test statistic, the p-value and the decision.

# Two-sided t-test (std_dev is a sample standard deviation)
print('Two-sided')
p_value = StatisticsInfo.calculate_mean_pvalue(
    data_size=25, sample_mean=105, std_dev=15,
    significance=0.05, hypothesis_mean=100,
    alternative="two-sided", is_sample_std=True, print_evaluation=True,
)

# The two one-sided examples are z-tests (is_sample_std=False) on identical
# inputs; only the direction of the alternative hypothesis changes.
z_test_args = dict(
    data_size=100,
    sample_mean=52,
    std_dev=10,
    significance=0.05,
    hypothesis_mean=50,
    is_sample_std=False,
    print_evaluation=True,
)

# One-sided right z-test
print('\nRight')
p_value = StatisticsInfo.calculate_mean_pvalue(alternative="right", **z_test_args)

# One-sided left z-test
print('\nLeft')
p_value = StatisticsInfo.calculate_mean_pvalue(alternative="left", **z_test_args)
print()

Two-sided
t_stat=1.6667
p_value=0.10858
decision='Accept H0'

Right
z_stat=2.0000
p_value=0.02275
decision='Reject H0'

Left
z_stat=2.0000
p_value=0.97725
decision='Accept H0'



# 3. Proportion Related Examples

## 3.1 `calculate_proportion_sample_size`

In [18]:
# Inputs shared by both sample-size calculations
sample_size_args = dict(confidence=0.95, error=0.03, p=0.5)

# No population_size supplied: infinite (unknown-size) population
sample_size_inf = StatisticsInfo.calculate_proportion_sample_size(**sample_size_args)
print(f"Sample size (infinite population): {sample_size_inf}")

# population_size supplied: finite population of known size
sample_size_finite = StatisticsInfo.calculate_proportion_sample_size(
    **sample_size_args,
    population_size=5000,
)
print(f"Sample size (finite population N=5000): {sample_size_finite}")

Sample size (infinite population): 1068
Sample size (finite population N=5000): 880


## 3.2 `calculate_proportion_error`

In [19]:
# Margin of error of a proportion estimate, reported with four decimals
error = StatisticsInfo.calculate_proportion_error(confidence=0.95, data_size=500, p=0.4)
print(f"Margin of error: {error:.4f}")

Margin of error: 0.0429


## 3.3 `calculate_proportion_pvalue`

In [20]:
# calculate_proportion_pvalue returns the p-value; with print_evaluation=True it
# also prints the test statistic, the p-value and the decision.

# All three tests use the same inputs; only `alternative` changes between them
proportion_test_args = dict(
    data_size=1000,
    sample_proportion=0.55,
    significance=0.05,
    hypothesis_proportion=0.5,
    print_evaluation=True,
)

# Two-sided proportion test
print('Two-sided')
p_value = StatisticsInfo.calculate_proportion_pvalue(alternative="two-sided", **proportion_test_args)

# One-sided right proportion test
print('\nRight')
p_value = StatisticsInfo.calculate_proportion_pvalue(alternative="right", **proportion_test_args)

# One-sided left proportion test
print('\nLeft')
p_value = StatisticsInfo.calculate_proportion_pvalue(alternative="left", **proportion_test_args)


Two-sided
z_stat=3.1623
p_value=0.00157
decision='Reject H0'

Right
z_stat=3.1623
p_value=0.00078
decision='Reject H0'

Left
z_stat=3.1623
p_value=0.99922
decision='Accept H0'
