## Mean, Median, Mode, Range, IQR, Variance, Standard Deviation

In [0]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
np.random.seed(42)

In [0]:
# Sample observations; replace these values with your own data set.
data = [
    10, 20, 50, 40,
    50, 60, 70, 80,
]

In [0]:
# Wrap the raw observations in a pandas Series; the statistics cell reuses `s`.
s = pd.Series(data)

# 8. Visualizations: histogram and box plot side by side on a single figure.
fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(6, 3))

# Left panel: histogram of the observations
hist_ax.hist(s, color='skyblue', edgecolor='black', alpha=0.7)
hist_ax.set_title("Distribution of Data")
hist_ax.set_xlabel("Value")
hist_ax.set_ylabel("Frequency")

# Right panel: box plot of the same observations
box_ax.boxplot(s)
box_ax.set_title("Box Plot of Data")
box_ax.set_ylabel("Values")

# Keep titles and axis labels of the two panels from overlapping.
fig.tight_layout()
plt.show()

In [0]:
# Descriptive statistics of the Series `s`.

# --- Central tendency ---
mean_val = s.mean()        # 1. Mean (average)
median_val = s.median()    # 2. Median (middle value)

# 3. Mode (most frequent value). Series.mode() returns a Series because
#    several values can tie; only the first entry is kept.
all_modes = s.mode()
mode_val = all_modes.iloc[0]

# --- Spread ---
# 4. Range (max - min)
smallest, largest = s.min(), s.max()
data_range = largest - smallest

# 5. IQR (interquartile range: Q3 - Q1)
q1 = s.quantile(0.25)
q3 = s.quantile(0.75)
iqr_val = q3 - q1

# 6. Variance and 7. Standard deviation
variance_val = s.var()
std_dev_val = s.std()

# Report: a header followed by two summary lines.
print("Sample Statistics:\n")
summary_lines = (
    f"Count:{len(s)},  Mean: {mean_val}, Std Dev: {std_dev_val:.5f}, Variance: {variance_val:.5f}",
    f"Median: {median_val}, Range: {data_range}, IQR: {iqr_val}, Mode: {mode_val}",
)
for summary_line in summary_lines:
    print(summary_line)

# One Sample test Mean

In [0]:
# ---- Sample statistics: insert your values here ----
# Sample size
n = 25
# Sample mean
X_bar = 172.50
# Sample standard deviation (needed if sigma is unknown)
sample_std = 15.40

# ---- Population parameters and test configuration ----
# Hypothesized population mean; None if not known
μ0 = 168
# Population standard deviation; None if not known
sigma = None
# Confidence level, e.g. 0.95 for a 95 % confidence interval
confidence = 0.95
# 1 for a one-sided test, 2 for a two-sided test
test_side = 2
# Claimed position of the population mean relative to μ0: 'Lower' or 'Higher'.
# Only needed for a one-sided test.
relative_mean = 'Lower'

In [0]:
# Confidence interval for the population mean.
#
# Reads n, X_bar, sample_std, sigma, confidence and test_side from the input
# cell. Uses the Student t distribution when sigma is None (population
# standard deviation unknown) and the standard normal distribution otherwise.
# Leaves alpha, prob, margin_error, ci_lower and ci_upper (plus t_score/df or
# z_score) defined for the cells that follow.
alpha = 1 - confidence
# Cumulative probability of the critical value: alpha is split over both
# tails for a two-sided interval and kept in one tail for a one-sided one.
prob = 1 - alpha / test_side

if sigma is None:
    # ---- T-interval (sigma unknown) ----
    df = n - 1
    t_score = stats.t.ppf(prob, df)
    critical_value, spread = t_score, sample_std
    score_line = f"T-score: {t_score:.5f} at T({prob:.4f}, {df})"
    spread_line = f"Sample Std Dev: {sample_std}"
else:
    # ---- Z-interval (sigma known) ----
    z_score = stats.norm.ppf(prob)
    critical_value, spread = z_score, sigma
    score_line = f"Z-score: {z_score:.5f} at Z({prob:.4f})"
    spread_line = f"Population Std Dev: {sigma}"

# Both interval types share the same arithmetic once the critical value and
# the standard deviation have been selected.
margin_error = critical_value * spread / np.sqrt(n)
ci_lower = X_bar - margin_error
ci_upper = X_bar + margin_error

print(score_line)
print(f"Sample Mean: {X_bar}")
print(spread_line)
if test_side == 2:
    # ':g' prints 95 for 0.95 and 99.9 for 0.999; int() would truncate the
    # latter to 99.
    print(f"{confidence * 100:g}% CI for Population Mean: ({ci_lower:.5f}, {ci_upper:.5f})")
else:
    # One-sided: show both possible bounds; the decision cell picks the one
    # matching relative_mean.
    print(f"Right-tailed CI (μ > μ0): [{ci_lower:.4f}, ∞)")
    print(f"Left-tailed CI (μ < μ0): (-∞, {ci_upper:.4f}]")

In [0]:
# Decide on the null hypothesis by checking whether the hypothesized
# population mean μ0 lies inside the confidence interval computed above.
#
# Two-sided test : interval is (ci_lower, ci_upper)
# 'Lower' claim  : interval is (-∞, ci_upper]
# otherwise      : interval is [ci_lower, ∞)   ('Higher' claim)

if μ0 is None:
    # μ0 may legitimately be left as None (see the input cell); comparing
    # None with a float would raise TypeError, so skip the test instead.
    print("No hypothesized population mean (μ0) provided; skipping the hypothesis test.")
else:
    if test_side == 2:
        within_ci = ci_lower <= μ0 <= ci_upper
    elif relative_mean == 'Lower':
        within_ci = ci_upper >= μ0
    else:
        within_ci = ci_lower <= μ0

    if within_ci:
        print(f"Population mean {μ0} is within the confidence interval.")
        print("Accept the Null Hypothesis")
    else:
        print(f"Population mean {μ0} is NOT within the confidence interval.")
        print("Reject the Null Hypothesis")
    


# Determine Sample Size

In [0]:
# TODO: p value, ... (unfinished note; the sample-size calculation has not been written yet)

# Two Sample Tests for ΔMean

In [0]:
def two_sample_z_test(x1, x2, mu1, mu2, sigma1, sigma2, n1, n2,
                      confidence=0.95, test_side=2, relative_mean='Lower'):
    """Z test and confidence interval for a difference in means (mean2 - mean1).

    Applies when both population standard deviations are known.

    Args:
        x1, x2: Sample means.
        mu1, mu2: Hypothesized population means; only mu2 - mu1 is used.
        sigma1, sigma2: Known population standard deviations.
        n1, n2: Sample sizes.
        confidence: Confidence level, e.g. 0.95.
        test_side: 2 for a two-sided test, 1 for a one-sided test.
        relative_mean: One-sided tests only. 'Lower' tests the claim that the
            difference is below the hypothesized one (left tail, upper-bounded
            interval); any other value is treated as 'Higher'.

    Returns:
        Tuple (z_score, z_value, p_value, ci_lower, ci_upper) where z_score is
        the critical value and z_value the observed test statistic.
    """
    alpha = 1 - confidence
    z_score = stats.norm.ppf(1 - alpha / test_side)

    # Standard error of the difference between the two sample means.
    sigma_factor = np.sqrt(sigma1**2 / n1 + sigma2**2 / n2)
    diff = x2 - x1
    z_value = (diff - (mu2 - mu1)) / sigma_factor

    if test_side == 2:
        p_value = 2 * stats.norm.sf(abs(z_value))
        ci_lower = diff - z_score * sigma_factor
        ci_upper = diff + z_score * sigma_factor
    elif relative_mean == 'Lower':
        # Same convention as the one-sample section: 'Lower' -> (-inf, upper].
        p_value = stats.norm.cdf(z_value)
        ci_lower = -np.inf
        ci_upper = diff + z_score * sigma_factor
    else:
        p_value = stats.norm.sf(z_value)
        ci_lower = diff - z_score * sigma_factor
        ci_upper = np.inf
    return z_score, z_value, p_value, ci_lower, ci_upper


# Sample sizes
n1, n2 = 20, 20

# Sample means (fill in before running)
X1, X2 = None, None
# Xd = X2 - X1

# Hypothesized population means (fill in before running)
μ1, μ2 = None, None
# μd = μ2 - μ1

# Known population standard deviations (squared inside the test)
sigma1, sigma2 = None, None

confidence = 0.95       # 0.95 for 95 % Confidence Interval
test_side = 2           # 1 for one sided, 2 for two sided
relative_mean = 'Lower'  # Claim of relative position of population mean, 'Lower' or 'Higher', needed for 1 sided test
alpha = 1 - confidence

if any(value is None for value in (X1, X2, μ1, μ2, sigma1, sigma2)):
    # The template ships with None placeholders; arithmetic on None would
    # raise TypeError, so ask for the inputs instead.
    print("Fill in X1, X2, μ1, μ2, sigma1 and sigma2 to run the two-sample Z test.")
else:
    Z_score, Z_value, p_value, CI_lower, CI_upper = two_sample_z_test(
        X1, X2, μ1, μ2, sigma1, sigma2, n1, n2,
        confidence=confidence, test_side=test_side, relative_mean=relative_mean,
    )

    print(f"Z-score: {Z_score:.5f} for Z({1 - alpha/test_side:.4f})")
    print(f"C.I. for Difference in Means: ({CI_lower:.5f}, {CI_upper:.5f})")
    print(f"Z-value: {Z_value:.5f}")
    print(f"p-value: {p_value:.5f}")
    if p_value < alpha:
        print("Reject the Null Hypothesis")
    else:
        print("Accept the Null Hypothesis")


In [0]:
# Two-sample t test for the difference in means (μ2 - μ1) when the population
# variances are unknown but assumed equal (pooled variance estimate).

# Sample sizes
n1, n2 = 21, 25

# Sample means
X1, X2 = 3.27, 2.53
# Xd = X2 - X1

# Hypothesized population means; only the difference μ2 - μ1 enters the test
μ1, μ2 = 0, 0
# μd = μ2 - μ1

# Sample standard deviations (squared below to obtain the sample variances)
s1, s2 = 1.30, 1.16
# Pooled standard deviation: sample variances weighted by degrees of freedom
sp = np.sqrt(((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2))

confidence = 0.95       # 0.95 for 95 % Confidence Interval
test_side = 2           # 1 for one sided, 2 for two sided
relative_mean = 'Lower'  # Claim of relative position of population mean, 'Lower' or 'Higher', needed for 1 sided test
alpha = 1 - confidence

# Degrees of Freedom
df = n1 + n2 - 2

t_score = stats.t.ppf(1 - alpha / test_side, df)
# Standard error of the difference between the two sample means
sigma_factor = np.sqrt(sp**2/n1 + sp**2/n2)

t_value = (X2 - X1 - (μ2 - μ1)) / sigma_factor

# The p-value tail(s) and the interval must both follow test_side and
# relative_mean, so the p-value decision agrees with the printed interval.
if test_side == 2:
    # Two-sided: probability of a statistic at least this extreme in either tail
    p_value = 2 * stats.t.sf(abs(t_value), df)
    CI_lower = X2 - X1 - t_score * sigma_factor
    CI_upper = X2 - X1 + t_score * sigma_factor
elif relative_mean == 'Lower':
    # Claim "difference is lower": left tail, interval (-inf, upper]
    # (same convention as the one-sample section).
    p_value = stats.t.cdf(t_value, df)
    CI_lower = -np.inf
    CI_upper = X2 - X1 + t_score * sigma_factor
else:
    # Claim "difference is higher": right tail, interval [lower, inf)
    p_value = stats.t.sf(t_value, df)
    CI_lower = X2 - X1 - t_score * sigma_factor
    CI_upper = np.inf

print(f"t-score: {t_score:.5f} for t({df}, {1 - alpha/test_side:.4f})")
print(f"C.I. for Difference in Means: ({CI_lower:.5f}, {CI_upper:.5f})")
print(f"t-value: {t_value:.5f}")
print(f"p-value: {p_value:.5f}")
if p_value < alpha:
    print("Reject the Null Hypothesis")
else:
    print("Accept the Null Hypothesis")

In [0]:
# Two-sample t test for the difference in means (μ2 - μ1) when the population
# variances are unknown and NOT assumed equal (Welch's t test).

# Sample sizes
n1, n2 = 20, 20

# Sample means
X1, X2 = 3.27, 2.53
# Xd = X2 - X1

# Hypothesized population means; only the difference μ2 - μ1 enters the test
μ1, μ2 = 0, 0
# μd = μ2 - μ1

# Sample standard deviations (squared below to obtain the sample variances)
s1, s2 = 1.30, 1.16

confidence = 0.95       # 0.95 for 95 % Confidence Interval
test_side = 2           # 1 for one sided, 2 for two sided
relative_mean = 'Lower'  # Claim of relative position of population mean, 'Lower' or 'Higher', needed for 1 sided test
alpha = 1 - confidence

# Degrees of Freedom
# Welch's T test (Welch–Satterthwaite approximation; usually not an integer)
df = (s1**2/n1 + s2**2/n2)**2 / ((s1**2/n1)**2/(n1 - 1) + (s2**2/n2)**2/(n2 - 1))

t_score = stats.t.ppf(1 - alpha / test_side, df)
# Standard error of the difference between the two sample means
sigma_factor = np.sqrt(s1**2/n1 + s2**2/n2)

t_value = (X2 - X1 - (μ2 - μ1)) / sigma_factor

# The p-value tail(s) and the interval must both follow test_side and
# relative_mean, so the p-value decision agrees with the printed interval.
if test_side == 2:
    # Two-sided: probability of a statistic at least this extreme in either tail
    p_value = 2 * stats.t.sf(abs(t_value), df)
    CI_lower = X2 - X1 - t_score * sigma_factor
    CI_upper = X2 - X1 + t_score * sigma_factor
elif relative_mean == 'Lower':
    # Claim "difference is lower": left tail, interval (-inf, upper]
    # (same convention as the one-sample section).
    p_value = stats.t.cdf(t_value, df)
    CI_lower = -np.inf
    CI_upper = X2 - X1 + t_score * sigma_factor
else:
    # Claim "difference is higher": right tail, interval [lower, inf)
    p_value = stats.t.sf(t_value, df)
    CI_lower = X2 - X1 - t_score * sigma_factor
    CI_upper = np.inf

print(f"t-score: {t_score:.5f} for t({df:.2f}, {1 - alpha/test_side:.4f})")
print(f"C.I. for Difference in Means: ({CI_lower:.5f}, {CI_upper:.5f})")
print(f"t-value: {t_value:.5f}")
print(f"p-value: {p_value:.5f}")
if p_value < alpha:
    print("Reject the Null Hypothesis")
else:
    print("Accept the Null Hypothesis")

# Two Sample test for ΔVariance 

In [0]:
# Two-sided F test for equality of two population variances.
#   H0: sigma1 = sigma2
#   HA: sigma1 != sigma2

# Sample sizes
n1, n2 = 21, 25

# Sample standard deviations (squared below to form the variance ratio)
s1, s2 = 1.30, 1.16

confidence = 0.95       # 0.95 for 95 % Confidence Interval
test_side = 2           # always 2: this test is two sided
alpha = 1 - confidence

# Numerator / denominator degrees of freedom and the probability in each tail
dfn, dfd = n1 - 1, n2 - 1
tail_prob = alpha / test_side

# Observed variance ratio and the two critical values bounding the
# non-rejection region
F_value = s1**2 / s2**2
F_lower = stats.f.ppf(tail_prob, dfn, dfd)
F_upper = stats.f.ppf(1 - tail_prob, dfn, dfd)

print(f"F-value: {F_value:.5f}")
print(f"F-C.I.: ({F_lower:.5f}, {F_upper:.5f})")

# H0 is kept when the observed ratio lies between the two critical values.
ratio_in_region = F_lower <= F_value <= F_upper
if ratio_in_region:
    print("Accept the Null Hypothesis: Both Variances are Equal")
else:
    print("Reject the Null Hypothesis: Both Variances are NOT Equal")


# ANOVA

In [0]:
# One-way ANOVA over any number of groups; returns a one-row ANOVA table.
def anova(*args):
    """Run a one-way ANOVA on two or more groups of observations.

    Args:
        *args: One sequence of numeric observations per group.

    Returns:
        A one-row pandas DataFrame with the columns:
            n    number of groups
            N    total number of observations
            df   between-group degrees of freedom (n - 1)
            sst  total sum of squares about the grand mean
            ssw  between-group sum of squares (msb = ssw / df); the column
                 label is kept unchanged for backward compatibility
            sse  within-group (error) sum of squares, sst - ssw
            msb  mean square between groups
            mse  mean square error, sse / (N - n)
            f    F statistic, msb / mse
            p    right-tail p-value of f under F(df, N - n)

    Raises:
        ValueError: If fewer than two groups are given, a group is empty, or
            there are no error degrees of freedom (N <= n).
    """
    groups = [np.asarray(arg, dtype=float) for arg in args]
    n = len(groups)
    if n < 2:
        raise ValueError("anova needs at least two groups")
    if any(group.size == 0 for group in groups):
        raise ValueError("anova groups must not be empty")
    N = sum(group.size for group in groups)
    if N <= n:
        raise ValueError("anova needs more observations than groups")
    df = n - 1

    # Total variation is measured about the grand mean of all observations.
    # Measuring it about each group's own mean gives the within-group sum
    # instead, which left the error term at zero and F infinite.
    grand_mean = np.concatenate(groups).mean()
    sst = sum(((group - grand_mean) ** 2).sum() for group in groups)

    # Between-group variation: each group's mean against the grand mean,
    # weighted by the group size.
    ssw = sum(group.size * (group.mean() - grand_mean) ** 2 for group in groups)

    # What remains is the within-group (error) variation.
    sse = sst - ssw
    msb = ssw / df
    mse = sse / (N - n)
    f = msb / mse
    p = stats.f.sf(f, df, N - n)
    return pd.DataFrame([[n, N, df, sst, ssw, sse, msb, mse, f, p]],
                        columns=['n', 'N', 'df', 'sst', 'ssw', 'sse', 'msb', 'mse', 'f', 'p'])

print(anova([1, 2, 3, 7], [4, 8, 6, 7], [7, 8, 9, 7]))