<a href="https://colab.research.google.com/github/newmantic/hypothesis_test/blob/main/hypothesis_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from scipy.stats import norm

def z_test(sample_mean, population_mean, population_std, n):
    """
    Perform a two-sided one-sample Z-test.

    Parameters:
    sample_mean (float): The mean of the sample.
    population_mean (float): The mean of the population under the null hypothesis.
    population_std (float): The known standard deviation of the population (must be > 0).
    n (int): The sample size (must be > 0).

    Returns:
    float: The Z score.
    float: The two-sided p-value.

    Raises:
    ValueError: If n or population_std is not strictly positive.
    """
    # Reject inputs that would otherwise yield a zero/undefined standard error
    # and a silently meaningless result.
    if np.any(np.asarray(n) <= 0):
        raise ValueError("n must be a positive sample size")
    if np.any(np.asarray(population_std) <= 0):
        raise ValueError("population_std must be positive")

    standard_error = population_std / np.sqrt(n)
    z_score = (sample_mean - population_mean) / standard_error
    # Survival function instead of 1 - cdf: for large |z| the cdf rounds to 1.0
    # and the subtraction would collapse the p-value to exactly 0.
    p_value = 2 * norm.sf(abs(z_score))
    return z_score, p_value

# Testable example: a sample of n=30 with mean 105, tested against a population
# with mean 100 and standard deviation 15.
z_score, p_value = z_test(sample_mean=105, population_mean=100, population_std=15, n=30)
print(f"Z-Score: {z_score}, P-Value: {p_value}")

Z-Score: 1.8257418583505538, P-Value: 0.067889154861829


In [2]:
from scipy.stats import t

def t_test(sample1, sample2):
    """
    Perform a two-sided independent two-sample t-test with pooled variance.

    The two sample variances are pooled into a single estimate, so the test
    assumes both groups share the same variance (Student's t-test).

    Parameters:
    sample1 (list): Sample data from group 1 (at least 2 observations).
    sample2 (list): Sample data from group 2 (at least 2 observations).

    Returns:
    float: The t-statistic.
    float: The two-sided p-value.

    Raises:
    ValueError: If either sample has fewer than 2 observations.
    """
    n1, n2 = len(sample1), len(sample2)
    # Each sample variance uses ddof=1, which is undefined for fewer than
    # two observations and would otherwise propagate NaN silently.
    if n1 < 2 or n2 < 2:
        raise ValueError("each sample must contain at least 2 observations")

    mean1, mean2 = np.mean(sample1), np.mean(sample2)
    var1, var2 = np.var(sample1, ddof=1), np.var(sample2, ddof=1)

    df = n1 + n2 - 2
    pooled_std = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / df)
    t_stat = (mean1 - mean2) / (pooled_std * np.sqrt(1/n1 + 1/n2))

    # Survival function instead of 1 - cdf: for large |t| the cdf rounds to 1.0
    # and the subtraction would collapse the p-value to exactly 0.
    p_value = 2 * t.sf(abs(t_stat), df)
    return t_stat, p_value

# Testable example: compare two groups of six observations each.
sample1 = [12.9, 10.5, 9.7, 13.4, 11.9, 10.2]
sample2 = [8.7, 9.3, 10.1, 7.5, 8.2, 9.0]
t_stat, p_value = t_test(sample1, sample2)
print(f"T-Statistic: {t_stat}, P-Value: {p_value}")

T-Statistic: 3.64322320028733, P-Value: 0.0045126389834855285


In [3]:
from scipy.stats import chi2_contingency

def chi_squared_test(observed):
    """
    Run a Chi-Squared test of independence on a contingency table.

    Thin wrapper that delegates to scipy.stats.chi2_contingency with its
    default settings and hands the four results back as a plain tuple.

    Parameters:
    observed (list of lists): Contingency table of observed frequencies.

    Returns:
    float: The chi-squared statistic.
    float: The p-value.
    int: The degrees of freedom.
    np.ndarray: The expected frequencies.
    """
    # tuple() yields (statistic, p-value, dof, expected) in that order.
    return tuple(chi2_contingency(observed))

# Testable example: a contingency table with two rows and three columns of observed counts.
observed = [[10, 20, 30], [6, 9, 17]]
chi2, p_value, dof, expected = chi_squared_test(observed)
print(f"Chi-Squared: {chi2}, P-Value: {p_value}, Degrees of Freedom: {dof}")
print("Expected Frequencies:")
print(expected)

Chi-Squared: 0.27157465150403504, P-Value: 0.873028283380073, Degrees of Freedom: 2
Expected Frequencies:
[[10.43478261 18.91304348 30.65217391]
 [ 5.56521739 10.08695652 16.34782609]]
