## One-sample Z-test (two-tailed)

In [None]:
import numpy as np
from scipy import stats

def z_test(sample_data, population_mean, population_std, alpha=0.05):
    """Run a two-tailed one-sample Z-test against a known population.

    Args:
        sample_data: Non-empty sequence (or array) of numeric observations.
        population_mean: Population mean under the null hypothesis.
        population_std: Known population standard deviation; must be > 0.
        alpha: Significance level. The null hypothesis is rejected when the
            p-value is strictly smaller than this value.

    Returns:
        A tuple ``(z, p_value, result)`` holding the Z statistic, the
        two-tailed p-value and a human-readable decision string.

    Raises:
        ValueError: If ``sample_data`` is empty or ``population_std`` is not
            strictly positive.
    """
    values = np.asarray(sample_data, dtype=float)
    n = values.size
    if n == 0:
        raise ValueError("sample_data must contain at least one observation")
    if population_std <= 0:
        raise ValueError("population_std must be strictly positive")

    # Standard error of the mean for a known population standard deviation.
    standard_error = population_std / np.sqrt(n)
    z = (values.mean() - population_mean) / standard_error

    # Use the survival function rather than ``1 - cdf``: the subtraction
    # rounds to exactly 0.0 once |z| is large, whereas ``sf`` keeps the tiny
    # tail probability.
    p_value = 2 * stats.norm.sf(abs(z))  # Two-tailed test

    if p_value < alpha:
        result = "Reject the null hypothesis"
    else:
        result = "Fail to reject the null hypothesis"

    return z, p_value, result

# Demo: check whether seven readings are consistent with a population whose
# mean is 128 and whose (known) standard deviation is 15.
sample_data = [120, 130, 125, 135, 110, 140, 150]
population_mean, population_std = 128, 15

z_stat, p_val, decision = z_test(
    sample_data,
    population_mean,
    population_std,
)
print(
    f"Z-Statistic: {z_stat}",
    f"P-Value: {p_val}",
    f"Decision: {decision}",
    sep="\n",
)


Z-Statistic: 0.35276684147527876
P-Value: 0.7242632442784087
Decision: Fail to reject the null hypothesis


## One-sample t-test (two-tailed)

In [None]:
import numpy as np
from scipy import stats

def t_test(sample_data, population_mean, alpha=0.05):
    """Run a two-tailed one-sample t-test.

    Args:
        sample_data: Sequence (or array) of at least two numeric observations.
        population_mean: Population mean under the null hypothesis.
        alpha: Significance level. The null hypothesis is rejected when the
            p-value is strictly smaller than this value.

    Returns:
        A tuple ``(t_stat, p_value, result)`` holding the t statistic, the
        two-tailed p-value (``n - 1`` degrees of freedom) and a
        human-readable decision string.

    Raises:
        ValueError: If fewer than two observations are supplied, because the
            sample standard deviation (``ddof=1``) is undefined in that case.
    """
    values = np.asarray(sample_data, dtype=float)
    n = values.size
    if n < 2:
        raise ValueError("sample_data must contain at least two observations")

    # ddof=1 gives the unbiased (Bessel-corrected) sample standard deviation.
    sample_std = values.std(ddof=1)
    standard_error = sample_std / np.sqrt(n)
    t_stat = (values.mean() - population_mean) / standard_error

    # Use the survival function rather than ``1 - cdf``: the subtraction
    # rounds to exactly 0.0 for very large |t|, whereas ``sf`` keeps the tiny
    # tail probability.
    p_value = 2 * stats.t.sf(np.abs(t_stat), df=n - 1)  # Two-tailed test

    if p_value < alpha:
        result = "Reject the null hypothesis"
    else:
        result = "Fail to reject the null hypothesis"

    return t_stat, p_value, result

# Demo: test seven observations against a hypothesised mean of 26.
population_mean = 26
sample_data = [22, 25, 29, 30, 24, 20, 28]

t_stat, p_val, decision = t_test(
    sample_data,
    population_mean,
)
print(
    f"T-Statistic: {t_stat}",
    f"P-Value: {p_val}",
    f"Decision: {decision}",
    sep="\n",
)


T-Statistic: -0.40474995441293204
P-Value: 0.6996958415192993
Decision: Fail to reject the null hypothesis


## Chi-square goodness-of-fit test

In [None]:
import numpy as np
from scipy import stats

def chi_square_test(observed, expected, alpha=0.05):
    """Run a chi-square goodness-of-fit test.

    Args:
        observed: Observed counts, one per category.
        expected: Expected counts, one per category, in the same order as
            ``observed``. Each expected count is used as a divisor, so it
            must be non-zero.
        alpha: Significance level. The null hypothesis is rejected when the
            p-value is strictly smaller than this value.

    Returns:
        A tuple ``(chi_square_stat, p_value, result)`` holding the chi-square
        statistic, its upper-tail p-value (``k - 1`` degrees of freedom for
        ``k`` categories) and a human-readable decision string.

    Raises:
        ValueError: If ``observed`` and ``expected`` differ in length or
            contain fewer than two categories.
    """
    observed = list(observed)
    expected = list(expected)
    # zip() would silently drop trailing categories on a length mismatch,
    # so reject mismatched inputs explicitly.
    if len(observed) != len(expected):
        raise ValueError("observed and expected must have the same length")
    if len(observed) < 2:
        raise ValueError("at least two categories are required")

    chi_square_stat = sum(
        (obs - exp) ** 2 / exp for obs, exp in zip(observed, expected)
    )
    df = len(observed) - 1

    # Use the survival function rather than ``1 - cdf``: the subtraction
    # rounds to exactly 0.0 for large statistics, whereas ``sf`` keeps the
    # tiny tail probability.
    p_value = stats.chi2.sf(chi_square_stat, df)

    if p_value < alpha:
        result = "Reject the null hypothesis"
    else:
        result = "Fail to reject the null hypothesis"

    return chi_square_stat, p_value, result

# Demo: compare observed counts in three categories with the expected counts.
expected = [40, 40, 20]
observed = [50, 30, 20]

chi_stat, p_val, decision = chi_square_test(
    observed,
    expected,
)
print(
    f"Chi-Square Statistic: {chi_stat}",
    f"P-Value: {p_val}",
    f"Decision: {decision}",
    sep="\n",
)


Chi-Square Statistic: 5.0
P-Value: 0.08208499862389884
Decision: Fail to reject the null hypothesis


## One-way ANOVA

In [None]:
import numpy as np

def anova(*groups):
    """Run a one-way ANOVA across two or more groups.

    Args:
        *groups: Two or more non-empty one-dimensional sequences (or arrays)
            of numeric observations, one per group.

    Returns:
        A tuple ``(F_stat, p_value)`` holding the F statistic (between-group
        mean square divided by within-group mean square) and its upper-tail
        p-value.

    Raises:
        ValueError: If fewer than two groups are given, if any group is
            empty, or if the total number of observations does not exceed
            the number of groups (no within-group degrees of freedom).
    """
    # Imported locally: the cell defining this function imports only numpy,
    # so ``stats`` would otherwise be available only if an earlier cell ran.
    from scipy import stats

    if len(groups) < 2:
        raise ValueError("anova requires at least two groups")
    samples = [np.asarray(group, dtype=float) for group in groups]
    if any(sample.size == 0 for sample in samples):
        raise ValueError("every group must contain at least one observation")

    all_data = np.concatenate(samples)
    df_between = len(samples) - 1
    df_within = all_data.size - len(samples)
    if df_within <= 0:
        raise ValueError(
            "the total number of observations must exceed the number of groups"
        )

    grand_mean = all_data.mean()

    # Between-group sum of squares: group size times squared distance of the
    # group mean from the grand mean.
    SSB = sum(
        sample.size * (sample.mean() - grand_mean) ** 2 for sample in samples
    )
    # Within-group sum of squares: squared deviations from each group's mean.
    SSW = sum(np.sum((sample - sample.mean()) ** 2) for sample in samples)

    MSB = SSB / df_between
    MSW = SSW / df_within
    F_stat = MSB / MSW

    # Use the survival function rather than ``1 - cdf``: the subtraction
    # rounds to exactly 0.0 for large F, whereas ``sf`` keeps the tiny tail
    # probability.
    p_value = stats.f.sf(F_stat, df_between, df_within)

    return F_stat, p_value

# Demo: three groups of five observations with clearly separated means.
group1, group2, group3 = (
    [20, 21, 22, 23, 24],
    [30, 31, 32, 33, 34],
    [40, 41, 42, 43, 44],
)

F_stat, p_val = anova(group1, group2, group3)
print(
    f"F-Statistic: {F_stat}",
    f"P-Value: {p_val}",
    sep="\n",
)


F-Statistic: 200.0
P-Value: 6.105260741406937e-10
