In [1]:
import numpy as np
from scipy.stats import t

# Confidence interval

In [2]:
def compare_means_and_confidence_interval(data1, data2, confidence=0.64):
    """Two-sided pooled-variance t-test and confidence interval for mean1 - mean2.

    The standard error is built from the pooled sample variance so that it is
    consistent with the ``n1 + n2 - 2`` degrees of freedom used for the
    critical value. For equal sample sizes this is numerically the same as
    ``sqrt(s1**2 / n1 + s2**2 / n2)``; for unequal sizes only the pooled form
    matches those degrees of freedom.

    Parameters
    ----------
    data1, data2 : sequence of numbers
        The two samples; each must contain at least two observations.
    confidence : float, default 0.64
        Confidence level of the interval, strictly between 0 and 1. The test
        is carried out at significance level ``1 - confidence``.

    Returns
    -------
    reject_null : bool-like
        True when ``|t| > t_critical``.
    (lower_bound, upper_bound) : tuple of float
        Confidence interval for ``mean(data1) - mean(data2)``.

    Raises
    ------
    ValueError
        If either sample has fewer than two observations or ``confidence``
        is not strictly between 0 and 1.
    """
    n1 = len(data1)
    n2 = len(data2)
    if n1 < 2 or n2 < 2:
        raise ValueError("Each sample must contain at least two observations.")
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1.")

    mean_diff = np.mean(data1) - np.mean(data2)
    std1 = np.std(data1, ddof=1)
    std2 = np.std(data2, ddof=1)

    # Pooled variance: each sample variance weighted by its own degrees of freedom.
    degrees_of_freedom = n1 + n2 - 2
    pooled_var = ((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / degrees_of_freedom
    std_err = np.sqrt(pooled_var * (1 / n1 + 1 / n2))

    # Two-sided critical value: split 1 - confidence evenly between both tails.
    t_critical = t.ppf((1 + confidence) / 2, degrees_of_freedom)

    t_statistic = mean_diff / std_err
    reject_null = np.abs(t_statistic) > t_critical

    margin_of_error = t_critical * std_err
    lower_bound = mean_diff - margin_of_error
    upper_bound = mean_diff + margin_of_error

    return reject_null, (lower_bound, upper_bound)

# Example: compare two equally sized samples at a 64% confidence level.
data1 = [30, 40, 50, 60, 70]
data2 = [23, 30, 40, 58, 68]
confidence_level = 0.64

reject_null, confidence_interval = compare_means_and_confidence_interval(
    data1, data2, confidence=confidence_level
)

# Report the test decision first, then the interval for the mean difference.
print(
    "Null hypothesis rejected: There is a significant difference between the means."
    if reject_null
    else "Null hypothesis not rejected: There is no significant difference between the means."
)
print(f"Confidence Interval ({int(confidence_level * 100)}%): {confidence_interval}")


Null hypothesis not rejected: There is no significant difference between the means.
Confidence Interval (64%): (-4.491271348204455, 16.891271348204462)


In [3]:

def mean_confidence_interval(data, confidence=0.64):
    """Return the sample mean and a two-sided t-based confidence interval for it.

    Parameters
    ----------
    data : sequence of numbers
        The sample.
    confidence : float, default 0.64
        Confidence level of the interval.

    Returns
    -------
    tuple
        ``(mean, lower_bound, upper_bound)``.
    """
    sample_size = len(data)
    center = np.mean(data)

    # Standard error of the mean, using the ddof=1 sample standard deviation.
    standard_error = np.std(data, ddof=1) / np.sqrt(sample_size)

    # Two-sided critical value with sample_size - 1 degrees of freedom.
    critical_value = t.ppf((1 + confidence) / 2, sample_size - 1)
    half_width = standard_error * critical_value

    return center, center - half_width, center + half_width



In [4]:
# Example: 64% confidence interval for the mean of a single sample.
data = [1, 3, 4, 10, 12, 33, 22, 34, 42, 62]
confidence_level = 0.64

mean, lower_bound, upper_bound = mean_confidence_interval(data, confidence=confidence_level)

print(
    f"Mean: {mean}",
    f"Confidence Interval ({int(confidence_level * 100)}%): [{lower_bound}, {upper_bound}]",
    sep="\n",
)


Mean: 22.3
Confidence Interval (64%): [16.171223520988455, 28.428776479011546]


# hypothesis test

In [5]:
def student_t_test(sample1, sample2, a=0.05):
    """Two-sided, two-sample Student's t-test with pooled variance.

    The standard error uses the pooled sample variance, which is what the
    ``n1 + n2 - 2`` degrees of freedom of the reference distribution assume.
    For equal sample sizes this equals ``sqrt(s1**2 / n1 + s2**2 / n2)``; for
    unequal sizes the two differ and only the pooled form is consistent with
    those degrees of freedom.

    Parameters
    ----------
    sample1, sample2 : sequence of numbers
        The two samples; each must contain at least two observations.
    a : float, default 0.05
        Significance level the p-value is compared against.

    Returns
    -------
    tuple
        ``(reject_null, t_statistic, p_value)`` where ``reject_null`` is True
        when ``p_value < a``.

    Raises
    ------
    ValueError
        If either sample has fewer than two observations.
    """
    n1 = len(sample1)
    n2 = len(sample2)
    if n1 < 2 or n2 < 2:
        raise ValueError("Each sample must contain at least two observations.")

    mean1 = np.mean(sample1)
    mean2 = np.mean(sample2)
    std1 = np.std(sample1, ddof=1)
    std2 = np.std(sample2, ddof=1)

    # Pooled variance: each sample variance weighted by its own degrees of freedom.
    degrees_of_freedom = n1 + n2 - 2
    pooled_var = ((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / degrees_of_freedom
    std_err = np.sqrt(pooled_var * (1 / n1 + 1 / n2))

    t_statistic = (mean1 - mean2) / std_err

    # sf(x) is the upper-tail probability; calling it directly avoids the
    # cancellation in 1 - cdf(x) when |t| is large.
    p_value = 2 * t.sf(abs(t_statistic), df=degrees_of_freedom)

    reject_null = p_value < a

    return reject_null, t_statistic, p_value


In [6]:
# Example: two-sample test at the 5% significance level.
sample1 = [35, 45, 50, 65, 75]
sample2 = [25, 32, 44, 55, 66]
a = 0.05

reject_null, t_statistic, p_value = student_t_test(sample1, sample2, a)

# Report the decision, then the supporting statistics.
print(
    "Reject the null hypothesis: There is a significant difference between the means."
    if reject_null
    else "Fail to reject the null hypothesis: There is no significant difference between the means."
)
print(f"t-statistic: {t_statistic}", f"p-value: {p_value}", sep="\n")


Fail to reject the null hypothesis: There is no significant difference between the means.
t-statistic: 0.9304177823275082
p-value: 0.3793752018793941


# One-tailed test

In [7]:
def one_tailed_t_test(sample, null_mean, alternative='Greater', a=0.05):
    """One-sample, one-tailed t-test of the sample mean against ``null_mean``.

    Parameters
    ----------
    sample : sequence of numbers
        The sample; must contain at least two observations.
    null_mean : float
        Mean under the null hypothesis.
    alternative : str, default 'Greater'
        Direction of the alternative hypothesis, ``'greater'`` or ``'less'``.
        Matching ignores case and surrounding whitespace, so the capitalised
        spellings ``'Greater'`` / ``'Less'`` are accepted as well as the
        lowercase ones named in the error message.
    a : float, default 0.05
        Significance level the p-value is compared against.

    Returns
    -------
    tuple
        ``(reject_null, t_statistic, p_value)`` where ``reject_null`` is True
        when ``p_value < a``.

    Raises
    ------
    ValueError
        If ``alternative`` is not one of the two directions, or the sample
        has fewer than two observations.
    """
    # Validate the direction before doing any numeric work.
    direction = str(alternative).strip().lower()
    if direction not in ('greater', 'less'):
        raise ValueError("Invalid alternative hypothesis. Choose either 'greater' or 'less'.")

    n = len(sample)
    if n < 2:
        raise ValueError("The sample must contain at least two observations.")

    sample_mean = np.mean(sample)
    sample_std = np.std(sample, ddof=1)
    t_statistic = (sample_mean - null_mean) / (sample_std / np.sqrt(n))

    if direction == 'greater':
        # Upper-tail probability; sf avoids the cancellation in 1 - cdf.
        p_value = t.sf(t_statistic, df=n - 1)
    else:
        p_value = t.cdf(t_statistic, df=n - 1)

    reject_null = p_value < a

    return reject_null, t_statistic, p_value


In [8]:
# Example: is the sample mean greater than 12 at the 5% significance level?
sample = [11, 18, 13, 23, 24, 25, 47, 30]
null_mean = 12
alternative = 'Greater'
a = 0.05

reject_null, t_statistic, p_value = one_tailed_t_test(
    sample, null_mean, alternative, a
)

# Report the decision, then the supporting statistics.
print(
    "Reject the null hypothesis: The sample mean is significantly greater than the null mean"
    if reject_null
    else "Fail to reject the null hypothesis: The sample mean is not significantly greater than the null mean"
)
print(f"t-statistic: {t_statistic}", f"p-value: {p_value}", sep="\n")

Reject the null hypothesis: The sample mean is significantly greater than the null mean
t-statistic: 2.9739406708167078
p-value: 0.01034590253958445


# Two-tailed test

In [9]:
def two_tailed_t_test(sample, null_mean, a=0.05):
    """One-sample, two-tailed t-test of the sample mean against ``null_mean``.

    Parameters
    ----------
    sample : sequence of numbers
        The sample; must contain at least two observations.
    null_mean : float
        Mean under the null hypothesis.
    a : float, default 0.05
        Significance level the p-value is compared against.

    Returns
    -------
    tuple
        ``(reject_null, t_statistic, p_value)`` where ``reject_null`` is True
        when ``p_value < a``.

    Raises
    ------
    ValueError
        If the sample has fewer than two observations.
    """
    n = len(sample)
    if n < 2:
        raise ValueError("The sample must contain at least two observations.")

    sample_mean = np.mean(sample)
    sample_std = np.std(sample, ddof=1)
    t_statistic = (sample_mean - null_mean) / (sample_std / np.sqrt(n))
    degrees_of_freedom = n - 1

    # Use the survival function for the tail: 1 - cdf(x) rounds to exactly 0
    # once cdf(x) is within floating-point precision of 1, whereas sf(x)
    # keeps very small tail probabilities.
    p_value = 2 * t.sf(abs(t_statistic), df=degrees_of_freedom)

    reject_null = p_value < a

    return reject_null, t_statistic, p_value


In [10]:
# Example: does the sample mean differ from 12 at the 5% significance level?
sample = [11, 18, 13, 23, 24, 25, 47, 30]
null_mean = 12
a = 0.05

reject_null, t_statistic, p_value = two_tailed_t_test(sample, null_mean, a)

# Report the decision, then the supporting statistics.
print(
    "Reject the null hypothesis: The sample mean is significantly different from the null mean."
    if reject_null
    else "Fail to reject the null hypothesis: The sample mean is not significantly different from the null mean."
)
print(f"t-statistic: {t_statistic}", f"p-value: {p_value}", sep="\n")

Reject the null hypothesis: The sample mean is significantly different from the null mean.
t-statistic: 2.9739406708167078
p-value: 0.0206918050791689
