# Statistics Literacy delta 02

In [62]:
import numpy as np
from scipy import stats
import pandas as pd
from sympy import *


def read_from_excel(file_path):
    """Load the first two columns of an Excel sheet as two 1-D sample arrays.

    Missing cells are dropped independently per column, so the two returned
    samples may have different lengths.

    Parameters
    ----------
    file_path : str or path-like
        Workbook location, passed straight to ``pandas.read_excel``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(A, B)``: the non-missing values of the first and second column,
        in sheet order.

    Raises
    ------
    IndexError
        If the sheet has fewer than two columns.
    """
    df = pd.read_excel(file_path)
    # Series.dropna handles every dtype (int, float, object, datetime), whereas
    # np.isnan raises TypeError on object-dtype columns such as ones that
    # contain None or text.
    A = df.iloc[:, 0].dropna().to_numpy()
    B = df.iloc[:, 1].dropna().to_numpy()
    return A, B

## Week 1-1

In [58]:
# A, B = read_from_excel('practice1-1.xlsx') # practice 1-1
A, B = read_from_excel('mini-exam1-1.xlsx') # exercise 1-1

# --- Descriptive statistics of the two samples ------------------------------
n1, n2 = len(A), len(B)
mean1, mean2 = np.mean(A), np.mean(B)
mean_diff = mean1 - mean2
# ddof=1 -> unbiased (sample) variance
var1, var2 = np.var(A, ddof=1), np.var(B, ddof=1)

# --- Pooled-variance (equal-variance) two-sample t-test ---------------------
degrees_of_freedom = n1 + n2 - 2
pooled_variance = ((n1 - 1) * var1 + (n2 - 1) * var2) / degrees_of_freedom
standard_error = np.sqrt(pooled_variance * (1/n1 + 1/n2))
t_statistic = mean_diff / standard_error
# Two-tailed p-value: twice the upper-tail probability of |t|
p_value = 2 * stats.t.sf(np.abs(t_statistic), df=degrees_of_freedom)

summary_template = "Degrees of freedom={}\nt-statistic={:.3f}\nTwo-tailed test p-value = {:.3f}"
print(summary_template.format(degrees_of_freedom, t_statistic, p_value))

# --- Critical values and confidence interval --------------------------------
# Overall significance level of the two-tailed test (here 0.05),
# split evenly between the two tails.
alpha = 0.05
alpha_half = alpha / 2
# ppf at alpha/2 is the lower-tail (negative) critical value; the upper one
# is its mirror image because the t distribution is symmetric.
t_critical = stats.t.ppf(alpha_half, degrees_of_freedom)
negative_t_critical, positive_t_critical = t_critical, -t_critical

# Interval for mean1 - mean2: mean_diff -/+ |t_critical| * standard_error
lower_conf = mean_diff + negative_t_critical * standard_error
upper_conf = mean_diff + positive_t_critical * standard_error

print("Rejection region: t_0 < {:.3f} or t_0 > {:.3f}".format(negative_t_critical, positive_t_critical))
print("Confidence limit=[{:.3f}, {:.3f}]".format(lower_conf, upper_conf))

Degrees of freedom=18
t-statistic=nan
Two-tailed test p-value = nan
Rejection region: t_0 < -2.101 or t_0 > 2.101
Confidence limit=[nan, nan]


## Week 1-2

In [72]:
# Load the two samples consumed by the next cell. The commented-out line
# below switches the input to the practice workbook instead.
# A, B = read_from_excel('practice1-2.xlsx') # exercise 1-2
A, B = read_from_excel('mini-exam1-2.xlsx') # exam 1-2
# Echo both samples so the loaded values can be checked by eye.
print(f"A={A}\nB={B}")

A=[164.3 167.3 161.9 170.8 171.3 166.  168.6 162.7 165.7 168.2]
B=[166.1 164.5 164.4 162.9 159.2 170.2 168.8 164.2]


In [78]:
# Two-sided F-test for equality of variances of samples A and B
# (A and B are loaded by the preceding cell).

# Sample sizes
n1 = len(A)
n2 = len(B)
# Means
mean1 = np.mean(A)
mean2 = np.mean(B)
# Variances. Use ddof=1 for sample variance
var1 = np.var(A, ddof=1)
var2 = np.var(B, ddof=1)
# Test statistic F_0 = var1 / var2. The misspelled name is kept as-is so that
# any other cell referring to it keeps working.
varience_ratio = var1 / var2
print("Mean(A)={:.3f}, Mean(B)={:.3f}".format(mean1, mean2))
print("Unbiased variance estimators Var(A)={:.6f}, Var(B)={:.6f}, v_1/v_2={:.5f}".format(var1, var2, varience_ratio))

# Overall significance level of the two-tailed test (here 0.05)
alpha = 0.05
# For a two-tailed test, divide the alpha by 2
alpha_half = alpha / 2
# Lower and upper critical values of the F distribution with
# (n1 - 1, n2 - 1) degrees of freedom
f_critical_low = stats.f.ppf(alpha_half, dfn=n1-1, dfd=n2-1)
f_critical_high = stats.f.ppf(1 - alpha_half, dfn=n1-1, dfd=n2-1)

# Interval for var1/var2: dividing by the *upper* quantile gives the lower
# limit, and dividing by the *lower* quantile gives the upper limit.
lower_conf = varience_ratio / f_critical_high
upper_conf = varience_ratio / f_critical_low
# The statistic compared against these bounds is the F ratio, so it is
# labelled F_0 rather than t_0.
print("Rejection region: F_0<{:.5f} or F_0>{:.5f}".format(f_critical_low, f_critical_high))
print("confidence interval for var1/var2=[{:.5f}, {:.5f}]".format(lower_conf, upper_conf))

Mean(A)=166.680, Mean(B)=165.038
Unbiased variance estimators Var(A)=10.052889, Var(B)=11.711250, v_1/v_2=0.85840
Rejection region: t_0<0.23826 or t_0>4.82322
confidence interval for var1/var2=[0.17797, 3.60273]


## Week 1-3

In [92]:
# Load the two samples consumed by the next cell.
A, B = read_from_excel('mini-exam1-3.xlsx') # Video 1-3
# Echo both samples so the loaded values can be checked by eye.
print(f"A={A}\nB={B}")

A=[252 257 256 238 258 264 251 251 267 228 268 265 263 231 288 251 222 248
 263 261 249 247 260 251 248]
B=[248. 265. 245. 265. 288. 257. 279. 243. 257. 271. 264. 281.]


In [94]:
# Welch's two-sample t-test (variances not assumed equal) for samples A and B
# (A and B are loaded by the preceding cell).

# Sample sizes
n1 = len(A)
n2 = len(B)
# Means
mean1 = np.mean(A)
mean2 = np.mean(B)
mean_diff = mean1 - mean2
# Variances
var1 = np.var(A, ddof=1)  # Use ddof=1 for sample variance
var2 = np.var(B, ddof=1)
# Standard error of mean1 - mean2 without pooling the variances
standard_error = np.sqrt(var1 / n1 + var2 / n2)
# t-statistic
t_statistic = (mean1 - mean2) / standard_error
print("Mean(A)={:.3f}, Mean(B)={:.3f}".format(mean1, mean2))
print("Unbiased variance estimators Var(A)={:.5f}, Var(B)={:.5f}".format(var1, var2))
print("t-statistic={:.3f}".format(t_statistic))

# Welch-Satterthwaite approximation of the degrees of freedom
est_var = (var1/n1)**2 / (n1-1) + (var2/n2)**2 / (n2-1)
degrees_of_freedom = (var1/n1 + var2 / n2) ** 2 / est_var
print("Degrees_of_freedom={:.5f}".format(degrees_of_freedom))

# Significance level for a two-tailed test
alpha = 0.05
alpha_half = alpha / 2 # For a two-tailed test, divide the alpha by 2
# Calculate the (negative) t critical value from the data instead of using a
# hard-coded table value. The fractional degrees of freedom are rounded down
# (`// 1` floors the float and stays NaN-safe), which is the conservative
# table-lookup convention; for df ~ 21.14 this yields -2.079614.
t_critical = stats.t.ppf(alpha_half, degrees_of_freedom // 1)
# t_critical is negative, so adding it gives the lower limit
lower_conf = mean_diff + t_critical * standard_error
upper_conf = mean_diff - t_critical * standard_error
print("Rejection region: |t_0| > {:.3f}".format(np.abs(t_critical)))
# Backslashes are escaped explicitly: "\m" is an invalid escape sequence.
print("confidence interval for \\mu1-\\mu2=[{:.3f}, {:.3f}]".format(lower_conf, upper_conf))

Mean(A)=253.480, Mean(B)=263.583
Unbiased variance estimators Var(A)=195.09333, Var(B)=208.62879
t-statistic=-2.013
Degrees_of_freedom=21.13907
Rejection region: |t_0| > 2.080
confidence interval for \mu1-\mu2=[-20.541, 0.334]
