In [1]:
import numpy as np
import scipy.stats as st

# Observations (reported below in million characters)
data = np.array([1.13, 1.55, 1.43, 0.92, 1.25, 1.36, 1.32, 0.85, 1.07, 1.48, 1.20, 1.33, 1.18, 1.22, 1.29])
n = data.size

# a. 99% confidence interval for the mean, population sigma unknown
#    -> estimate sigma from the sample and use Student's t with n - 1 df.

confidence_level = 0.99
alpha = 1 - confidence_level  # total probability left in the two tails
df = n - 1                    # degrees of freedom for the t-distribution

# Point estimates; ddof=1 gives the unbiased (n - 1) sample standard deviation
sample_mean = data.mean()
sample_std = data.std(ddof=1)

# Two-sided critical value: alpha/2 in each tail
t_critical = st.t.ppf(1 - alpha/2, df)

# Half-width of the interval = critical value * standard error of the mean
standard_error = sample_std / np.sqrt(n)
margin_of_error = t_critical * standard_error

lower_bound_sample, upper_bound_sample = (
    sample_mean - margin_of_error,
    sample_mean + margin_of_error,
)

print(
    "99% Confidence Interval (using sample std):",
    f"Lower Bound: {lower_bound_sample:.4f} million characters",
    f"Upper Bound: {upper_bound_sample:.4f} million characters",
    sep="\n",
)

99% Confidence Interval (using sample std):
Lower Bound: 1.0902 million characters
Upper Bound: 1.3871 million characters


In [3]:
# b. 99% confidence interval for the mean, population sigma known
#    Reuses `alpha`, `n` and `sample_mean` from part a, so that cell must run first.

# Sigma is given rather than estimated from the sample
population_std = 0.2

# With sigma known, the two-sided critical value comes from the standard normal
z_critical = st.norm.ppf(1 - alpha/2)

# Half-width of the interval = critical value * sigma / sqrt(n)
margin_of_error_pop = z_critical * (population_std / np.sqrt(n))

lower_bound_pop, upper_bound_pop = (
    sample_mean - margin_of_error_pop,
    sample_mean + margin_of_error_pop,
)

print(
    "\n99% Confidence Interval (using population std):",
    f"Lower Bound: {lower_bound_pop:.4f} million characters",
    f"Upper Bound: {upper_bound_pop:.4f} million characters",
    sep="\n",
)


99% Confidence Interval (using population std):
Lower Bound: 1.1057 million characters
Upper Bound: 1.3717 million characters


In [5]:
# Explanation and rationale for parts a and b, printed as plain text.
# Each entry is (section heading, paragraph); the loop below emits a blank
# line, the heading followed by a colon, then the paragraph.
# NOTE(review): the printed text does not mention that, for a sample this
# small, both intervals rely on the data being approximately normal.

rationale_sections = (
    (
        "Part a",
        "Since the population standard deviation is unknown, we use the sample standard deviation to estimate it.  Because we're using the sample standard deviation, the t-distribution is appropriate. The t-distribution accounts for the added uncertainty introduced by estimating the population standard deviation from the sample. We calculate the t-critical value based on the desired confidence level (99%) and the degrees of freedom (n-1). The margin of error is then calculated using the t-critical value, sample standard deviation, and sample size. Finally, the confidence interval is constructed by adding and subtracting the margin of error from the sample mean.",
    ),
    (
        "Part b",
        "When the population standard deviation is known, we can use the z-distribution.  This is because we have precise knowledge of the variability of the population. We calculate the z-critical value based on the confidence level. The margin of error is calculated using the z-critical value, population standard deviation, and sample size. The confidence interval is then constructed around the sample mean, as in part a, but using the z-based margin of error.",
    ),
)

print("\nExplanation and Rationale:")

for section_title, section_text in rationale_sections:
    print(f"\n{section_title}:")
    print(section_text)



Explanation and Rationale:

Part a:
Since the population standard deviation is unknown, we use the sample standard deviation to estimate it.  Because we're using the sample standard deviation, the t-distribution is appropriate. The t-distribution accounts for the added uncertainty introduced by estimating the population standard deviation from the sample. We calculate the t-critical value based on the desired confidence level (99%) and the degrees of freedom (n-1). The margin of error is then calculated using the t-critical value, sample standard deviation, and sample size. Finally, the confidence interval is constructed by adding and subtracting the margin of error from the sample mean.

Part b:
When the population standard deviation is known, we can use the z-distribution.  This is because we have precise knowledge of the variability of the population. We calculate the z-critical value based on the confidence level. The margin of error is calculated using the z-critical value, popul