In [10]:
import numpy as np
import pandas as pd
from scipy.stats import t, norm

In [11]:
# Observed sample of 15 measurements.
# Note: despite the name, `df` is a 1-D NumPy array, not a pandas DataFrame.
df = np.array([
    1.13, 1.55, 1.43, 0.92, 1.25,
    1.36, 1.32, 0.85, 1.07, 1.48,
    1.20, 1.33, 1.18, 1.22, 1.29,
])

# A: 99% Confidence Interval Using Sample Standard Deviation

 calculate the sample mean

In [12]:

# Point estimate of the population mean: the arithmetic average of the sample.
sample_mean = df.mean()
sample_mean

1.2386666666666666

calculate the sample standard deviation

In [13]:


# Sample standard deviation.
# ddof=1 divides by (n - 1), which is the estimator to use when the data are a
# sample; ddof=0 would divide by n and give the population standard deviation.
sample_std = df.std(ddof=1)
sample_std

0.19316412956959936

In [14]:
#  Determine the t-value. Since the sample size is small (n < 30) and the population standard deviation
#  is unknown, we use the t-distribution.
#  The t-value is found from the degrees of freedom (n - 1) and the desired confidence level.

In [15]:
# Confirm the number of observations in the sample; this count is what the
# degrees of freedom (n - 1) are derived from.
df.shape

(15,)

 calculate n, confidence level, and alpha

In [25]:
# Desired confidence level and the total tail probability (alpha) that is left
# outside the interval, split across both tails when the critical value is computed.
confidence_level = 0.99
alpha = 1 - confidence_level

# Sample size: length of the 1-D data array.
n = df.shape[0]

n, confidence_level, alpha

(15, 0.99, 0.010000000000000009)

 calculate the t-value for 99% confidence level

In [26]:
# Two-sided critical value: alpha/2 goes in each tail, so we need the
# (1 - alpha/2) quantile of Student's t with n - 1 degrees of freedom.
# Note: the `df=` keyword below is scipy's name for degrees of freedom,
# not the data array that is also called `df` in this notebook.
degrees_of_freedom = n - 1
upper_quantile = 1 - alpha / 2
t_value = t.ppf(upper_quantile, df=degrees_of_freedom)
t_value

2.97684273411266

 calculate the margin of error and confidence interval

In [28]:

# Standard error of the mean: s / sqrt(n).
standard_error = sample_std / np.sqrt(n)

# Margin of error is the critical value times the standard error; the
# interval is the sample mean plus/minus that margin.
margin_of_error = t_value * standard_error
lower_limit = sample_mean - margin_of_error
upper_limit = sample_mean + margin_of_error
confidence_interval = (lower_limit, upper_limit)


In [29]:
# Display the margin of error together with the (lower, upper) interval bounds.
margin_of_error,confidence_interval

(0.1484693282152996, (1.090197338451367, 1.3871359948819662))

In [31]:
# Report Part A, rounded to two decimals; one line per argument via sep="\n".
print(
    "A: 99% Confidence Interval Using Sample Standard Deviation",
    f"Sample Mean: {sample_mean:.2f}",
    f"Sample Standard Deviation: {sample_std:.2f}",
    f"99% Confidence Interval: {confidence_interval[0]:.2f} to {confidence_interval[1]:.2f}",
    sep="\n",
)

A: 99% Confidence Interval Using Sample Standard Deviation
Sample Mean: 1.24
Sample Standard Deviation: 0.19
99% Confidence Interval: 1.09 to 1.39


In [44]:
# The interval from 1.09 to 1.39 means that we are 99% confident the true population mean lies within this
# range. In other words, if we took many samples and calculated a confidence interval from each one, about
# 99 percent of those intervals would include the true mean.

Explain the steps you take and the rationale behind using the t-distribution for this task.

In [None]:
# Why did we use the t-distribution?
# [1] The sample size is small (n = 15), which makes the t-distribution more appropriate than the normal distribution.
# [2] The population standard deviation is not known and is estimated from the sample, so the t-distribution is used.

# B: 99% Confidence Interval Using Known Population Standard Deviation

 population standard deviation

In [33]:
# Population standard deviation, treated as a known value for Part B
# (so a z-based interval can be used instead of a t-based one).
population_std = 0.2  # in millions of characters

In [35]:
# Two-sided critical value from the standard normal distribution: the
# (1 - alpha/2) quantile.
# The z-distribution is used when the population standard deviation is known,
# or as a large-sample approximation (typically n > 30).
upper_tail_quantile = 1 - alpha / 2
z_value = norm.ppf(upper_tail_quantile)
z_value

2.5758293035489004

 Calculate the margin of error and confidence interval

In [37]:

# Standard error of the mean using the known population standard deviation.
standard_error_population = population_std / np.sqrt(n)

# Margin of error is the z critical value times the standard error; the
# interval is centred on the sample mean.
margin_of_error_population = z_value * standard_error_population
confidence_interval_population = (
    sample_mean - margin_of_error_population,
    sample_mean + margin_of_error_population,
)

margin_of_error_population, confidence_interval_population

(0.13301525327090588, (1.1056514133957607, 1.3716819199375725))

In [42]:
# Report Part B, rounded to two decimals; one line per argument via sep="\n".
print(
    "B: 99% Confidence Interval with Known Population Std Deviation",
    f"99% Confidence Interval: {confidence_interval_population[0]:.2f} to {confidence_interval_population[1]:.2f}",
    sep="\n",
)

B: 99% Confidence Interval with Known Population Std Deviation
99% Confidence Interval: 1.11 to 1.37


In [43]:
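# The interval from 1.11 to 1.37 means that we are 99% confident the true population mean lies within this
# range. In other words, if we took many samples and calculated a confidence interval from each one, about
# 99 percent of those intervals would include the true mean.
# This interval is slightly narrower than the one in Part A because the z critical value (about 2.58) is
# smaller than the t critical value with 14 degrees of freedom (about 2.98), which outweighs the slightly
# larger standard deviation used here (0.2 vs. about 0.19).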