In [85]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import t

In [86]:
# Load data into Python
CH01PR21 = pd.read_excel("C:\\Users\\kwame\\Downloads\\1.21.xlsx",  names=["broken", "transfer"])

In [87]:
# Fit the linear regression model
X = CH01PR21['transfer']
y = CH01PR21['broken']
X = sm.add_constant(X)  # Add an intercept term

model = sm.OLS(y, X).fit()

##### a. Estimate Beta_1 with a 95% CI and interpret

In [88]:
# Calculate confidence intervals for the coefficients with a 95% confidence level
confidence_intervals = model.conf_int(alpha=0.05)

# Print the confidence intervals
print(confidence_intervals)

                 0          1
const     8.670370  11.729630
transfer  2.918388   5.081612


Thus, the increase in number of broken ampules for every additional transfer is likely to be between 2.91 and 5.08 ampules.

##### b. Conduct a t test for HO: Beta_1 = 0

In [89]:
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                 broken   R-squared:                       0.901
Model:                            OLS   Adj. R-squared:                  0.889
Method:                 Least Squares   F-statistic:                     72.73
Date:                Wed, 18 Oct 2023   Prob (F-statistic):           2.75e-05
Time:                        00:44:06   Log-Likelihood:                -17.016
No. Observations:                  10   AIC:                             38.03
Df Residuals:                       8   BIC:                             38.64
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         10.2000      0.663     15.377      0.0



If Beta_1 is actually 0, the probability of observing b_1 equal to or more extreme than 4.0 is < 0.00001. Thus, there is sufficient evidence to reject HO and conclude that there is a linear association between the number of transfers and the number of broken ampules. We can also see that from the confidence interval, which is between 2.918 and 5.082 excludes zero (0), which confirms that there is a relationship.

##### c. Estimate Beta_0 with a 95% CI and interpret

In [90]:
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                 broken   R-squared:                       0.901
Model:                            OLS   Adj. R-squared:                  0.889
Method:                 Least Squares   F-statistic:                     72.73
Date:                Wed, 18 Oct 2023   Prob (F-statistic):           2.75e-05
Time:                        00:44:06   Log-Likelihood:                -17.016
No. Observations:                  10   AIC:                             38.03
Df Residuals:                       8   BIC:                             38.64
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         10.2000      0.663     15.377      0.0



The number of broken ampules in a box without transfers is likely to be between 8.670 and 11.730 because intervals constructed in this manner cover the 'true value' of Beta_0 95% of the time.

##### d. Conduct a t-test for HO: Beta_0 <= 9 at the 0.025 level of significance

In [91]:
# Specify the null hypothesis value (e.g., Beta_0 <= 9)
null_hypothesis_value = 9

# Get the intercept coefficient estimate and its standard error
intercept_estimate = model.params[0]
intercept_std_error = model.bse[0]

# Calculate the degrees of freedom
degrees_of_freedom = model.df_resid

# Calculate the t-statistic
t_statistic = (intercept_estimate - null_hypothesis_value) / intercept_std_error

# Calculate the p-value for a one-tailed test
p_value = 1 - t.cdf(abs(t_statistic), degrees_of_freedom)

# Set the significance level
alpha = 0.025

# Check if the p-value is less than the significance level
is_reject_null = p_value < alpha

print(f"T-statistic: {t_statistic}")
print(f"P-value: {p_value}")
print(f"Is the null hypothesis significant at the 0.025 level? {is_reject_null}")


T-statistic: 1.8090680674665813
P-value: 0.05402226806321897
Is the null hypothesis significant at the 0.025 level? False


  intercept_estimate = model.params[0]
  intercept_std_error = model.bse[0]


##### e. Power of test 

In [92]:
# Setting the parameters
alpha = 0.05
df = 8  # Degrees of freedom
effect_size = 2
non_central_param = effect_size / 0.5
t_alpha = t.ppf(1 - alpha, df)

# Calculate the power
power = 1 - t.cdf(t_alpha, df, loc=non_central_param)

print(f"Power is: {power}")

Power is: 0.9676345875387828
