#  Hypothesis Testing <hr style="border:2.5px solid #460453"></hr>

<b><i>Rica Galagate</i></b>
<br>Computer Engineer/Technician</br>

In [22]:
#%pip install statsmodels --upgrade

In [23]:
# imports and packages
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.power import TTestPower

In [24]:
# load dataset
df = pd.read_csv(r"CSV_files-Endterm\defects-30-sample.csv",
                 delimiter=",",
                 index_col="Sample")
df

Unnamed: 0_level_0,Defects
Sample,Unnamed: 1_level_1
1,13
2,12
3,10
4,11
5,10
6,6
7,12
8,12
9,9
10,15


In [25]:
# summary of dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 1 to 30
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Defects  30 non-null     int64
dtypes: int64(1)
memory usage: 480.0 bytes


In [26]:
# summary of statistics
df.describe()

Unnamed: 0,Defects
count,30.0
mean,11.033333
std,2.566137
min,5.0
25%,9.25
50%,11.5
75%,13.0
max,15.0


# Two-tailed Test

In [27]:
""" 
Ho: pop_mean = sample_mean
Ha: pop_mean != sample_mean
"""

# parameters
pop_mean = 10.32
pop_std = 3.17
sample_mean = df['Defects'].mean()
n = df['Defects'].count()

# z-score
Z_score = (sample_mean-pop_mean)/(pop_std/np.sqrt(n))
print(f"Z-score: {Z_score:.4f}")

# p-value
p_value = 2*(1-stats.norm.cdf(Z_score))
print(f'p-value: {p_value:.4f}')

# verdict
alpha = 0.05
if p_value < alpha:
    # reject null hypothesis
    print("The average defect rate is significantly different from 10.32 units.")
else:
    # fail to reject null hypothesis
    print("The average defect rate are the same.")

Z-score: 1.2325
p-value: 0.2178
The average defect rate are the same.


In [28]:
"""
Power of the Test
-----------------
"""

# cohen's d - a standardized effect size used to quantify the difference between two means in terms of standard deviations
sample_mean = 11.03
pop_mean = 10.32
pop_std = 3.17

d = abs((sample_mean - pop_mean)/pop_std)
print(f"Cohen's d: {d:.4f}")

power = TTestPower().power(
    effect_size = d,
    nobs = 30,
    alpha = 0.05,
    alternative='two-sided'
)
print(f"Power of the test: {power*100:.2f}%")

Cohen's d: 0.2240
Power of the test: 22.04%


In [29]:
"""
22% chance of correctly rejecting the null hypothesis when it is false.
78% chance of committing a type II error.
"""

# 80% power sample size
sample_80p = TTestPower().solve_power(
    effect_size = d,
    power= 0.8, # target power
    alpha = 0.05,
    alternative='two-sided'
)
print(f"Sample size needed for 80% power: {sample_80p:.0f}")

Sample size needed for 80% power: 158


# One-Tailed Test

In [30]:
""" 
Ho: pop_mean = sample_mean
Ha: pop_mean < sample_mean
"""

# parameters
pop_mean = 10.32
pop_std = 3.17
sample_mean = df['Defects'].mean()
n = df['Defects'].count()

# z-score
Z_score = (sample_mean-pop_mean)/(pop_std/np.sqrt(n))
print(f"Z-score: {Z_score:.4f}")

# p-value
p_value = (1-stats.norm.cdf(Z_score))
print(f'p-value: {p_value:.4f}')

# verdict
alpha = 0.05
if p_value < alpha:
    # reject null hypothesis
    print("The average defect rate increases.")
else:
    # fail to reject null hypothesis
    print("The average defect rate are the same.")

Z-score: 1.2325
p-value: 0.1089
The average defect rate are the same.


In [31]:
"""
Power of the Test
-----------------
"""

# cohen's d - a standardized effect size used to quantify the difference between two means in terms of standard deviations
sample_mean = 11.03
pop_mean = 10.32
pop_std = 3.17

d = abs((sample_mean - pop_mean)/pop_std)
print(f"Cohen's d: {d:.4f}")

power = TTestPower().power(
    effect_size = d,
    nobs = 30,
    alpha = 0.05,
    alternative='larger'
)
print(f"Power of the test: {power*100:.2f}%")

# 80% power sample size
sample_80p = TTestPower().solve_power(
    effect_size = d,
    power= 0.8, # target power
    alpha = 0.05,
    alternative='larger'
)
print(f"Sample size needed for 80% power: {sample_80p:.0f}")

Cohen's d: 0.2240
Power of the test: 32.75%
Sample size needed for 80% power: 125
