# Power of Test

In [1]:
'''
Quality Control Manager - Chocolate Factory.
Responsible for maintaining the average weight of chocolate bars produced.

average = 50 grams
standard deviation = 2 grams
sample size = 30
significance level = 0.05

data = [55, 45, 52, 48, 55, 52, 52, 53, 48, 52, 53, 47, 54, 51, 52, 51, 48, 52, 53, 54, 51, 51, 52, 54, 47, 52, 53, 48, 51, 54]

H0: sample_mean = average = 50
Ha: sample_mean != average

CALCULATE POWER!
'''

'\nQuality Control Manager - Chocolate Factory.\nResponsible for maintaining the average weight of chocolate bars produced.\n\naverage = 50 grams\nstandard deviation = 2 grams\nsample size = 30\nsignificance level = 0.05\n\ndata = [55, 45, 52, 48, 55, 52, 52, 53, 48, 52, 53, 47, 54, 51, 52, 51, 48, 52, 53, 54, 51, 51, 52, 54, 47, 52, 53, 48, 51, 54]\n\nH0: sample_mean = average = 50\nHa: sample_mean != average\n\nCALCULATE POWER!\n'

In [2]:
import numpy as np
from scipy import stats
from statsmodels.stats import power



In [9]:
population_mean = 50
population_standard_deviation = 2
sample_size = 30
alpha = 0.05

In [10]:
confidence_level = 1 - (alpha / 2) # 95% confidence level in a two tailed test

In [11]:
z_critical = np.abs(round(stats.norm.isf(q = alpha / 2), 4))

In [12]:
z_critical

1.96

In [13]:
data = [55, 45, 52, 48, 55, 52, 52, 53, 48, 52, 53, 47, 54, 51, 52, 51, 48, 52, 53, 54, 51, 51, 52, 54, 47, 52, 53, 48, 51, 54]

In [14]:
sample_mean = np.mean(data)
sample_standard_deviation = np.std(data)

In [15]:
effective_size = (sample_mean - population_mean) / sample_standard_deviation

In [16]:
effective_size

0.5261336417646574

In [17]:
power = power.zt_ind_solve_power(
    effect_size = effective_size,
    nobs1 = sample_size,
    alpha = alpha,
    ratio = 0,
    alternative = "two-sided"
)

In [18]:
power

0.8216812302268112

# Two Sample Z Test

In [19]:
'''
Example 1 ->

FDA - Two different medicines M1 and M2 have equal recovery time.

H0: t1 = t2
Ha: t1 != t2

For M1, you have data from 100 (n1) patients. -> x1 (mean)
For M2, you have data from 90 (n2) patients. -> x2 (mean)
'''

'\nExample 1 ->\n\nFDA - Two different medicines M1 and M2 have equal recovery time.\n\nH0: t1 = t2\nHa: t1 != t2\n\nFor M1, you have data from 100 (n1) patients. -> x1 (mean)\nFor M2, you have data from 90 (n2) patients. -> x2 (mean)\n'

In [22]:
import random

random.seed(123)

m1_data = []

for _ in range(100):
    recovery_time = random.uniform(5.0, 20.0)
    m1_data.append(np.round(recovery_time))
    
print(m1_data)
    
m2_data = []

for _ in range(90):
    recovery_time = random.uniform(5.0, 30.0)
    m2_data.append(np.round(recovery_time))
    
print(m2_data)

[6.0, 6.0, 11.0, 7.0, 19.0, 6.0, 13.0, 10.0, 18.0, 7.0, 10.0, 10.0, 9.0, 5.0, 12.0, 6.0, 14.0, 6.0, 10.0, 12.0, 19.0, 6.0, 7.0, 17.0, 5.0, 19.0, 14.0, 9.0, 18.0, 17.0, 10.0, 17.0, 8.0, 14.0, 13.0, 17.0, 10.0, 11.0, 17.0, 13.0, 15.0, 15.0, 15.0, 18.0, 12.0, 15.0, 11.0, 5.0, 16.0, 8.0, 17.0, 19.0, 15.0, 8.0, 10.0, 11.0, 6.0, 12.0, 14.0, 10.0, 8.0, 6.0, 14.0, 5.0, 10.0, 8.0, 13.0, 16.0, 19.0, 15.0, 20.0, 19.0, 14.0, 9.0, 12.0, 8.0, 20.0, 12.0, 16.0, 6.0, 7.0, 8.0, 9.0, 10.0, 9.0, 12.0, 11.0, 7.0, 6.0, 10.0, 6.0, 11.0, 5.0, 13.0, 6.0, 15.0, 11.0, 20.0, 9.0, 12.0]
[24.0, 13.0, 14.0, 21.0, 29.0, 27.0, 15.0, 23.0, 22.0, 14.0, 11.0, 7.0, 27.0, 6.0, 18.0, 18.0, 9.0, 21.0, 28.0, 24.0, 15.0, 8.0, 29.0, 18.0, 21.0, 25.0, 15.0, 7.0, 8.0, 22.0, 14.0, 27.0, 24.0, 30.0, 22.0, 13.0, 25.0, 6.0, 18.0, 27.0, 18.0, 8.0, 18.0, 19.0, 14.0, 27.0, 14.0, 12.0, 18.0, 10.0, 13.0, 15.0, 28.0, 7.0, 10.0, 10.0, 30.0, 27.0, 10.0, 19.0, 7.0, 13.0, 14.0, 18.0, 28.0, 25.0, 18.0, 25.0, 17.0, 23.0, 24.0, 11.0, 26.0, 19.0,

In [24]:
from statsmodels.stats import weightstats as stests

In [25]:
z_score, p_val = stests.ztest(
    x1 = m1_data,
    x2 = m2_data,
    value = 0,
    alternative = "two-sided"
)

In [26]:
z_score

-7.68917478890992

In [27]:
p_val

1.4808703984296164e-14

In [28]:
# Since p_val is much less than alpha (0.01)
# We reject the null hypothesis -> Two medicines have different recovery times

In [29]:
'''
Example 2 ->

Car manufacturer comparing efficiencies of two engine models X and Y.

H0: Both have the same efficiency.
Ha: Both have different efficiencies.

X ->
    n = 50
    mean = 30
    std_dev = 3
    
Y ->
    n = 60
    mean = 32
    std_dev = 2.5
    
Significance Level = 0.05
'''

'\nExample 2 ->\n\nCar manufacturer comparing efficiencies of two engine models X and Y.\n\nH0: Both have the same efficiency.\nHa: Both have different efficiencies.\n\nX ->\n    n = 50\n    mean = 30\n    std_dev = 3\n    \nY ->\n    n = 60\n    mean = 32\n    std_dev = 2.5\n    \nSignificance Level = 0.05\n'

In [36]:
def two_sample_z_test(sample_mean_1, sample_mean_2, sample_sd_1, sample_sd_2, n1, n2):
    denominator = np.sqrt((sample_sd_1 ** 2 / n1) + (sample_sd_2 ** 2 / n2))
    z_score = (sample_mean_1 - sample_mean_2) / denominator
    
    return z_score

In [37]:
sample_mean_X = 30
sample_mean_y = 32
sample_sd_X = 3
sample_sd_Y = 2.5
sample_size_X = 50
sample_size_Y = 60

In [38]:
z_score = two_sample_z_test(
    sample_mean_X,
    sample_mean_y,
    sample_sd_X,
    sample_sd_Y,
    sample_size_X,
    sample_size_Y
)

In [39]:
z_score

-3.751832396884334

In [40]:
p_val = 2 * (1 - stats.norm.cdf(abs(z_score)))

In [41]:
p_val

0.00017554681014564366

In [42]:
# p_val is less than the alpha which means that I can reject the null hypothesis

### Conditions for Two Sample Z Test

- Populations must have finite and known means and std dev
- While sampling, size of sample >= 30
- Works only on continuous data
- Normal distribution in data
- n1 and n2 need not be the same

In [43]:
# Quiz 1

heights_a = [14, 16, 13, 17, 12, 18]
heights_b = [18, 19, 16, 17, 15, 20]

alpha = 0.1

z_stat, p_val = stests.ztest(
    heights_a,
    heights_b,
    value = 0,
    alternative = "two-sided"
)

In [44]:
z_stat

-2.029994857352875

In [45]:
p_val

0.042357062026854894

In [46]:
# p_val < alpha -> NULL HYPOTHESIS REJECTED!

# Z Proportions Test

In [47]:
'''
Product Manager at XYZ gauging customer satisfaction with new product.

H0: Proportion of satisfied customers is equal to 70%. 
Ha: Proportion of satisfied customers is not equal to specified value of 70%.
'''

'\nProduct Manager at XYZ gauging customer satisfaction with new product.\n\nH0: Proportion of satisfied customers is equal to 70%. \nHa: Proportion of satisfied customers is not equal to specified value of 70%.\n'

In [48]:
'''
Quiz 2 ->

A fast-food restaurant claims that 80% of their customers prefer their new burger over the old one.
In a random sample of 100 customers, 85 said they preferred the new burger.
What is the null and alternative hypothesis?
'''

'\nQuiz 2 ->\n\nA fast-food restaurant claims that 80% of their customers prefer their new burger over the old one.\nIn a random sample of 100 customers, 85 said they preferred the new burger.\nWhat is the null and alternative hypothesis?\n'

In [49]:
'''
Example ->

Product Manager at XYZ gauging customer satisfaction with new product.

H0: Proportion of satisfied customers is equal to 70%. 
Ha: Proportion of satisfied customers is not equal to specified value of 70%.

Sampling was done on 150 people and 115 expressed satisfaction.
'''

'\nExample ->\n\nProduct Manager at XYZ gauging customer satisfaction with new product.\n\nH0: Proportion of satisfied customers is equal to 70%. \nHa: Proportion of satisfied customers is not equal to specified value of 70%.\n\nSampling was done on 150 people and 115 expressed satisfaction.\n'

In [51]:
satisfied_customers = 115
total_customers = 150

target_satisfaction = 0.7

population_proportion = target_satisfaction
p = population_proportion

sample_proportion = satisfied_customers / total_customers
p_hat = sample_proportion

n = total_customers

Z = (p_hat - p) / np.sqrt((p * (1 - p)) / n)

p_val = 2 * (1 - stats.norm.cdf(np.abs(Z)))

In [53]:
p_val

0.07479137758694376

In [54]:
# Assuming alpha to be 0.05.