# Z-test problems

Source [The Z-test](https://courses.washington.edu/psy315/tutorials/z_test_tutorial.pdf)

Functions used are:

1. Getting p or area under the curve for a given z_value using scipy.stats.norm.cdf (z_value) (Cumulative Distribution Function)
2. Given p get the z-value, useful for critical z-value at a given alpga

Note that if we are give samples we can use scipy.stats.zscore to calculate the z_score.

In [None]:
# Imports
from statsmodels.stats.weightstats import ztest
from scipy.stats import norm
import numpy as np

# Problem 1
Suppose the arousal of hot cats has a population that is normally distributed with a
standard deviation of 6. Tomorrow you sample 49 hot cats from this population and obtain
a mean arousal of 46.44 and a standard deviation of 5.6968.
Using an alpha value of α = 0.01, is this observed mean significantly less than an expected
arousal of 47?

In [129]:
# Given
n = 49.0
alpha = 0.01
z_critical_01 = norm.ppf(alpha)
population_std_dev = 6.0
population_mean = 47.00
sample_mean = 46.44
sample_std_dev = 5.6968

# H_0 = arousal_mean == 47.00
# H_a = aroudal_mean < 47 (one sided)
# Left tail

In [89]:
from numpy import sqrt
# Manual calculations
z_sample = (sample_mean - population_mean) / (population_std_dev / sqrt(n))
p_value = norm.cdf(z_sample)

# Given a p-value find the z_score, reverse calculation
z_score_evaluated = norm.ppf(p_value)
print(f"z_sample: {z_sample}, p={p_value}, evaluated z= {z_score_evaluated}")
print(f"critical z = {z_critical_01} for alpha={alpha}")

z_sample: -0.653333333333336, p=0.2567707037112666, evaluated z= -0.653333333333336
critical z = -2.3263478740408408 for alpha=0.01


In [90]:
# Test Hypothesis, method 1, critical z-score
if z_sample > z_critical_01:
    print(f"Null Hypothesis accepted. z: {z_sample:0.4f} > {z_critical_01:0.4f}")
    print("arousal_mean is NOT significant that population_mean")
else:
    print(
        f"Alternate hypothesis accepted: z: {z_sample:0.4f} < {z_critical_01:0.4f}")
    print("arousal_mean IS significantly LESS than population_mean")

Null Hypothesis accepted. z: -0.6533 > -2.3263
arousal_mean is NOT significant that population_mean


In [91]:
# Test hypothesis, method two alpha
if p_value > alpha:
    print(f"Null Hypothesis accepted. {p_value:0.4f} > {alpha:0.4f}")
    print("arousal_mean is NOT significant that population_mean")
else:
    print(f"Alternate hypothesis accepted: {p_value:0.4f} < {alpha:0.4f}")
    print("arousal_mean IS significantly LESS than population_mean")

Null Hypothesis accepted. 0.2568 > 0.0100
arousal_mean is NOT significant that population_mean


# Problem 2
Suppose the jewelry of exams has a population that is normally distributed with
a standard deviation of 5. You are walking down the street and sample 9 exams from this
population and obtain a mean jewelry of 28.95 and a standard deviation of 6.3802.
Using an alpha value of α = 0.01, is this observed mean significantly different than an
expected jewelry of 27?

In [None]:
# Given
population_mean = 27.0
population_std_dev = 5.0
n = 9.0
sample_mean = 28.95
sample_std_dev = 6.3802
alpha = 0.01 / 2 # Two tailed test

# H_0 = observed_mean ==  population_mean
# H_a = observed_mean ==/== population_mean
# Two tailed test

In [93]:
# Calculations
z_score = (sample_mean - population_mean) / (population_std_dev / sqrt(n))
z_critical_01 = norm.ppf(1 - alpha)
p_value = norm.cdf(z_score) # Left of the curve
# Adjust to two tail
p_adjusted = (1-p_value) * 2

In [94]:
# Result, alpha
if p_adjusted > alpha:
    print(f"Null accepted, p: {p_adjusted:0.4f} > {alpha:0.4f}")
else:
    print(f"Alternate accepted, p: {p_adjusted:0.4f} < {alpha:0.4f}")

Null accepted, p: 0.2420 > 0.0050


In [95]:
# Result, z_score
if abs(z_score) < z_critical_01:
    print(f"Null accepted, p: {z_score:0.4f} > {z_critical_01:0.4f}")
else:
    print(f"Alternate accepted, p: {z_score:0.4f} < {z_critical_01:0.4f}")

Null accepted, p: 1.1700 > 2.5758


# Problem 3

Suppose the courage of psychologists has a population that is normally distributed with
a standard deviation of 10. You decide to sample 57 psychologists from this population
and obtain a mean courage of 34.81 and a standard deviation of 9.0579.
Using an alpha value of α = 0.05, is this observed mean significantly greater than an
expected courage of 34?

In [130]:
# Given
population_std_dev = 10.0
population_mean = 34.0
n = 57.0
sample_mean = 34.81
sample_std_dev = 34.81
alpha = 0.05

# H_0: psy_mean == population_std_dev
# H_1: psy_mean > population_std_dev

# Right tail test

In [None]:
# Calculations
z_sample = (sample_mean - population_mean) / (population_std_dev / sqrt(n))
z_critical_05 = -norm.ppf(alpha) # Upper side

# z_sample is > 0
p_value = norm.cdf(z_sample)
p_value_right_tail = 1 - p_value

In [132]:
# Analysis, z_crtical
if z_sample < z_critical_05:
    print(f"Accept H_0: z sample: {z_sample:0.4f} critica: {z_critical_05:0.4f}")
else:
    print(
        f"Accept H_a: z sample: {z_sample:0.4f} critica: {z_critical_05:0.4f}")

Accept H_0: z sample: 0.6115 critica: 1.6449


In [133]:
# Analysis
if p_value_right_tail > alpha:
    print(
        f"Accept H_0: p: {p_value_right_tail:0.4f} critica: {alpha:0.4f}")
else:
    print(
        f"Accept H_a: p sample: {p_value_right_tail:0.4f} critica: {alpha:0.4f}")

Accept H_0: p: 0.2704 critica: 0.0500


# Problem 4

Variation of problem 3

Suppose the courage of psychologists has a population that is normally distributed with
a standard deviation of 10. You decide to sample 57 psychologists (use np random to create samples)
Using an alpha value of α = 0.05, is this observed mean significantly greater than an
expected courage of 34?

In [139]:
# Input
alpha = 0.05
population_mean = 34.0
population_std_dev = 10.0
n = 57
# Generate n samples
np.random.seed(n)
samples = np.random.normal(34.0, 15.0, n)
sample_mean = samples.mean()
sample_std_dev = samples.std()
print(f"Samples mean = {sample_mean:0.4f}, std dev: {sample_std_dev:0.4f}")

Samples mean = 35.6246, std dev: 16.5317


In [143]:
# Calculations, note z_sample is positive
z_sample = (sample_mean - population_mean) / (population_std_dev / sqrt(n))
z_critical_05 = -norm.ppf(alpha) # Upper side
print(f"sample: {z_sample:0.4f}, critical = {z_critical_05:0.4f}")

p_value = 1 - norm.cdf(z_sample) # Right side
print(f"pvalue = {p_value:0.4f}")

sample: 1.2265, critical = 1.6449
pvalue = 0.1100


In [144]:
# Analysis, z_crtical
if z_sample < z_critical_05:
    print(
        f"Accept H_0: z sample: {z_sample:0.4f} critica: {z_critical_05:0.4f}")
else:
    print(
        f"Accept H_a: z sample: {z_sample:0.4f} critica: {z_critical_05:0.4f}")

Accept H_0: z sample: 1.2265 critica: 1.6449


## Solution 2
Use ztest from statsmodel library.

In [145]:
# Use zscore
z_sample_ztest, p_value_ztest = ztest(
    x1=samples, value=population_mean, alternative="larger", ddof=1.0)
print(f"z = {z_sample_ztest:0.4f}, p = {p_value_ztest:0.4f}")

z = 0.7354, p = 0.2310


In [148]:
if p_value_ztest > alpha:
    print(
        f"Accept H_0: z sample: {p_value_ztest:0.4f} > {alpha:0.4f}")
else:
    print(
        f"Accept H_a: z sample: {p_value_ztest:0.4f} < {alpha:0.4f}")

Accept H_0: z sample: 0.2310 > 0.0500


In [146]:
# Reconcile with manual calculations
sample_mean_reconciled = samples.mean()
sample_std_dev_reconciled = samples.std(ddof=1.0)
z_sample_reconciled = (sample_mean_reconciled - population_mean) / \
    (sample_std_dev_reconciled / sqrt(n))
p_value_reconciled = 1 - norm.cdf(z_sample_reconciled) # Larger tail
print(f"Samples mean = {sample_mean:0.4f}, std dev: {sample_std_dev:0.4f}")
print(f"z_sample_reconciled: {z_sample_reconciled:0.4f}")
print(f"p_value_reconciled: {p_value_reconciled:0.4}")

Samples mean = 35.6246, std dev: 16.5317
z_sample_reconciled: 0.7354
p_value_reconciled: 0.231


In [161]:
# Testing zscore in scipy
from scipy.stats import zscore
import numpy as np

# Create a test stats normal distribution
data_mean, data_std_dev = 100_000.0, 40_000.0
np.set_printoptions(formatter={'float_kind': lambda x: "%.2f" % x})
np.random.seed(10)
data = np.random.normal(data_mean, data_std_dev, size=10)
sample_mean, sample_std_dev = data.mean(), data.std()
print("Sample data: ", data)
print(f"Sample mean: {sample_mean:0.2f}, std: {sample_std_dev:0.2f}")
data_zscores = zscore(data)
print("Normalized data: ", data_zscores)
data_inverse = np.array(
    [z * sample_std_dev + sample_mean for z in data_zscores])
print("Regenrated data: ", data_inverse)

Sample data:  [153263.46 128611.16 38183.99 99664.65 124853.44 71196.58 110620.46
 104341.94 100171.66 93015.99]
Sample mean: 102392.33, std: 30079.02
Normalized data:  [1.69 0.87 -2.13 -0.09 0.75 -1.04 0.27 0.06 -0.07 -0.31]
Regenrated data:  [153263.46 128611.16 38183.99 99664.65 124853.44 71196.58 110620.46
 104341.94 100171.66 93015.99]
