In [29]:
# Import dependencies
import numpy as np
import pandas as pd
import scipy.stats as stats
from statsmodels.stats.proportion import proportions_ztest
import statsmodels.formula.api as sm

# Scenario

- You are a UX researcher on a product team for a language learning app. 
- You are tasked with testing two versions of a sound effect that plays when the learner provides a correct answer, and how this affects their likelihood of completing the lesson.  


In [17]:
### Generate toy dataset from language learning app user data
np.random.seed(11052025)  

data = {
    'user_id': range(1, 101),
    'group': np.random.choice(['Version A', 'Version B'], size=100),
    'age_group': np.random.choice(['18-24', '25-34', '35-44', '45+'], size=100),
    'gender': np.random.choice([0, 1, 2], size=100, p=[0.49, 0.49, 0.02]), 
}

# Add lessons_completed based on group
data['lesson_completed'] = [
    np.random.choice([0, 1], p=[0.53, 0.47]) if grp == 'Version A' else np.random.choice([0, 1], p=[0.57, 0.43])
    for grp in data['group']
]

# Convert dictionary to pandas df
data_df = pd.DataFrame(data)

data_df.describe(include='all').T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
user_id,100.0,,,,50.5,29.011492,1.0,25.75,50.5,75.25,100.0
group,100.0,2.0,Version A,54.0,,,,,,,
age_group,100.0,4.0,25-34,30.0,,,,,,,
gender,100.0,,,,0.55,0.575159,0.0,0.0,1.0,1.0,2.0
lesson_completed,100.0,,,,0.48,0.502117,0.0,0.0,0.0,1.0,1.0


In [18]:
### Observe naive difference in proportions
# Calculate completion rates
completion_rates = data_df.groupby('group')['lesson_completed'].mean()
print("Completion Rates by Group:")
print(completion_rates)

# What is the naive conclusion?


Completion Rates by Group:
group
Version A    0.444444
Version B    0.521739
Name: lesson_completed, dtype: float64


In [25]:
### Conduct two-proportion z-test by hand
# Get proportions and difference
proportionA = completion_rates['Version A']
proportionB = completion_rates['Version B']
diff_in_props = proportionA - proportionB
print(diff_in_props)


-0.07729468599033817


In [42]:
# Calculate standard error for difference in proportions

# Get group sample sizes
nA = data_df[data_df['group'] == 'Version A'].shape[0]
nB = data_df[data_df['group'] == 'Version B'].shape[0]

# Get completion counts for each group
completion_countA = data_df[data_df['group'] == 'Version A']['lesson_completed'].sum()
completion_countB = data_df[data_df['group'] == 'Version B']['lesson_completed'].sum()

# Get pooled proportion
pooled_prop = (completion_countA + completion_countB)/(nA + nB)

# Get standard error
std_error = np.sqrt(pooled_prop * (1 - pooled_prop) * (1/nA + 1/nB))

In [40]:
print(std_error)


0.019626632345167736


In [43]:
# Calculate z-statistic and confidence interval
z_stat = diff_in_props / std_error
print(f"Z-statistic: {z_stat}")

# Calculate p-value
p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
print(f"P-value: {p_value}")

# Calculate 95% confidence interval
threshold_95 = 1.96
ci_lower = diff_in_props - threshold_95 * std_error
ci_upper = diff_in_props + threshold_95 * std_error
print(f"95% Confidence Interval: ({ci_lower}, {ci_upper})")

# Does the conclusion change?


Z-statistic: -0.7710865759549871
P-value: 0.44065561691238964
95% Confidence Interval: (-0.27376754554366883, 0.11917817356299248)


In [38]:
### Use statsmodels for two-proportion z-test

completion_counts = np.array([completion_countA, completion_countB])
n_obs = data_df.groupby('group').size().values
z_stat, p_val = proportions_ztest(completion_counts, n_obs, prop_var=0)
print(f"Z-statistic: {z_stat}, P-value: {p_val}")


Z-statistic: -0.7710865759549871, P-value: 0.44065561691238964


In [21]:
### Recalculate using logistic regression, then add covariates

# Fit logistic regression model
logit_model1 = sm.logit('lesson_completed ~ group', data=data_df)
result1 = logit_model1.fit()
print(result1.summary())
               

Optimization terminated successfully.
         Current function value: 0.689372
         Iterations 4
                           Logit Regression Results                           
Dep. Variable:       lesson_completed   No. Observations:                  100
Model:                          Logit   Df Residuals:                       98
Method:                           MLE   Df Model:                            1
Date:                Wed, 05 Nov 2025   Pseudo R-squ.:                0.004297
Time:                        14:09:04   Log-Likelihood:                -68.937
converged:                       True   LL-Null:                       -69.235
Covariance Type:            nonrobust   LLR p-value:                    0.4405
                         coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept             -0.2231      0.274     -0.815      0.415      -0.760       0.314
group

In [44]:
# Add covariates for age_group and gender

# Fit logistic regression model with covars
logit_model2 = sm.logit('lesson_completed ~ group + age_group + gender' , data=data_df)
result2 = logit_model2.fit()
print(result2.summary())
               

Optimization terminated successfully.
         Current function value: 0.660708
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:       lesson_completed   No. Observations:                  100
Model:                          Logit   Df Residuals:                       94
Method:                           MLE   Df Model:                            5
Date:                Wed, 05 Nov 2025   Pseudo R-squ.:                 0.04570
Time:                        18:22:09   Log-Likelihood:                -66.071
converged:                       True   LL-Null:                       -69.235
Covariance Type:            nonrobust   LLR p-value:                    0.2756
                         coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept              0.4048      0.470      0.860      0.390      -0.517       1.327
group