In [None]:
# Section 1: Experiment Design & Hypothetical Power Analysis

import numpy as np
from statsmodels.stats.power import zt_ind_solve_power
from statsmodels.stats.proportion import proportion_effectsize

# Define parameters for the power analysis.
# In a real project these are fixed BEFORE the test runs. Here the baseline is
# taken from the observed results, so this is a retrospective ("hypothetical")
# sizing exercise.

# Control-group (Generic) engagement rate observed in the results, ~1.79%.
baseline_rate = 0.0179

# Minimum Detectable Effect (MDE): the smallest improvement we would consider a
# business success. It is an ABSOLUTE change of 0.5 percentage points
# (1.79% -> 2.29%), not a 0.5% relative change.
mde_absolute = 0.005
target_rate = baseline_rate + mde_absolute

alpha = 0.05  # 5% chance of a false positive
power = 0.80  # 80% chance of detecting a real effect

# Convert the two rates into the standardized effect size that the power
# solver expects (arcsine-transformed difference of proportions, per the
# statsmodels documentation).
effect_size = proportion_effectsize(target_rate, baseline_rate)

# Solve for the sample size of group 1: nobs1 is the one argument left as None,
# and ratio=1.0 means both groups are assumed to be the same size.
# NOTE(review): this sizing is one-tailed ('larger') with equal groups, while
# the hypothesis test later in this notebook is two-sided and the observed
# groups are very unequal (564,577 vs 23,524 users). A two-sided design would
# need a larger sample than the figure printed here -- confirm which design is
# intended before reusing this number.
required_sample_size = zt_ind_solve_power(
    effect_size=effect_size,
    nobs1=None,
    alpha=alpha,
    power=power,
    ratio=1.0,
    alternative='larger'  # one-tailed larger
)

# Round up: a fractional user cannot be recruited.
required_users_per_group = int(np.ceil(required_sample_size))

print(f"Baseline Rate: {baseline_rate:.2%}")
print(f"Minimum Detectable Effect (MDE): {mde_absolute:+.2%}")
print(f"Target Rate: {target_rate:.2%}")
print("-" * 30)
print(f"To reliably detect an absolute lift of {mde_absolute:+.2%},")
print(f"we would need a sample size of approximately {required_users_per_group} users per group.")

Baseline Rate: 1.79%
Minimum Detectable Effect (MDE): +0.50%
Target Rate: 2.29%
------------------------------
To reliably detect an absolute lift of +0.50%,
we would need a sample size of approximately 9848 users per group.


In [None]:
# 2-01 Import Libraries and Load data

import pandas as pd
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

from google.colab import drive
# Mount Google Drive so the summary CSV under MyDrive is reachable (Colab only).
drive.mount('/content/drive')

file_path = "/content/drive/MyDrive/engagement_summary.csv"
try:
    summary_df = pd.read_csv(file_path)
except FileNotFoundError:
    # Every later cell needs summary_df. Report the path and stop here rather
    # than continuing and failing further down with a confusing NameError.
    print(f"File not found at: {file_path}")
    raise
else:
    # The summary has one row per message type, so printing it in full is cheap.
    print("Summary data loaded successfully:")
    print(summary_df)

Mounted at /content/drive
Summary data loaded successfully:
   message_type  n_users  n_engaged  engagement_rate
0  Personalized   564577      14423         0.025547
1       Generic    23524        420         0.017854


In [None]:
# 2-02 Hypothesis Testing (proportions Z-test)

# Inputs for the two-sample proportions Z-test, one entry per row of summary_df.
nobs = summary_df['n_users']      # observations: total users in each group
count = summary_df['n_engaged']   # successes: engaged users in each group

# Two-sided test of equal engagement rates between the two groups.
# 'two-sided' is the library default; it is spelled out here so the choice is
# visible to the reader.
# NOTE(review): the sign of z presumably follows the row order of summary_df
# (first row minus second row) -- confirm against the statsmodels docs.
z_stat, p_value = proportions_ztest(count, nobs, alternative='two-sided')

print(
    "\n--- Z-test Results ---",
    f"Z-statistic: {z_stat:.20f}",
    f"P-value: {p_value:.20f}",
    sep="\n",
)


--- Z-test Results ---
Z-statistic: 7.37007812654541449859
P-value: 0.00000000000017052807


In [None]:
# 2-03 Result

alpha = 0.05  # Significance level: accept a 5% risk of a false positive.

# Look up each group's rate by label (not by row position) and compute the
# lifts BEFORE the significance check, so that rate_generic, rate_personalized,
# absolute_lift and relative_lift are defined whichever branch runs and later
# cells can use them regardless of the test outcome.
rate_generic = summary_df.loc[summary_df['message_type'] == 'Generic', 'engagement_rate'].iloc[0]
rate_personalized = summary_df.loc[summary_df['message_type'] == 'Personalized', 'engagement_rate'].iloc[0]

absolute_lift = rate_personalized - rate_generic   # difference in rates (percentage points)
relative_lift = absolute_lift / rate_generic       # difference relative to the control rate

if p_value < alpha:
    print(f"Statistically Significant (p-value < {alpha}).")
    print("We can conclude that 'message_type' has a significant effect on the Engagement Rate.")

    # Report which version is better and by how much.
    print(f"-> Personalized Rate: {rate_personalized:.2%}")
    print(f"-> Generic Rate (Control): {rate_generic:.2%}")
    print(f"Absolute Lift: {absolute_lift:+.2%}")
    print(f"Relative Lift: {relative_lift:+.2%}")

else:
    print(f"Not Statistically Significant (p-value >= {alpha}).")
    print("We do not have enough evidence to conclude that there is a difference between the message types.")


Statistically Significant (p-value < 0.05).
We can conclude that 'message_type' has a significant effect on the Engagement Rate.
-> Personalized Rate: 2.55%
-> Generic Rate (Control): 1.79%
Absolute Lift: +0.77%
Relative Lift: +43.09%


In [None]:
# 2-04 Calculate Confidence Interval and Conclusion

# Raw counts for each group, looked up by label so row order does not matter.
n_personalized = summary_df.loc[summary_df['message_type'] == 'Personalized', 'n_users'].iloc[0]
n_engaged_personalized = summary_df.loc[summary_df['message_type'] == 'Personalized', 'n_engaged'].iloc[0]

n_generic = summary_df.loc[summary_df['message_type'] == 'Generic', 'n_users'].iloc[0]
n_engaged_generic = summary_df.loc[summary_df['message_type'] == 'Generic', 'n_engaged'].iloc[0]

# Recompute the rates and the lift from the counts so this cell is
# self-contained: it no longer relies on variables that an earlier cell may
# have created only inside a conditional branch.
# NOTE(review): assumes the CSV's engagement_rate column equals
# n_engaged / n_users (it matches the displayed values to 6 decimals) -- confirm.
rate_personalized = n_engaged_personalized / n_personalized
rate_generic = n_engaged_generic / n_generic
absolute_lift = rate_personalized - rate_generic

# Standard error of the difference between two independent proportions,
# using each group's own variance (unpooled).
std_err_diff = np.sqrt(
    rate_personalized * (1 - rate_personalized) / n_personalized
    + rate_generic * (1 - rate_generic) / n_generic
)

# Normal critical value for a two-sided 95% confidence interval.
Z_CRITICAL_95 = 1.96

margin_of_error = Z_CRITICAL_95 * std_err_diff
ci_low = absolute_lift - margin_of_error
ci_high = absolute_lift + margin_of_error

print(f"Absolute Lift: {absolute_lift:+.2%}")
print(f"Margin of Error: {margin_of_error:.2%}")
print(f"We are 95% confident that personalizing messages increases the engagement rate by {ci_low:+.2%} to {ci_high:+.2%} in absolute terms.")

Absolute Lift: +0.77%
Margin of Error: 0.17%
We are 95% confident that personalizing messages increases the engagement rate by +0.60% to +0.94% in absolute terms.
