In [49]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from statsmodels.stats.proportion import proportions_ztest

In [64]:
# Step 1: Generate a synthetic A/B test dataset, save it to CSV, and load it back

In [9]:
import pandas as pd
import numpy as np

# Pin NumPy's global random state so the synthetic dataset is identical on every run
np.random.seed(42)

# Simulation settings
num_users = 1000           # how many users take part in the experiment
conversion_rate_A = 0.2    # conversion probability used for group A
conversion_rate_B = 0.25   # conversion probability used for group B

# Sequential identifiers 1..num_users
user_ids = np.arange(1, num_users + 1)

# Each user is placed in group A or B by a uniform random draw
groups = np.random.choice(['A', 'B'], size=num_users)

# Draw one 0/1 outcome per user at each group's rate (A first, then B, which
# keeps the order of calls on the seeded random stream), then keep the draw
# that matches the group the user was actually assigned to.
draws_A = np.random.binomial(1, conversion_rate_A, size=num_users)
draws_B = np.random.binomial(1, conversion_rate_B, size=num_users)
conversions = np.where(groups == 'A', draws_A, draws_B)

# Assemble one row per user and persist it (without the index column)
df = pd.DataFrame(dict(user_id=user_ids, group=groups, conversion=conversions))
df.to_csv('ab_test_data.csv', index=False)

print("Dataset 'ab_test_data.csv' has been created!")


Dataset 'ab_test_data.csv' has been created!


In [11]:
# Read the experiment data back from the CSV file; `df` now holds the on-disk copy
csv_path = 'ab_test_data.csv'
df = pd.read_csv(csv_path)

In [13]:
# Step 2: Calculate conversion rates for both groups

In [29]:
# Control arm: every row whose group label is 'A'
group_A = df.loc[df['group'].eq('A')]
n_A = len(group_A)                      # number of users in group A
conv_A = group_A['conversion'].sum()    # number of those users who converted
cr_A = conv_A / n_A                     # observed conversion rate for group A

In [31]:
# Treatment arm: every row whose group label is 'B'
group_B = df.loc[df['group'].eq('B')]
n_B = len(group_B)                      # number of users in group B
conv_B = group_B['conversion'].sum()    # number of those users who converted
cr_B = conv_B / n_B                     # observed conversion rate for group B

In [33]:
# Report both observed conversion rates as percentages with two decimals
print(
    f"Conversion Rate of Group A: {cr_A:.2%}",
    f"Conversion Rate of Group B: {cr_B:.2%}",
    sep="\n",
)

Conversion Rate of Group A: 19.59%
Conversion Rate of Group B: 24.12%


In [55]:
# Step 3: Perform a two-proportion z-test

In [53]:
from statsmodels.stats.proportion import proportions_ztest

# Inputs for the test, control (A) first and treatment (B) second:
# conversions observed in each group, and the size of each group.
successes = np.array([conv_A, conv_B])
samples = np.array([n_A, n_B])

# Two-proportion z-test using the function's default settings
z_stat, p_val = proportions_ztest(count=successes, nobs=samples)

# Show the test statistic and p-value to four decimals
print(
    f"\nZ-statistic: {z_stat:.4f}",
    f"P-value: {p_val:.4f}",
    sep="\n",
)



Z-statistic: -1.7299
P-value: 0.0836


In [57]:
# Step 4: Interpret the results

In [61]:
# Decision rule at the 5% significance level.
#
# The z-test was run with group A's counts first and no explicit `alternative`
# (statsmodels' default is two-sided), so a small p-value only says the two
# rates differ, not which variant is better. The sign of the z-statistic gives
# the direction: it is negative when A's conversion rate is below B's.
alpha = 0.05
if p_val < alpha and z_stat < 0:
    # Significant, and B converts at the higher rate
    print("\n The difference is statistically significant. Version B performs better.")
elif p_val < alpha:
    # Significant, but in A's favour: B must not be declared the winner
    print("\n The difference is statistically significant. Version A performs better. Keep using Version A.")
else:
    # Not significant (also reached if p_val is NaN): no evidence to switch
    print("\n No significant difference. Keep using Version A.")


 No significant difference. Keep using Version A.
