In [None]:
import pandas as pd

# Read the A/B test export (path is relative to the notebook's working
# directory) into the frame that every later cell works from.
df = pd.read_csv(filepath_or_buffer='data/sample_ab_test.csv')

# Preview the first five rows as a quick sanity check of the columns.
df.head(n=5)


In [None]:
# Structural overview: column dtypes, non-null counts and memory footprint.
# `info()` writes straight to stdout, so it shows up wherever it sits in the cell.
df.info()

# A notebook cell only renders the value of its *final* expression. The group
# tally is not last, so it has to be printed explicitly or it is computed and
# silently thrown away.
print(df['group'].value_counts())

# Final expression: rendered by the notebook as the cell's output.
df['converted'].value_counts()


In [None]:
# Split the observations into the two experiment arms by their `group` label.
group_A, group_B = (df.loc[df['group'] == arm] for arm in ('A', 'B'))

# The conversion rate of an arm is the mean of its `converted` column.
conv_A, conv_B = group_A['converted'].mean(), group_B['converted'].mean()

# Report both rates as percentages with two decimals, one per line.
print(
    f"Conversion Rate - Group A: {conv_A:.2%}",
    f"Conversion Rate - Group B: {conv_B:.2%}",
    sep="\n",
)


In [None]:
from statsmodels.stats.proportion import proportions_ztest

# Per-arm inputs for the test, always ordered [A, B]:
# `success` holds the summed `converted` values, `nobs` the row counts.
success = [arm['converted'].sum() for arm in (group_A, group_B)]
nobs = [len(arm) for arm in (group_A, group_B)]

# Two-sided z-test on the two proportions.
z_stat, p_val = proportions_ztest(
    count=success,
    nobs=nobs,
    alternative='two-sided',
)

print(
    f"Z-Statistic: {z_stat:.3f}",
    f"P-Value: {p_val:.4f}",
    sep="\n",
)


In [None]:
alpha = 0.05  # significance threshold, i.e. a 95% confidence level

# Significance requires p to be strictly below alpha; everything else
# (including a NaN p-value) is handled first as "not significant".
if not (p_val < alpha):
    print("❌ No statistically significant difference.")
else:
    print("✅ Statistically significant difference found.")
    # Name the arm with the higher observed rate; a tie is reported as A.
    if conv_B > conv_A:
        print("Better Group:", "B")
    else:
        print("Better Group:", "A")


In [None]:
from statsmodels.stats.proportion import proportions_ztest


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Prepare data: one row per experiment arm with its observed conversion rate.
plot_data = pd.DataFrame({
    'Group': ['A', 'B'],
    'Conversion Rate': [conv_A, conv_B]
})

# Plot
sns.set(style='whitegrid')
fig, ax = plt.subplots(figsize=(6, 4))

# seaborn 0.13 deprecates passing `palette` without `hue` (FutureWarning, with
# removal announced for 0.14), so the x variable is also assigned to `hue`.
# `dodge=False` keeps every bar centred on its tick on older releases, which
# would otherwise offset the bars per hue level.
sns.barplot(
    x='Group',
    y='Conversion Rate',
    hue='Group',
    data=plot_data,
    palette='pastel',
    dodge=False,
    ax=ax,
)

# Depending on the seaborn version, the hue mapping may add a legend that only
# repeats the x tick labels; remove it when present so the figure looks as before.
if ax.get_legend() is not None:
    ax.get_legend().remove()

ax.set_title('Conversion Rate Comparison (A vs B)', fontsize=14)
ax.set_ylim(0, 1)  # fixed 0-1 axis so bar heights read as proportions
ax.set_ylabel('Conversion Rate')
ax.set_xlabel('Group')
fig.tight_layout()
plt.show()


In [None]:
# Add a label column for the legend: 0 -> 'No', 1 -> 'Yes'
# (any other value is left unmapped by `map` and becomes NaN).
df['converted_label'] = df['converted'].map({0: 'No', 1: 'Yes'})

# Side-by-side user counts per group, split by conversion outcome.
plt.figure(figsize=(7, 5))
sns.countplot(data=df, x='group', hue='converted_label', palette='Set2')

# Annotate the axes that countplot has just drawn on.
plt.gca().set(
    title='Conversions Count by Group',
    xlabel='Group',
    ylabel='User Count',
)
plt.gca().legend(title='Converted?')

plt.tight_layout()
plt.show()
