<a href="https://colab.research.google.com/github/murphycollins/murphycollins.github.io/blob/main/ab-test-email/notebooks/ab_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages
!pip install pandas scipy matplotlib seaborn

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import beta

# Apply the seaborn "whitegrid" theme to every figure in this notebook.
# (`sns.set` is a legacy alias of `sns.set_theme`.)
sns.set_theme(style="whitegrid")

# Load the A/B test dataset from the raw GitHub URL below. The file has to exist
# at that location (data/ab_sample.csv on the main branch) for this cell to run.
# NOTE(review): the Colab badge at the top of this notebook points at a different
# repository path than this URL -- confirm the data is published where this expects.
url = "https://raw.githubusercontent.com/murphycollins/ab-test-email/main/data/ab_sample.csv"
df = pd.read_csv(url)

# Fail early, with a clear message, if the columns the analysis cells group on
# and aggregate are not present in the file.
missing_cols = {"variant", "converted"} - set(df.columns)
if missing_cols:
    raise ValueError(f"ab_sample.csv is missing required column(s): {sorted(missing_cols)}")

df.head()

In [None]:

# Per-variant totals of the `converted` column:
#   sum   -> total of `converted` (the number of conversions, assuming a 0/1 flag -- TODO confirm)
#   count -> number of non-null `converted` values
#   rate  -> sum / count
summary = (
    df.groupby("variant")["converted"]
    .agg(["sum", "count"])
    .assign(rate=lambda totals: totals["sum"] / totals["count"])
)
summary

In [None]:

# Pull the per-variant totals out of the summary table.
count_A = summary.loc["A", "sum"]
count_B = summary.loc["B", "sum"]
nobs_A = summary.loc["A", "count"]
nobs_B = summary.loc["B", "count"]

# 2x2 contingency table: one row per variant, columns are
# [converted, not converted].
table = [
    [count_A, nobs_A - count_A],
    [count_B, nobs_B - count_B],
]

# Chi-squared test of independence between variant and conversion outcome.
# Only the statistic and the p-value are kept; the other two return values are discarded.
# NOTE(review): SciPy's default `correction=True` applies Yates' continuity
# correction to 2x2 tables -- confirm that is the intended test.
chi2, pval, _, _ = stats.chi2_contingency(table)
print("Chi-squared test p-value:", pval)

In [None]:

# Beta posterior parameters for each variant's conversion rate:
# alpha = 1 + conversions, beta = 1 + non-conversions, i.e. a uniform Beta(1, 1)
# prior updated with the observed totals (assumes `converted` is a 0/1 flag -- TODO confirm).
aA, bA = 1 + summary.loc["A","sum"], 1 + summary.loc["A","count"] - summary.loc["A","sum"]
aB, bB = 1 + summary.loc["B","sum"], 1 + summary.loc["B","count"] - summary.loc["B","sum"]

# Evaluate the densities only where the posteriors actually have mass. A fixed
# grid over [0, 1] places very few points on a narrow posterior (large samples),
# which renders as jagged curves with clipped peaks.
tail = 1e-4  # probability mass left outside the plotted range on each side
x_lo = min(beta.ppf(tail, aA, bA), beta.ppf(tail, aB, bB))
x_hi = max(beta.ppf(1 - tail, aA, bA), beta.ppf(1 - tail, aB, bB))
xs = np.linspace(x_lo, x_hi, 500)

# Overlay both posterior densities on a single axis for visual comparison.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(xs, beta.pdf(xs, aA, bA), label="Variant A")
ax.plot(xs, beta.pdf(xs, aB, bB), label="Variant B")
ax.set_title("Posterior Conversion Rates")
ax.set_xlabel("Conversion Rate")
ax.set_ylabel("Density")
ax.legend()
plt.show()