# A/B Testing Analysis - Clean Version for GitHub

This notebook provides a full A/B testing analysis of an online advertising dataset.
All outputs are static **matplotlib plots** and tables, so the notebook renders fully on GitHub (no interactive widgets are used).


In [None]:
# === Imports ===
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
from scipy import stats
from statsmodels.stats.proportion import proportions_ztest
from statsmodels.stats.multitest import multipletests

# Configure matplotlib: apply the built-in 'default' style sheet to the figures drawn below.
plt.style.use('default')


In [None]:
# === Load Data ===
# Input files: semicolon-delimited CSVs, one file per group.
control_path = "control_group.csv"
test_path = "test_group.csv"

control_raw, test_raw = (
    pd.read_csv(csv_path, sep=';') for csv_path in (control_path, test_path)
)

# Map the verbose headers of the raw files onto the short column names
# used by the rest of the notebook.
rename_map = {
    "Campaign Name":       "Campaign",
    "Date":                "Date",
    "Spend [USD]":         "Spend",
    "# of Impressions":    "Impression",
    "Reach":               "Reach",
    "# of Website Clicks": "Click",
    "# of Searches":       "Searches",
    "# of View Content":   "ContentViewed",
    "# of Add to Cart":    "AddToCart",
    "# of Purchase":       "Purchase",
}
# Work on renamed copies so the *_raw frames stay untouched.
control, test = (
    raw.rename(columns=rename_map).copy() for raw in (control_raw, test_raw)
)

# Parse dates as day-first; entries that cannot be parsed become NaT.
for df in (control, test):
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce", dayfirst=True)

# Fill missing values
# Coerce every metric column to a numeric dtype (unparseable entries become NaN),
# then replace NaNs with the mean of that column within the same group.
num_cols = ["Spend","Impression","Click","Searches","ContentViewed","AddToCart","Purchase","Reach"]
for df in (control, test):
    for c in num_cols:
        df[c] = pd.to_numeric(df[c], errors="coerce")
        if df[c].isnull().any():
            # Assign the filled column back explicitly. Calling
            # `df[c].fillna(..., inplace=True)` operates on an intermediate
            # Series (chained assignment): it warns in pandas 2.x and leaves
            # `df` unchanged once Copy-on-Write is active.
            df[c] = df[c].fillna(df[c].mean())


In [None]:
# === KPIs per row ===
def add_kpis(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` extended with per-row ratio KPIs.

    Added columns:
        CTR        -- Click / Impression
        CR         -- Purchase / Click
        RevPerImpr -- Spend / Impression

    A ratio is NaN on every row whose denominator is not greater than zero.
    The input frame itself is not modified.
    """
    out = df.copy()

    def _ratio(numerator: str, denominator: str) -> pd.Series:
        # Keep the quotient only where the denominator is strictly positive;
        # all other rows become NaN.
        base = out[denominator]
        return (out[numerator] / base).where(base > 0)

    out["CTR"] = _ratio("Click", "Impression")
    out["CR"] = _ratio("Purchase", "Click")
    out["RevPerImpr"] = _ratio("Spend", "Impression")
    return out

# Attach the KPI columns to both groups, control first and then test.
control, test = (add_kpis(group) for group in (control, test))


In [None]:
# === Aggregates ===
def agg_block(d: pd.DataFrame) -> pd.Series:
    """Collapse one group's rows into a single Series of summary figures.

    Volume columns (Impression, Click, Purchase, Spend) are summed; the
    per-row KPI columns (CTR, CR, RevPerImpr) are averaged with NaN rows
    skipped.
    """
    # Output label -> source column, split by the reduction applied.
    summed = {
        "Impressions": "Impression",
        "Clicks":      "Click",
        "Purchases":   "Purchase",
        "Spend":       "Spend",
    }
    averaged = {
        "CTR_mean":        "CTR",
        "CR_mean":         "CR",
        "RevPerImpr_mean": "RevPerImpr",
    }
    figures = {label: d[col].sum() for label, col in summed.items()}
    figures.update(
        {label: d[col].mean(skipna=True) for label, col in averaged.items()}
    )
    return pd.Series(figures)

# Side-by-side comparison table: one column of aggregates per group.
summary = pd.DataFrame(
    {label: agg_block(group) for label, group in (("Control", control), ("Test", test))}
)
summary


In [None]:
# === Charts (Matplotlib static) ===
# One bar chart per KPI, comparing the Control and Test columns of `summary`.
# Each entry is (row label in `summary`, chart title / y-axis label, scale factor);
# the factor of 100 converts the CTR and CR fractions into percentages.
metrics = [
    ("CTR_mean", "CTR (%)", 100.0),
    ("CR_mean", "CR (%)", 100.0),
    # RevPerImpr is computed as Spend / Impression, i.e. advertising cost in
    # USD per impression, so the panel is labelled as spend rather than revenue.
    ("RevPerImpr_mean", "Spend per Impression (USD)", 1.0),
]
# One panel per metric, so the grid follows the list above.
fig, axes = plt.subplots(1, len(metrics), figsize=(14, 4))
colors = ["gold", "lightgreen"]
for ax, (row_name, title, factor) in zip(axes, metrics):
    vals = [summary.loc[row_name, "Control"] * factor, summary.loc[row_name, "Test"] * factor]
    ax.bar(["Control","Test"], vals, color=colors)
    ax.set_title(title)
    ax.set_ylabel(title)
    # Plain tick labels: avoid scientific notation / offsets on small values.
    ax.ticklabel_format(style='plain', axis='y')
plt.tight_layout()
plt.show()
