In [24]:
# Smart A/B Testing Platform

## 1. Load Dataset
## 2. Data Cleaning
## 3. Statistical Tests (t-test, chi-square)
## 4. Auto Insights
## 5. Visualization & Dashboard

# STEP 0: Setup
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
from statsmodels.stats.proportion import proportions_ztest
from IPython.display import Markdown, display


# STEP 1: Define Functions
def run_ab_test(control_converted, treatment_converted):
    n_control = len(control_converted)
    n_treatment = len(treatment_converted)

    p_control = control_converted.mean()
    p_treatment = treatment_converted.mean()

    success = np.array([control_converted.sum(), treatment_converted.sum()])
    nobs = np.array([n_control, n_treatment])

    z_score, p_value = proportions_ztest(success, nobs)

    p_pool = (success[0] + success[1]) / (n_control + n_treatment)
    se_pool = np.sqrt(p_pool * (1 - p_pool) * (1/n_control + 1/n_treatment))

    margin = 1.96 * se_pool
    diff = p_treatment - p_control
    ci_lower = diff - margin
    ci_upper = diff + margin

    return p_control, p_treatment, p_value, ci_lower, ci_upper

def generate_ab_insight(p_control, p_treatment, p_value, ci_lower, ci_upper, alpha=0.05):
    uplift = p_treatment - p_control
    result = ""

    # Outcome status
    if p_value < alpha:
        result += f" *Statistically significant difference detected.*\n\n"
        result += f"Variant B shows an uplift of **{uplift:.2%}** over control.\n"
        result += f"p-value = {p_value:.5f}, 95% CI = ({ci_lower:.2%}, {ci_upper:.2%})\n"
        result += "**Recommendation:** Consider launching Variant B."
    else:
        result += f" *No statistically significant difference detected.*\n\n"
        result += f"Observed uplift = **{uplift:.2%}**\n"
        result += f"p-value = {p_value:.5f}, 95% CI = ({ci_lower:.2%}, {ci_upper:.2%})\n"
        result += "**Recommendation:** Continue testing or collect more data."

    return result


# STEP 2: Load Data
data = pd.read_csv('/content/drive/MyDrive/AB Testing Project/ab_data.csv')
print(" Top 5 rows:")
print(data.head())

data.info()
print("\nMissing values:\n", data.isnull().sum())
print("\nUnique Groups:\n", data['group'].value_counts())
print("\nConversion Stats:\n", data['# of Purchase'].value_counts())


# STEP 3: Clean Data
clean_data = data.dropna(subset=['# of Purchase'])
clean_data['converted'] = clean_data['# of Purchase'].apply(lambda x: 1 if x >= 1 else 0)
print(" Cleaned Data Shape:", clean_data.shape)


# STEP 4: Split into groups
control = clean_data[clean_data['group'] == 'control']
treatment = clean_data[clean_data['group'] == 'treatment']
control_converted = control['converted']
treatment_converted = treatment['converted']


# STEP 5: Chi-Square Test
contingency = pd.crosstab(clean_data['group'], clean_data['converted'])
chi2, p, dof, expected = chi2_contingency(contingency)
print("\n Chi-Square Test Results:")
print("Chi2 Stat:", chi2)
print("P-value:", p)


# STEP 6: Run z-test + Confidence Interval
p_control, p_treatment, p_value, ci_lower, ci_upper = run_ab_test(control_converted, treatment_converted)

# STEP 7: Generate Human Insight
insight = generate_ab_insight(p_control, p_treatment, p_value, ci_lower, ci_upper)
display(Markdown(insight))



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 Top 5 rows:
      Campaign Name        Date  Spend [USD]  # of Impressions  Reach  \
0  Control Campaign  19.01.2024      1727.59             53561  72941   
1     Test Campaign  03.02.2024      2346.70            101430  81540   
2  Control Campaign  04.01.2024      2456.91             98984  48141   
3     Test Campaign  06.02.2024      2624.48            138859  43702   
4  Control Campaign  14.01.2024      1426.02             83159  79080   

   # of Website Clicks  # of Searches  # of View Content  # of Add to Cart  \
0                 5389           2755                946               570   
1                 8949           3481               2506               267   
2                 1854           2134               1354               917   
3                10017           2289               1084              1096   
4                 5931       

 *Statistically significant difference detected.*

Variant B shows an uplift of **26.00%** over control.
p-value = 0.00642, 95% CI = (7.30%, 44.70%)
**Recommendation:** Consider launching Variant B.