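Let's consider an A/B-test-style example in which you want to analyze the effect of a vaccine on patients' health by comparing each patient's health score before and after its administration. Because the same patients are measured twice, this is a paired (before/after) design rather than a comparison of two independent groups, so a paired-sample t-test is used below.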

In [1]:
# Generate Synthetic Dataset:

import pandas as pd
import numpy as np

# Fix the global NumPy seed so the simulated scores are reproducible
np.random.seed(42)

# Cohort size for the simulation
n_patients = 100

# Simulated health scores: normally distributed with a standard deviation
# of 10, centred at 70 before the vaccine and at 75 after it.
# NOTE: the draw order (before, then after) determines the generated values.
health_scores_before = np.random.normal(loc=70, scale=10, size=n_patients)
health_scores_after = np.random.normal(loc=75, scale=10, size=n_patients)

# One row per patient; patient ids run from 1 to n_patients inclusive
data = pd.DataFrame(
    {
        'patient_id': range(1, n_patients + 1),
        'health_before': health_scores_before,
        'health_after': health_scores_after,
    }
)

In [2]:
# Preview the first ten patients as the cell's rich output
data.head(n=10)

Unnamed: 0,patient_id,health_before,health_after
0,1,74.967142,60.846293
1,2,68.617357,70.793547
2,3,76.476885,71.572855
3,4,85.230299,66.977227
4,5,67.658466,73.387143
5,6,67.65863,79.040509
6,7,85.792128,93.861859
7,8,77.674347,76.745778
8,9,65.305256,77.575504
9,10,75.4256,74.255541


In [4]:
# Perform A/B Test:

from scipy.stats import ttest_rel

# Paired-sample (dependent) t-test: each patient's pre-vaccine score is
# compared with that same patient's post-vaccine score.
t_statistic, p_value = ttest_rel(
    a=data['health_before'],
    b=data['health_after'],
)

# Display the test statistic and p-value as the cell output
(t_statistic, p_value)

(-4.460504825796643, 2.1631732032138625e-05)

In [5]:
# Significance threshold for the hypothesis test
alpha = 0.05

# Report the decision: the null hypothesis is rejected only when the
# p-value is strictly below alpha; otherwise it is not rejected.
print(
    "Null hypothesis rejected. There is a significant difference in health scores before and after the vaccine."
    if p_value < alpha
    else "Null hypothesis not rejected. There is no significant difference in health scores before and after the vaccine."
)

Null hypothesis rejected. There is a significant difference in health scores before and after the vaccine.
