## Experimentation

1. A/B Testing
2. Statistical Analysis

### 1. A/B Testing

##### Importing packages

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Install a global "ignore" filter: from this point on, no warning of any
# category is shown by later cells (this includes deprecation notices
# emitted by the libraries imported above).
warnings.filterwarnings('ignore')

# Simple confirmation that the import cell ran to completion.
print('Packages imported successfully')

Packages imported successfully


##### Loading and inspecting the data

In [None]:
# Read the encoded dataset and discard the 'Unnamed: 0' and 'Customer ID'
# columns in one chained expression, so no frame is mutated in place.
data = (
    pd.read_csv('data/data_encoded.csv')
    .drop(columns=['Unnamed: 0', 'Customer ID'])
)
# Leave the first rows as the cell's displayed value
data.head()


In [None]:
# Cast the grouping columns to explicit dtypes in a single call.
# `astype` with a column -> dtype mapping returns a new frame, so re-running
# this cell simply repeats the same conversion.
# NOTE(review): the 'bool' cast assumes 'Discount Applied' is stored as
# 0/1 or True/False; string labels would all become True - TODO confirm.
data = data.astype({
    'Gender': 'category',
    'City': 'category',
    'Membership Type': 'category',
    'Discount Applied': 'bool',
    'Satisfaction Level': 'category',
})

print("Data types successfully converted")
# Verify the data types and structure.
# `DataFrame.info()` writes its report itself and returns None, so it must
# not be wrapped in print() (that would append a stray "None" line).
data.info()

In [None]:
# Distinct values found in the 'Discount Applied' column, shown as a set
set(pd.unique(data['Discount Applied']))

In [None]:
# Partition the rows on the 'Discount Applied' flag:
#   False -> control group (no discount), True -> treatment group (discount)
discount_flag = data['Discount Applied']
control_group = data.loc[discount_flag.eq(False)]
treatment_group = data.loc[discount_flag.eq(True)]

# Report both group sizes so any imbalance between them is visible
n_control_rows = len(control_group)
n_treatment_rows = len(treatment_group)
print(f"Control Group Size: {n_control_rows}")
print(f"Treatment Group Size: {n_treatment_rows}")

##### Calculating basic metrics

In [None]:
# Pull out the 'Total Spend' series of each group once
control_spend_values = control_group['Total Spend']
treatment_spend_values = treatment_group['Total Spend']

# Mean spend per group and the treatment-minus-control difference
control_avg_spend = control_spend_values.mean()
treatment_avg_spend = treatment_spend_values.mean()
avg_spend_diff = treatment_avg_spend - control_avg_spend

# Report the two means, then their difference
print(f"Control (No discount) Group Average Spend: ${control_avg_spend:.2f}")
print(f"Treatment (Discount) Group Average Spend: ${treatment_avg_spend:.2f}")
print(f"Difference in Average Spend: ${avg_spend_diff:.2f}")


In [None]:
# Independent two-sample t-test on 'Total Spend', treatment vs. control.
# Called with SciPy's defaults, i.e. the equal-variance (pooled) Student
# t-test with a two-sided alternative.
ttest_result = stats.ttest_ind(
    treatment_group['Total Spend'],
    control_group['Total Spend'],
)
t_stat = ttest_result.statistic
p_value = ttest_result.pvalue

print(f"T-Statistic: {t_stat:.4f}")
print(f"P-Value: {p_value:.4f}")

# Significance threshold used for the decision here and in the summary cell
alpha = 0.05
significance_message = (
    "Result: The difference is statistically significant."
    if p_value < alpha
    else "Result: The difference is not statistically significant."
)
print(significance_message)


In [None]:
# 95% confidence interval for the difference in average spend
# (treatment mean - control mean).
#
# The standard error of a difference of two independent means is NOT the sum
# of the two standard errors. With n1 + n2 - 2 degrees of freedom (the same
# pooled-variance setting as the default `stats.ttest_ind` call), it is
#     sqrt(pooled_variance * (1/n1 + 1/n2)).
treatment_spend = treatment_group['Total Spend']
control_spend = control_group['Total Spend']
n_treatment = len(treatment_spend)
n_control = len(control_spend)
pooled_df = n_treatment + n_control - 2

# Pooled sample variance: each group's variance weighted by its own df
pooled_variance = (
    (n_treatment - 1) * treatment_spend.var(ddof=1)
    + (n_control - 1) * control_spend.var(ddof=1)
) / pooled_df
diff_std_error = np.sqrt(pooled_variance * (1 / n_treatment + 1 / n_control))

# The confidence level is passed positionally: SciPy renamed this argument
# from `alpha` to `confidence`, so a keyword would only work on some versions.
ci_lower, ci_upper = stats.t.interval(
    0.95,
    df=pooled_df,
    loc=avg_spend_diff,
    scale=diff_std_error,
)

print(f"95% Confidence Interval for the difference in Average Spend: [${ci_lower:.2f}, ${ci_upper:.2f}]")


In [None]:
# Recap of every figure computed in the cells above
print("\n--- A/B Test Summary ---")
print(f"Control Group Average Spend: ${control_avg_spend:.2f}")
print(f"Treatment Group Average Spend: ${treatment_avg_spend:.2f}")
print(f"Difference in Average Spend: ${avg_spend_diff:.2f}")
print(f"T-Statistic: {t_stat:.4f}")
print(f"P-Value: {p_value:.4f}")
print(f"95% Confidence Interval: [${ci_lower:.2f}, ${ci_upper:.2f}]")

# Interpretation of the results.
# The t-test is two-sided, so a small p-value only says the means differ;
# the sign of the difference (treatment - control) decides the direction.
if p_value < alpha and avg_spend_diff > 0:
    print("\nConclusion: The discount significantly increased the average spend.")
elif p_value < alpha:
    print("\nConclusion: The discount group's average spend was significantly lower than the control group's.")
else:
    print("\nConclusion: The discount did not significantly increase the average spend.")
