# A/B Testing: Early Repayment Feature Impact

This notebook analyzes the impact of an early repayment feature on loan performance using a synthetic dataset. It simulates a real-world A/B testing scenario modeled on my past experience in lending analytics.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.proportion import proportions_ztest

# Apply seaborn's "whitegrid" theme to every figure drawn below.
# set_theme() is the documented interface (seaborn >= 0.11); sns.set() is only
# a backward-compatible alias for it that may be removed in a future release.
sns.set_theme(style="whitegrid")

In [None]:
# Read the synthetic loan records from disk into `df`, then preview the first
# rows to confirm the file parsed as expected.
csv_path = "data/synthetic_loan_data.csv"
df = pd.read_csv(csv_path)
df.head()

## Repayment Outcome Distribution by Group

In [None]:
# Count loans per (treatment_group, repayment_outcome) pair and pivot the
# outcomes into columns. fill_value=0 keeps a group that never produced a given
# outcome at an explicit zero instead of NaN in the tables below.
group_counts = (
    df.groupby(['treatment_group', 'repayment_outcome'])
    .size()
    .unstack(fill_value=0)
)

# Normalize each row so a group's outcome shares sum to 1.
group_props = group_counts.div(group_counts.sum(axis=1), axis=0)

# Draw on an explicit Axes rather than through pyplot's implicit current
# figure, so the title and labels are attached to this chart unambiguously.
fig, ax = plt.subplots(figsize=(8, 5))
group_props.plot(kind='bar', stacked=True, rot=0, ax=ax)
ax.set_title('Repayment Outcome Proportion by Group')
ax.set_ylabel('Proportion')
plt.show()

## A/B Test: Paid Rate Comparison
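We test whether the on-time repayment rate differs between the treatment and control groups, using a two-sided two-proportion z-test at the 5% significance level. A significant result supports the feature only if the treatment group's rate is the higher of the two.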

In [None]:
# Two-sided two-proportion z-test comparing the share of loans whose
# repayment_outcome is 'paid' across the two treatment groups.
ALPHA = 0.05  # significance threshold for the decision printed at the end

# Create binary 'paid' indicator (True where repayment_outcome == 'paid')
df['is_paid'] = df['repayment_outcome'] == 'paid'

# Successes and observations by group, taken from ONE aggregation so both
# vectors share the same index and cannot drift out of group order.
group_stats = df.groupby('treatment_group')['is_paid'].agg(['sum', 'size'])
successes = group_stats['sum']
n_obs = group_stats['size']

# The comparison below is only meaningful for exactly two groups; fail with a
# clear message instead of an opaque error from the test routine.
if len(group_stats) != 2:
    raise ValueError(
        f"Expected exactly 2 treatment groups, found {len(group_stats)}: "
        f"{list(group_stats.index)}"
    )

# Show each group's paid rate: the test below is two-sided, so the direction of
# any significant difference has to be read from these numbers.
paid_rates = successes / n_obs
for group_label, rate in paid_rates.items():
    print(f"Paid rate [{group_label}]: {rate:.3f}")

# Perform z-test. The alternative is spelled out so the sidedness is visible;
# per the statsmodels docs the statistic is based on (first group - second
# group), i.e. the groups in sorted label order.
z_stat, p_val = proportions_ztest(successes, n_obs, alternative='two-sided')

print(f"Z-statistic: {z_stat:.3f}, P-value: {p_val:.3f}")
if p_val < ALPHA:
    print("Result: Statistically significant difference in repayment rates.")
else:
    print("Result: No statistically significant difference.")