In [None]:
import pandas as pd

# Load the cleaned delivery data (path is relative to the notebook's directory).
df = pd.read_csv('../data/processed/cleaned_amazon.csv')

# Split the rows into two cohorts for the A/B comparison:
#   'B' -> rows whose Agent_Age value occurs in more than 10 rows
#   'A' -> every other row (including rows where Agent_Age is missing,
#          because value_counts() ignores NaN by default)
# NOTE(review): the key is Agent_Age, so rows are counted per age value rather
# than per individual rider — confirm this is the intended proxy for
# "riders with more than 10 orders".
agent_counts = df['Agent_Age'].value_counts()
eligible_agents = agent_counts[agent_counts > 10].index.tolist()

# Vectorised membership test: a single isin() pass replaces a Python-level
# lambda that scanned the eligible list once for every row.
df['Group'] = df['Agent_Age'].isin(eligible_agents).map({True: 'B', False: 'A'})


In [None]:
# Per-group mean of each key metric, flattened so that 'Group' is a regular
# column, then persisted for the report.
summary_metrics = ['Pickup_Delay_Minutes', 'Delivery_Speed_KMPH', 'Agent_Rating']

# Same {column: 'mean'} mapping, built from the metric list (order preserved).
group_means = df.groupby('Group').agg(dict.fromkeys(summary_metrics, 'mean'))
ab_summary = group_means.reset_index()

ab_summary.to_csv('../reports/ab_summary.csv', index=False)

In [None]:
from scipy.stats import ttest_ind

# Welch's t-test (equal_var=False, i.e. no equal-variance assumption) on
# Pickup_Delay_Minutes between group A and group B.
# Missing values are dropped first: ttest_ind propagates NaN by default, so a
# single missing delay would turn both the statistic and the p-value into NaN.
group_a = df.loc[df['Group'] == 'A', 'Pickup_Delay_Minutes'].dropna()
group_b = df.loc[df['Group'] == 'B', 'Pickup_Delay_Minutes'].dropna()

if min(len(group_a), len(group_b)) < 2:
    # Each sample needs at least two observations for a variance estimate;
    # otherwise the test is undefined. Say so explicitly instead of printing
    # NaN, but keep t_stat / p_val defined for any later cell that reads them.
    t_stat = p_val = float('nan')
    print(
        "Not enough data for a t-test: "
        f"group A has {len(group_a)} rows, group B has {len(group_b)} rows"
    )
else:
    t_stat, p_val = ttest_ind(group_a, group_b, equal_var=False)
    print(f"T-statistic = {t_stat:.3f}, P-value = {p_val:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Side-by-side bar charts (one panel per metric) comparing the group means
# stored in ab_summary; the figure is written to disk and then displayed.
metrics = ['Pickup_Delay_Minutes', 'Delivery_Speed_KMPH', 'Agent_Rating']
colors = ['steelblue', 'orange', 'green']

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

# Walk the panels, metrics and colours in lockstep instead of indexing by position.
for panel, metric_name, bar_color in zip(ax, metrics, colors):
    ab_summary.plot.bar(x='Group', y=metric_name, ax=panel, legend=False, color=bar_color)
    # The panel title carries the metric name, so both axis labels are blanked.
    panel.set_title(metric_name)
    panel.set_xlabel('')
    panel.set_ylabel('')

fig.suptitle('A/B Group Comparison on Key Metrics')
fig.tight_layout()
fig.savefig('../reports/screenshots/ab_test_comparison.png')
plt.show()


In [None]:
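# Interpreting the t-test on Pickup_Delay_Minutes (groups A vs. B):
# If p < 0.05:
# → The difference between the groups is statistically significant.
# Otherwise:
# → No statistically significant difference between the groups was detected.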

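### Insight: This comparison lets us evaluate whether loyalty incentives truly impact rider behavior.
# Caveat: the groups are derived from observed data rather than random assignment,
# so a significant difference indicates association, not necessarily causation.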