# Task 3: A/B Hypothesis Testing
## Insurance Risk Analytics - Statistical Validation

### Objectives:
- Test 4 null hypotheses about risk drivers
- Validate statistical significance of differences
- Provide business recommendations based on findings

### Hypotheses to Test:
1. H₀: There are no risk differences across provinces
2. H₀: There are no risk differences between zip codes
3. H₀: There is no significant margin (profit) difference between zip codes
4. H₀: There is no significant risk difference between Women and Men


In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import chi2_contingency, ttest_ind, f_oneway, mannwhitneyu
import warnings

# Silence only deprecation-style notices. A blanket filterwarnings('ignore')
# would also hide the RuntimeWarnings that NumPy/SciPy emit for degenerate
# inputs (division by zero, constant or too-small samples), which are the
# cases where a reported p-value should not be trusted.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Import custom modules
import sys
# Put the parent directory on the import path so that `src` resolves
# (presumably the notebook is run from a sub-folder of the project - confirm).
sys.path.append('../')
from src.utils import calculate_loss_ratio, calculate_claim_frequency, calculate_claim_severity, calculate_margin

# Set plotting style
# The style sheet is applied first; the figure-size override must come after it.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 6)


In [None]:
# Load data
# NOTE: Update this path to your actual data file location
try:
    df = pd.read_csv('../data/insurance_data.csv', low_memory=False)
except FileNotFoundError:
    print("Data file not found. Please ensure your data is in ../data/insurance_data.csv")
    print("For now, creating sample data structure for demonstration...")
    # Build a synthetic stand-in holding the five raw columns used further down.
    # The draws below are made in a fixed order under a fixed seed so the
    # demonstration frame is reproducible from run to run.
    np.random.seed(42)
    n_samples = 1000
    sample_provinces = np.random.choice(
        ['Gauteng', 'Western Cape', 'KwaZulu-Natal', 'Eastern Cape'], n_samples
    )
    sample_postal_codes = np.random.choice(['2000', '3000', '4000', '5000', '6000'], n_samples)
    sample_genders = np.random.choice(['Male', 'Female'], n_samples)
    sample_premiums = np.random.uniform(5000, 50000, n_samples)
    # Claim amounts are exponential, then zeroed for the rows whose uniform
    # draw is not below 0.3, so most rows end up with no claim.
    sample_claim_amounts = np.random.exponential(10000, n_samples)
    sample_has_claim = np.random.random(n_samples) < 0.3
    df = pd.DataFrame({
        'Province': sample_provinces,
        'PostalCode': sample_postal_codes,
        'Gender': sample_genders,
        'TotalPremium': sample_premiums,
        'TotalClaims': sample_claim_amounts * sample_has_claim,
    })
    print("Sample data created for demonstration purposes")
else:
    n_rows, n_cols = df.shape
    print(f"Data loaded successfully: {n_rows} rows, {n_cols} columns")


In [None]:
# Data preparation - derive the per-row metrics that the hypothesis tests use
premium_col = df['TotalPremium']
claims_col = df['TotalClaims']
df['LossRatio'] = calculate_loss_ratio(claims_col, premium_col)
df['Margin'] = calculate_margin(premium_col, claims_col)
# Binary flag: 1 when the row carries a positive claim amount, else 0
df['HasClaim'] = df['TotalClaims'].gt(0).astype(int)

# Report overall ranges/averages and the missing-value count of the raw inputs
raw_columns = ['Province', 'PostalCode', 'Gender', 'TotalPremium', 'TotalClaims']
missing_counts = df[raw_columns].isnull().sum()
report_lines = [
    "Data prepared with key metrics:",
    f"- Loss Ratio range: {df['LossRatio'].min():.2f} to {df['LossRatio'].max():.2f}",
    f"- Claim Frequency: {df['HasClaim'].mean():.2%}",
    f"- Average Margin: {df['Margin'].mean():.2f}",
    f"\nData shape: {df.shape}",
    f"\nMissing values:\n{missing_counts}",
]
print("\n".join(report_lines))


## Hypothesis 1: Risk Differences Across Provinces

**H₀:** There are no risk differences across provinces  
**H₁:** There are significant risk differences across provinces

We'll test this using:
- **Claim Frequency** (categorical): Chi-square test
- **Loss Ratio** (continuous): One-way ANOVA


In [None]:
# Hypothesis 1: Risk differences across provinces
# Produces p_value_freq (chi-square) and p_value_loss (ANOVA), plus the
# province_stats summary table that the following cell reads.

# Remove missing values
df_province = df[df['Province'].notna()].copy()

# Test 1.1: Claim Frequency (Chi-square test of independence, Province x HasClaim)
contingency_table = pd.crosstab(df_province['Province'], df_province['HasClaim'])
print("Contingency Table (Province vs HasClaim):")
print(contingency_table)
print()

chi2, p_value_freq, dof, expected = chi2_contingency(contingency_table)
print(f"Chi-square Test for Claim Frequency:")
print(f"  Chi-square statistic: {chi2:.4f}")
print(f"  p-value: {p_value_freq:.6f}")
print(f"  Degrees of freedom: {dof}")
print(f"  Result: {'REJECT H₀' if p_value_freq < 0.05 else 'FAIL TO REJECT H₀'} (α = 0.05)")
print()

# Test 1.2: Loss Ratio (ANOVA)
# dropna() does not remove +/-inf, and a single infinite value makes the
# F-test return NaN. Infinite ratios are therefore treated as missing here
# (they would arise if the loss-ratio helper divides by a zero premium; the
# helper is not visible from this notebook - confirm).
province_groups = [
    group['LossRatio'].replace([np.inf, -np.inf], np.nan).dropna()
    for name, group in df_province.groupby('Province')
]
# Provinces left without any usable observation cannot take part in the test.
province_groups = [values for values in province_groups if len(values) > 0]

# f_oneway needs at least two groups; fall back to p = 1.0 otherwise, the same
# convention the zip-code tests use.
if len(province_groups) >= 2:
    f_stat, p_value_loss = f_oneway(*province_groups)
    print(f"ANOVA Test for Loss Ratio:")
    print(f"  F-statistic: {f_stat:.4f}")
    print(f"  p-value: {p_value_loss:.6f}")
    print(f"  Result: {'REJECT H₀' if p_value_loss < 0.05 else 'FAIL TO REJECT H₀'} (α = 0.05)")
else:
    p_value_loss = 1.0
    print("Not enough province groups for ANOVA test")
print()

# Calculate summary statistics by province
# Claim severity is the mean of the positive claim amounts only (0 when a
# province has no positive claim).
province_stats = df_province.groupby('Province').agg({
    'HasClaim': 'mean',
    'LossRatio': 'mean',
    'TotalClaims': lambda x: x[x > 0].mean() if (x > 0).any() else 0,
    'TotalPremium': 'mean'
}).round(4)
province_stats.columns = ['Claim_Frequency', 'Loss_Ratio', 'Claim_Severity', 'Avg_Premium']
print("Summary Statistics by Province:")
print(province_stats)


In [None]:
# Visualization for Hypothesis 1
# Draws a 2x2 panel (claim frequency, loss ratio, loss-ratio box plot, margin)
# per province, saves it, then prints the business interpretation.
import os

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Claim Frequency by Province
claim_freq = df_province.groupby('Province')['HasClaim'].mean().sort_values(ascending=False)
axes[0, 0].barh(claim_freq.index, claim_freq.values, color='steelblue')
axes[0, 0].set_xlabel('Claim Frequency', fontsize=11)
axes[0, 0].set_title('Claim Frequency by Province', fontsize=12, fontweight='bold')
axes[0, 0].grid(axis='x', alpha=0.3)

# Loss Ratio by Province
loss_ratio = df_province.groupby('Province')['LossRatio'].mean().sort_values(ascending=False)
axes[0, 1].barh(loss_ratio.index, loss_ratio.values, color='coral')
axes[0, 1].axvline(x=1.0, color='red', linestyle='--', linewidth=2, label='Break-even')
axes[0, 1].set_xlabel('Loss Ratio', fontsize=11)
axes[0, 1].set_title('Loss Ratio by Province', fontsize=12, fontweight='bold')
axes[0, 1].legend()
axes[0, 1].grid(axis='x', alpha=0.3)

# Box plot: Loss Ratio distribution
df_province.boxplot(column='LossRatio', by='Province', ax=axes[1, 0])
axes[1, 0].set_title('Loss Ratio Distribution by Province', fontsize=12, fontweight='bold')
axes[1, 0].set_xlabel('Province', fontsize=11)
axes[1, 0].set_ylabel('Loss Ratio', fontsize=11)
plt.setp(axes[1, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')
# A grouped pandas boxplot puts a "Boxplot grouped by ..." title on the whole
# figure; clear it so it does not sit above the three unrelated panels.
fig.suptitle('')

# Margin by Province
margin = df_province.groupby('Province')['Margin'].mean().sort_values(ascending=False)
axes[1, 1].barh(margin.index, margin.values, color='green')
axes[1, 1].axvline(x=0, color='red', linestyle='--', linewidth=2)
axes[1, 1].set_xlabel('Average Margin (Profit)', fontsize=11)
axes[1, 1].set_title('Average Margin by Province', fontsize=12, fontweight='bold')
axes[1, 1].grid(axis='x', alpha=0.3)

plt.tight_layout()
# savefig does not create missing folders; make sure the target exists first.
os.makedirs('../reports/figures', exist_ok=True)
plt.savefig('../reports/figures/hypothesis1_provinces.png', dpi=300, bbox_inches='tight')
plt.show()

# Business Interpretation
# NOTE(review): H₀ is rejected when either of the two tests is significant at
# 0.05, with no multiple-comparison adjustment applied.
print("\n" + "="*80)
print("BUSINESS INTERPRETATION - Hypothesis 1:")
print("="*80)
if p_value_freq < 0.05 or p_value_loss < 0.05:
    print(f"✓ REJECT H₀: There ARE significant risk differences across provinces (p < 0.05)")
    print(f"\nKey Findings:")
    highest_risk = province_stats['Loss_Ratio'].idxmax()
    lowest_risk = province_stats['Loss_Ratio'].idxmin()
    highest_loss_ratio = province_stats.loc[highest_risk, 'Loss_Ratio']
    lowest_loss_ratio = province_stats.loc[lowest_risk, 'Loss_Ratio']
    print(f"  - {highest_risk} has the highest loss ratio: {highest_loss_ratio:.3f}")
    print(f"  - {lowest_risk} has the lowest loss ratio: {lowest_loss_ratio:.3f}")
    # The relative difference is only defined for a positive denominator; a
    # province with no claims at all has a loss ratio of 0.
    if lowest_loss_ratio > 0:
        risk_diff = ((highest_loss_ratio / lowest_loss_ratio) - 1) * 100
        print(f"  - Risk difference: {risk_diff:.1f}%")
    else:
        print("  - Risk difference: n/a (lowest loss ratio is not positive)")
    print(f"\nRecommendation: Adjust premiums by province. Consider increasing premiums in {highest_risk} by 10-15%.")
else:
    print(f"✗ FAIL TO REJECT H₀: No significant risk differences across provinces (p ≥ 0.05)")
    print("Recommendation: Province may not be a significant risk factor for pricing.")
print("="*80)


In [None]:
# Hypothesis 2: Risk differences between zip codes
# Produces p_value_zip_freq (chi-square) and p_value_zip_loss (ANOVA), plus
# df_zipcode_filtered and zipcode_stats, which later cells reuse.

df_zipcode = df[df['PostalCode'].notna()].copy()

# Get top zip codes by sample size (for meaningful comparison)
# value_counts() is ordered by frequency, so head(10) keeps the ten largest
# zip codes among those with at least 50 rows.
zipcode_counts = df_zipcode['PostalCode'].value_counts()
top_zipcodes = zipcode_counts[zipcode_counts >= 50].head(10).index.tolist()  # At least 50 samples

if len(top_zipcodes) < 2:
    print("Warning: Not enough zip codes with sufficient samples. Using all available zip codes.")
    top_zipcodes = zipcode_counts.head(10).index.tolist()

df_zipcode_filtered = df_zipcode[df_zipcode['PostalCode'].isin(top_zipcodes)].copy()

# Test 2.1: Claim Frequency (Chi-square)
contingency_zip = pd.crosstab(df_zipcode_filtered['PostalCode'], df_zipcode_filtered['HasClaim'])
chi2_zip, p_value_zip_freq, dof_zip, expected_zip = chi2_contingency(contingency_zip)
print(f"Chi-square Test for Claim Frequency by Zip Code:")
print(f"  Chi-square statistic: {chi2_zip:.4f}")
print(f"  p-value: {p_value_zip_freq:.6f}")
print(f"  Result: {'REJECT H₀' if p_value_zip_freq < 0.05 else 'FAIL TO REJECT H₀'} (α = 0.05)")
print()

# Test 2.2: Loss Ratio (ANOVA)
# dropna() does not remove +/-inf, and a single infinite value makes the
# F-test return NaN. Infinite ratios are therefore treated as missing here
# (they would arise if the loss-ratio helper divides by a zero premium; the
# helper is not visible from this notebook - confirm).
zipcode_groups = [
    group['LossRatio'].replace([np.inf, -np.inf], np.nan).dropna()
    for name, group in df_zipcode_filtered.groupby('PostalCode')
]
# Zip codes left without any usable observation cannot take part in the test.
zipcode_groups = [values for values in zipcode_groups if len(values) > 0]
if len(zipcode_groups) >= 2:
    f_stat_zip, p_value_zip_loss = f_oneway(*zipcode_groups)
    print(f"ANOVA Test for Loss Ratio by Zip Code:")
    print(f"  F-statistic: {f_stat_zip:.4f}")
    print(f"  p-value: {p_value_zip_loss:.6f}")
    print(f"  Result: {'REJECT H₀' if p_value_zip_loss < 0.05 else 'FAIL TO REJECT H₀'} (α = 0.05)")
else:
    # p = 1.0 stands for "test could not be run" so later cells never reject on it.
    p_value_zip_loss = 1.0
    print("Not enough zip code groups for ANOVA test")

print()
# Claim severity is the mean of the positive claim amounts only (0 when a
# zip code has no positive claim).
zipcode_stats = df_zipcode_filtered.groupby('PostalCode').agg({
    'HasClaim': 'mean',
    'LossRatio': 'mean',
    'TotalClaims': lambda x: x[x > 0].mean() if (x > 0).any() else 0,
    'Margin': 'mean'
}).round(4)
zipcode_stats.columns = ['Claim_Frequency', 'Loss_Ratio', 'Claim_Severity', 'Avg_Margin']
print("Summary Statistics by Zip Code (Top 10):")
print(zipcode_stats.sort_values('Loss_Ratio', ascending=False))


In [None]:
# Visualization for Hypothesis 2
# Draws loss ratio and average margin for the selected zip codes, saves the
# figure, then prints the business interpretation.
import os

fig, axes = plt.subplots(2, 1, figsize=(12, 10))

# Loss Ratio by Zip Code (Top 10)
loss_ratio_zip = zipcode_stats.sort_values('Loss_Ratio', ascending=False).head(10)
# Bars are placed at integer positions and labelled afterwards, so the labels
# are shown as categories even if the zip codes are stored as numbers.
axes[0].barh(range(len(loss_ratio_zip)), loss_ratio_zip['Loss_Ratio'].values, color='steelblue')
axes[0].set_yticks(range(len(loss_ratio_zip)))
axes[0].set_yticklabels(loss_ratio_zip.index)
axes[0].axvline(x=1.0, color='red', linestyle='--', linewidth=2, label='Break-even')
axes[0].set_xlabel('Loss Ratio', fontsize=11)
axes[0].set_title('Loss Ratio by Zip Code (Top 10)', fontsize=12, fontweight='bold')
axes[0].legend()
axes[0].grid(axis='x', alpha=0.3)

# Margin by Zip Code
margin_zip = zipcode_stats.sort_values('Avg_Margin', ascending=False).head(10)
axes[1].barh(range(len(margin_zip)), margin_zip['Avg_Margin'].values, color='green')
axes[1].set_yticks(range(len(margin_zip)))
axes[1].set_yticklabels(margin_zip.index)
axes[1].axvline(x=0, color='red', linestyle='--', linewidth=2)
axes[1].set_xlabel('Average Margin (Profit)', fontsize=11)
axes[1].set_title('Average Margin by Zip Code (Top 10)', fontsize=12, fontweight='bold')
axes[1].grid(axis='x', alpha=0.3)

plt.tight_layout()
# savefig does not create missing folders; make sure the target exists first.
os.makedirs('../reports/figures', exist_ok=True)
plt.savefig('../reports/figures/hypothesis2_zipcodes.png', dpi=300, bbox_inches='tight')
plt.show()

# Business Interpretation
# NOTE(review): H₀ is rejected when either of the two tests is significant at
# 0.05, with no multiple-comparison adjustment applied.
print("\n" + "="*80)
print("BUSINESS INTERPRETATION - Hypothesis 2:")
print("="*80)
if p_value_zip_freq < 0.05 or p_value_zip_loss < 0.05:
    print(f"✓ REJECT H₀: There ARE significant risk differences between zip codes (p < 0.05)")
    highest_risk_zip = zipcode_stats['Loss_Ratio'].idxmax()
    lowest_risk_zip = zipcode_stats['Loss_Ratio'].idxmin()
    print(f"\nKey Findings:")
    print(f"  - Highest risk zip code: {highest_risk_zip} (Loss Ratio: {zipcode_stats.loc[highest_risk_zip, 'Loss_Ratio']:.3f})")
    print(f"  - Lowest risk zip code: {lowest_risk_zip} (Loss Ratio: {zipcode_stats.loc[lowest_risk_zip, 'Loss_Ratio']:.3f})")
    print(f"\nRecommendation: Implement location-based pricing. Consider zip code-specific premium adjustments.")
else:
    print(f"✗ FAIL TO REJECT H₀: No significant risk differences between zip codes (p ≥ 0.05)")
    print("Recommendation: Zip code may not be a significant risk factor for pricing.")
print("="*80)


## Hypothesis 3: Margin (Profit) Differences Between Zip Codes

**H₀:** There is no significant margin (profit) difference between zip codes  
**H₁:** There are significant margin differences between zip codes


In [None]:
# Hypothesis 3: Margin differences between zip codes
# Produces p_value_margin (ANOVA) and the margin_stats table read by the next cell.

# Work on a copy of the zip-code subset selected for Hypothesis 2
df_zipcode_margin = df_zipcode_filtered.copy()

# Test: Margin (ANOVA) - one sample of non-missing margins per zip code
zipcode_margin_groups = []
for _, group in df_zipcode_margin.groupby('PostalCode'):
    zipcode_margin_groups.append(group['Margin'].dropna())

if len(zipcode_margin_groups) < 2:
    # p = 1.0 stands for "test could not be run"
    p_value_margin = 1.0
    print("Not enough zip code groups for ANOVA test")
else:
    f_stat_margin, p_value_margin = f_oneway(*zipcode_margin_groups)
    margin_verdict = 'REJECT H₀' if p_value_margin < 0.05 else 'FAIL TO REJECT H₀'
    print(f"ANOVA Test for Margin by Zip Code:")
    print(f"  F-statistic: {f_stat_margin:.4f}")
    print(f"  p-value: {p_value_margin:.6f}")
    print(f"  Result: {margin_verdict} (α = 0.05)")

print()
# Per-zip-code margin mean/std/count plus average premium and claims
margin_stats = (
    df_zipcode_margin.groupby('PostalCode')
    .agg(
        Avg_Margin=('Margin', 'mean'),
        Std_Margin=('Margin', 'std'),
        Count=('Margin', 'count'),
        Avg_Premium=('TotalPremium', 'mean'),
        Avg_Claims=('TotalClaims', 'mean'),
    )
    .round(2)
)
print("Margin Statistics by Zip Code:")
print(margin_stats.sort_values('Avg_Margin', ascending=False))


In [None]:
# Visualization for Hypothesis 3
# Draws average margin (with standard-deviation bars) and the margin
# distribution per zip code, saves the figure, then prints the interpretation.
import os

fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Margin distribution by Zip Code
margin_by_zip = margin_stats.sort_values('Avg_Margin', ascending=False).head(10)
axes[0].barh(range(len(margin_by_zip)), margin_by_zip['Avg_Margin'].values, 
             xerr=margin_by_zip['Std_Margin'].values, color='green', alpha=0.7)
axes[0].set_yticks(range(len(margin_by_zip)))
axes[0].set_yticklabels(margin_by_zip.index)
axes[0].axvline(x=0, color='red', linestyle='--', linewidth=2, label='Break-even')
axes[0].set_xlabel('Average Margin (Profit)', fontsize=11)
axes[0].set_title('Average Margin by Zip Code (with std dev)', fontsize=12, fontweight='bold')
axes[0].legend()
axes[0].grid(axis='x', alpha=0.3)

# Box plot: Margin distribution
df_zipcode_margin.boxplot(column='Margin', by='PostalCode', ax=axes[1], 
                          grid=False, rot=45)
axes[1].axhline(y=0, color='red', linestyle='--', linewidth=2)
axes[1].set_title('Margin Distribution by Zip Code', fontsize=12, fontweight='bold')
axes[1].set_xlabel('Zip Code', fontsize=11)
axes[1].set_ylabel('Margin', fontsize=11)
plt.setp(axes[1].xaxis.get_majorticklabels(), rotation=45, ha='right')
# A grouped pandas boxplot puts a "Boxplot grouped by ..." title on the whole
# figure; clear it so it does not sit above both panels.
fig.suptitle('')

plt.tight_layout()
# savefig does not create missing folders; make sure the target exists first.
os.makedirs('../reports/figures', exist_ok=True)
plt.savefig('../reports/figures/hypothesis3_margin_zipcodes.png', dpi=300, bbox_inches='tight')
plt.show()

# Business Interpretation
print("\n" + "="*80)
print("BUSINESS INTERPRETATION - Hypothesis 3:")
print("="*80)
if p_value_margin < 0.05:
    print(f"✓ REJECT H₀: There ARE significant margin differences between zip codes (p < 0.05)")
    # Split by the sign of the average margin; each list is sorted so that its
    # first row is the most extreme one.
    profitable_zips = margin_stats[margin_stats['Avg_Margin'] > 0].sort_values('Avg_Margin', ascending=False)
    unprofitable_zips = margin_stats[margin_stats['Avg_Margin'] < 0].sort_values('Avg_Margin')
    
    print(f"\nKey Findings:")
    if len(profitable_zips) > 0:
        print(f"  - Most profitable zip code: {profitable_zips.index[0]} (Margin: {profitable_zips.iloc[0]['Avg_Margin']:.2f})")
    # Only reported when at least one zip code has a negative average margin.
    if len(unprofitable_zips) > 0:
        print(f"  - Least profitable zip code: {unprofitable_zips.index[0]} (Margin: {unprofitable_zips.iloc[0]['Avg_Margin']:.2f})")
    print(f"\nRecommendation: Focus marketing on profitable zip codes. Consider premium adjustments for unprofitable areas.")
else:
    print(f"✗ FAIL TO REJECT H₀: No significant margin differences between zip codes (p ≥ 0.05)")
    print("Recommendation: Margin is relatively consistent across zip codes.")
print("="*80)


## Hypothesis 4: Risk Differences Between Women and Men

**H₀:** There is no significant risk difference between Women and Men  
**H₁:** There are significant risk differences between Women and Men


In [None]:
# Hypothesis 4: Risk differences between Women and Men
# Produces p_value_gender_freq (chi-square), p_value_severity and
# p_value_loss_gender (Welch t-tests), plus the gender_stats summary table.

# Keep only rows whose gender is one of the recognised labels
df_gender = df[df['Gender'].notna()].copy()
df_gender = df_gender[df_gender['Gender'].isin(['Male', 'Female', 'M', 'F'])].copy()

# Standardize gender labels
df_gender['Gender'] = df_gender['Gender'].replace({'M': 'Male', 'F': 'Female'})

# Test 4.1: Claim Frequency (Chi-square)
contingency_gender = pd.crosstab(df_gender['Gender'], df_gender['HasClaim'])
print("Contingency Table (Gender vs HasClaim):")
print(contingency_gender)
print()

chi2_gender, p_value_gender_freq, dof_gender, expected_gender = chi2_contingency(contingency_gender)
print(f"Chi-square Test for Claim Frequency by Gender:")
print(f"  Chi-square statistic: {chi2_gender:.4f}")
print(f"  p-value: {p_value_gender_freq:.6f}")
print(f"  Result: {'REJECT H₀' if p_value_gender_freq < 0.05 else 'FAIL TO REJECT H₀'} (α = 0.05)")
print()

# Test 4.2: Claim Severity (t-test)
male_claims = df_gender[df_gender['Gender'] == 'Male']['TotalClaims']
female_claims = df_gender[df_gender['Gender'] == 'Female']['TotalClaims']

# Only test on policies with claims
male_claims_with_claim = male_claims[male_claims > 0]
female_claims_with_claim = female_claims[female_claims > 0]

# A sample variance needs at least two observations; with fewer, the Welch
# test returns NaN instead of a usable p-value.
if len(male_claims_with_claim) >= 2 and len(female_claims_with_claim) >= 2:
    t_stat_severity, p_value_severity = ttest_ind(male_claims_with_claim, female_claims_with_claim, equal_var=False)
    print(f"t-test for Claim Severity (policies with claims only):")
    print(f"  t-statistic: {t_stat_severity:.4f}")
    print(f"  p-value: {p_value_severity:.6f}")
    print(f"  Result: {'REJECT H₀' if p_value_severity < 0.05 else 'FAIL TO REJECT H₀'} (α = 0.05)")
else:
    # p = 1.0 stands for "test could not be run" so later cells never reject on it.
    p_value_severity = 1.0
    print("Not enough data for severity comparison")

print()

# Test 4.3: Loss Ratio (t-test)
# dropna() does not remove +/-inf, and a single infinite value makes the
# t-test return NaN. Infinite ratios are therefore treated as missing here
# (they would arise if the loss-ratio helper divides by a zero premium; the
# helper is not visible from this notebook - confirm).
gender_loss_ratio = df_gender['LossRatio'].replace([np.inf, -np.inf], np.nan)
male_loss_ratio = gender_loss_ratio[df_gender['Gender'] == 'Male'].dropna()
female_loss_ratio = gender_loss_ratio[df_gender['Gender'] == 'Female'].dropna()

if len(male_loss_ratio) >= 2 and len(female_loss_ratio) >= 2:
    t_stat_loss, p_value_loss_gender = ttest_ind(male_loss_ratio, female_loss_ratio, equal_var=False)
    print(f"t-test for Loss Ratio:")
    print(f"  t-statistic: {t_stat_loss:.4f}")
    print(f"  p-value: {p_value_loss_gender:.6f}")
    print(f"  Result: {'REJECT H₀' if p_value_loss_gender < 0.05 else 'FAIL TO REJECT H₀'} (α = 0.05)")
else:
    p_value_loss_gender = 1.0
    print("Not enough data for loss ratio comparison")

print()
# Claim severity is the mean of the positive claim amounts only (0 when a
# gender has no positive claim).
gender_stats = df_gender.groupby('Gender').agg({
    'HasClaim': 'mean',
    'LossRatio': 'mean',
    'TotalClaims': lambda x: x[x > 0].mean() if (x > 0).any() else 0,
    'TotalPremium': 'mean',
    'Margin': 'mean'
}).round(4)
gender_stats.columns = ['Claim_Frequency', 'Loss_Ratio', 'Claim_Severity', 'Avg_Premium', 'Avg_Margin']
print("Summary Statistics by Gender:")
print(gender_stats)


In [None]:
# Visualization for Hypothesis 4
# Draws a 2x2 panel (claim frequency, loss ratio, claim severity, margin) per
# gender, saves it, then prints the business interpretation.
import os

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Claim Frequency
claim_freq_gender = gender_stats['Claim_Frequency']
axes[0, 0].bar(claim_freq_gender.index, claim_freq_gender.values, color=['steelblue', 'coral'])
axes[0, 0].set_ylabel('Claim Frequency', fontsize=11)
axes[0, 0].set_title('Claim Frequency by Gender', fontsize=12, fontweight='bold')
axes[0, 0].grid(axis='y', alpha=0.3)

# Loss Ratio
loss_ratio_gender = gender_stats['Loss_Ratio']
axes[0, 1].bar(loss_ratio_gender.index, loss_ratio_gender.values, color=['steelblue', 'coral'])
axes[0, 1].axhline(y=1.0, color='red', linestyle='--', linewidth=2, label='Break-even')
axes[0, 1].set_ylabel('Loss Ratio', fontsize=11)
axes[0, 1].set_title('Loss Ratio by Gender', fontsize=12, fontweight='bold')
axes[0, 1].legend()
axes[0, 1].grid(axis='y', alpha=0.3)

# Claim Severity
severity_gender = gender_stats['Claim_Severity']
axes[1, 0].bar(severity_gender.index, severity_gender.values, color=['steelblue', 'coral'])
axes[1, 0].set_ylabel('Claim Severity (Average Claim Amount)', fontsize=11)
axes[1, 0].set_title('Claim Severity by Gender', fontsize=12, fontweight='bold')
axes[1, 0].grid(axis='y', alpha=0.3)

# Margin
margin_gender = gender_stats['Avg_Margin']
axes[1, 1].bar(margin_gender.index, margin_gender.values, color=['green', 'orange'])
axes[1, 1].axhline(y=0, color='red', linestyle='--', linewidth=2)
axes[1, 1].set_ylabel('Average Margin (Profit)', fontsize=11)
axes[1, 1].set_title('Average Margin by Gender', fontsize=12, fontweight='bold')
axes[1, 1].grid(axis='y', alpha=0.3)

plt.tight_layout()
# savefig does not create missing folders; make sure the target exists first.
os.makedirs('../reports/figures', exist_ok=True)
plt.savefig('../reports/figures/hypothesis4_gender.png', dpi=300, bbox_inches='tight')
plt.show()

# Business Interpretation
# NOTE(review): H₀ is rejected on the smallest of three p-values at 0.05, with
# no multiple-comparison adjustment applied.
print("\n" + "="*80)
print("BUSINESS INTERPRETATION - Hypothesis 4:")
print("="*80)
# Built-in min() gives an order-dependent answer when an argument is NaN, so
# NaN p-values (degenerate tests) are dropped before taking the minimum. With
# nothing usable left, 1.0 is used, matching the "test could not be run" default.
gender_p_values = [p_value_gender_freq, p_value_severity, p_value_loss_gender]
usable_gender_p_values = [p for p in gender_p_values if not np.isnan(p)]
p_value_gender_overall = min(usable_gender_p_values) if usable_gender_p_values else 1.0
if p_value_gender_overall < 0.05:
    print(f"✓ REJECT H₀: There ARE significant risk differences between Women and Men (p < 0.05)")
    print(f"\nKey Findings:")
    male_loss = gender_stats.loc['Male', 'Loss_Ratio']
    female_loss = gender_stats.loc['Female', 'Loss_Ratio']
    # Report from the point of view of the higher-risk group; a tie is
    # reported on the "Women" side.
    if male_loss > female_loss:
        higher_label, higher_loss, lower_loss = 'Men', male_loss, female_loss
    else:
        higher_label, higher_loss, lower_loss = 'Women', female_loss, male_loss
    print(f"  - {higher_label} have higher loss ratio: {higher_loss:.3f} vs {lower_loss:.3f}")
    # The relative difference is only defined for a positive denominator.
    if lower_loss > 0:
        risk_diff = ((higher_loss / lower_loss) - 1) * 100
        print(f"  - Risk difference: {risk_diff:.1f}%")
    else:
        print("  - Risk difference: n/a (lower loss ratio is not positive)")
    print(f"\nRecommendation: Consider gender-based pricing adjustments (with regulatory compliance).")
else:
    print(f"✗ FAIL TO REJECT H₀: No significant risk differences between Women and Men (p ≥ 0.05)")
    print("Recommendation: Gender may not be a significant risk factor for pricing.")
    print("Note: Regulatory considerations may still apply regardless of statistical significance.")
print("="*80)


## Summary of All Hypothesis Tests

### Results Summary Table


In [None]:
# Create summary table
# One row per hypothesis: the tests applied, the smallest p-value among them
# and the resulting decision at α = 0.05. The table is printed and saved as CSV.
import os


def _smallest_p_value(*p_values):
    """Return the smallest non-NaN p-value, or 1.0 when none is usable.

    Built-in min() gives an order-dependent answer when an argument is NaN;
    1.0 matches the "test could not be run" default used by the test cells.
    """
    usable = [p for p in p_values if not np.isnan(p)]
    return min(usable) if usable else 1.0


# NOTE(review): each hypothesis is decided on the smallest of its p-values at
# 0.05, with no multiple-comparison adjustment applied.
summary_p_values = [
    _smallest_p_value(p_value_freq, p_value_loss),
    _smallest_p_value(p_value_zip_freq, p_value_zip_loss),
    _smallest_p_value(p_value_margin),
    _smallest_p_value(p_value_gender_freq, p_value_severity, p_value_loss_gender),
]

results_summary = pd.DataFrame({
    'Hypothesis': [
        'H₀: No risk differences across provinces',
        'H₀: No risk differences between zip codes',
        'H₀: No margin differences between zip codes',
        'H₀: No risk differences between Women and Men'
    ],
    'Test_Type': [
        'Chi-square (Frequency) + ANOVA (Loss Ratio)',
        'Chi-square (Frequency) + ANOVA (Loss Ratio)',
        'ANOVA (Margin)',
        'Chi-square (Frequency) + t-test (Severity/Loss Ratio)'
    ],
    'P_Value': summary_p_values,
    # The decision is derived from the very same value shown in P_Value.
    'Result': [
        'REJECT H₀' if p < 0.05 else 'FAIL TO REJECT H₀'
        for p in summary_p_values
    ],
    'Significance_Level': ['α = 0.05'] * 4
})

print("="*100)
print("HYPOTHESIS TESTING RESULTS SUMMARY")
print("="*100)
print(results_summary.to_string(index=False))
print("="*100)

# Save results
# to_csv does not create missing folders; make sure the target exists first.
os.makedirs('../reports', exist_ok=True)
results_summary.to_csv('../reports/hypothesis_testing_results.csv', index=False)
print("\nResults saved to: ../reports/hypothesis_testing_results.csv")
