In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
import shap

: 

In [None]:
# Seed NumPy's global RNG once so every mock dataset drawn in this notebook
# is reproducible on a fresh "Restart & Run All".
np.random.seed(42)

# Categorical dimensions shared by the mock datasets.
provinces = ['Gauteng', 'Western Cape', 'KwaZulu-Natal', 'Eastern Cape']
vehicle_types = ['Sedan', 'SUV', 'Luxury', 'Truck']

# 'MS' stamps each month at its first day, so both endpoints (Feb 2014 and
# Aug 2015) are included. The month-end alias 'M' would stop at 2015-07-31
# and is deprecated in recent pandas releases.
months = pd.date_range('2014-02-01', '2015-08-01', freq='MS')

# Mock loss ratios: one uniform draw in [0.3, 0.8) per (province, vehicle type).
# `index` and `columns` are DataFrame arguments, so the uniform(...) call must
# be closed before them.
loss_ratio_data = pd.DataFrame(
    np.random.uniform(0.3, 0.8, size=(len(provinces), len(vehicle_types))),
    index=provinces,
    columns=vehicle_types,
)

# Annotated heatmap: rows are provinces, columns are vehicle types.
plt.figure(figsize=(10, 6))
sns.heatmap(loss_ratio_data, annot=True, fmt=".2f", cmap="YlOrRd", cbar_kws={'label': 'Loss Ratio'})
plt.title("Loss Ratio by Province & Vehicle Type")
plt.tight_layout()
plt.show()

In [None]:
# Mock monthly totals: one normally distributed draw per entry in `months`.
# Claims are drawn before premiums so the global RNG stream is consumed in
# a fixed order.
n_periods = len(months)
claims_draw = np.random.normal(50000, 15000, n_periods)
premium_draw = np.random.normal(80000, 10000, n_periods)

ts_data = pd.DataFrame(
    {'Month': months, 'TotalClaims': claims_draw, 'TotalPremium': premium_draw}
).set_index('Month')

claims = ts_data['TotalClaims']
premiums = ts_data['TotalPremium']

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(ts_data.index, claims, label='Total Claims', color='red')
ax.plot(ts_data.index, premiums, label='Total Premium', color='blue')

# Shade only the stretches where claims exceed premiums.
ax.fill_between(
    ts_data.index,
    claims,
    premiums,
    where=(claims > premiums),
    color='red',
    alpha=0.3,
    interpolate=True,
)

ax.set_title("Monthly Claims vs Premiums (Feb 2014 - Aug 2015)")
ax.set_xlabel("Month")
ax.set_ylabel("Amount (ZAR)")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Number of mock policies to simulate.
N_POLICIES = 100

# Raw mock features: two integer columns and two categorical columns.
# randint's upper bound is exclusive.
raw_features = pd.DataFrame({
    'VehicleAge': np.random.randint(1, 20, N_POLICIES),
    'CubicCapacity': np.random.randint(1000, 3000, N_POLICIES),
    'Make': np.random.choice(['Toyota', 'BMW', 'VW'], N_POLICIES),
    'Province': np.random.choice(provinces, N_POLICIES),
})

# One-hot encode the categorical columns; numeric columns pass through.
X = pd.get_dummies(raw_features)

# Mock claim amounts, drawn uniformly and independently of the features.
y = np.random.uniform(5000, 50000, N_POLICIES)

# Fit a random forest with default hyperparameters.
model = RandomForestRegressor()
model.fit(X, y)

# Explain the fitted model's predictions on the same rows it was fitted on.
explainer = shap.Explainer(model)
shap_values = explainer(X)

# show=False keeps the figure open so a title can be added before rendering.
plt.figure()
shap.summary_plot(shap_values, X, plot_type="bar", show=False)
plt.title("Top Features Influencing Claim Severity (SHAP Values)")
plt.tight_layout()
plt.show()