# Analysis for microinsurance 

In [102]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import warnings 
warnings.filterwarnings('ignore')

In [104]:
df = pd.read_excel(r"C:\Users\FUJITSU\OneDrive\Desktop\Data analysis\Extracted data.xlsx")

In [106]:
df.head()

Unnamed: 0,Household Receipts (Rs.),Household Expenditure(Rs.),Income,Savings,Medical Care
0,40737.5,37159.28,41545,3578.22,1197.98
1,23894.1,21725.67,23192,2168.44,781.8
2,29535.95,27138.11,29049,2397.84,925.68
3,31468.51,30475.1,31373,993.41,1030.58
4,36931.5,36337.87,37643,593.63,1226.37


In [108]:
df.describe()

Unnamed: 0,Household Receipts (Rs.),Household Expenditure(Rs.),Income,Savings,Medical Care
count,90.0,90.0,90.0,90.0,90.0
mean,39246.705,36323.556889,39550.133333,2923.148556,1185.701778
std,13453.950642,11471.38805,14373.054778,3346.144721,504.840892
min,19284.49,19862.42,19091.0,-2223.61,616.34
25%,30121.8525,27868.56,29780.75,302.86,851.28
50%,36611.55,34469.775,35908.0,2438.14,1038.09
75%,44010.27,41498.4725,43243.0,4518.6725,1351.9925
max,79015.36,68189.54,78872.0,17318.77,3221.3


In [110]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 5 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Household Receipts (Rs.)    90 non-null     float64
 1   Household Expenditure(Rs.)  90 non-null     float64
 2   Income                      90 non-null     int64  
 3   Savings                     90 non-null     float64
 4   Medical Care                90 non-null     float64
dtypes: float64(4), int64(1)
memory usage: 3.6 KB


In [112]:
df.columns

Index(['Household Receipts (Rs.)', 'Household Expenditure(Rs.)', 'Income',
       'Savings', 'Medical Care'],
      dtype='object')

In [114]:
X = df[['Household Receipts (Rs.)', 'Household Expenditure(Rs.)', 'Income', 'Savings', 'Medical Care']]
poverty_line = df['Income'].median() 
print(poverty_line)
df['Above_Poverty_Line'] = (df['Income'] >= poverty_line).astype(int)
y = df['Above_Poverty_Line']

35908.0


### In Pakistan lowest insome also is 35k so we can consider people having income above 35k  

In [117]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [119]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [121]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

In [123]:
y_pred = model.predict(X_test_scaled)

In [125]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Model Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00        11

    accuracy                           1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18



In [127]:
importances = model.feature_importances_
feature_names = X.columns
feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': importances}).sort_values(by="Importance", ascending=False)
print("\nFeature Importance:")
print(feature_importance_df)


Feature Importance:
                      Feature  Importance
2                      Income    0.396162
0    Household Receipts (Rs.)    0.304804
1  Household Expenditure(Rs.)    0.232508
4                Medical Care    0.062871
3                     Savings    0.003655


# Example of how to use it for a new person

In [130]:
new_person = pd.DataFrame([{
    'Household Receipts (Rs.)': 5000,
    'Household Expenditure(Rs.)': 4500,
    'Income': 4000,
    'Savings': 500,
    'Medical Care': 200
}])

prediction = model.predict(new_person)
if prediction[0] == 1:
    print("Eligible for micro-insurance")
else:
    print("Not prioritized for micro-insurance")

Eligible for micro-insurance


# Other factors to determine whether should give insurance or not 

In [133]:
df['Low_Savings'] = df['Savings'] < df['Savings'].median()  
df['High_Medical_Costs'] = df['Medical Care'] > (df['Income'] * 0.1) 
df['High_Expenditure_Ratio'] = df['Household Expenditure(Rs.)'] / df['Income'] > 0.8 

df['High_Risk'] = df[['Low_Savings', 'High_Medical_Costs', 'High_Expenditure_Ratio']].sum(axis=1) > 1 

print(df[['Income', 'Savings', 'Medical Care', 'Household Expenditure(Rs.)', 'High_Risk']])

    Income  Savings  Medical Care  Household Expenditure(Rs.)  High_Risk
0    41545  3578.22       1197.98                    37159.28      False
1    23192  2168.44        781.80                    21725.67       True
2    29049  2397.84        925.68                    27138.11       True
3    31373   993.41       1030.58                    30475.10       True
4    37643   593.63       1226.37                    36337.87       True
..     ...      ...           ...                         ...        ...
85   26711   110.72        675.56                    27236.87       True
86   31339 -1216.23        887.84                    32947.77       True
87   31029 -2223.61        886.00                    32735.92       True
88   38609  1681.60       1098.52                    35301.74       True
89   53959  2332.43       1187.36                    47068.97       True

[90 rows x 5 columns]


# Assesing using weighted scoring system 

In [136]:
weights = {
    "Income": 0.50,  
    "Savings": 0.20,  
    "Household Expenditure(Rs.)": 0.15,  
    "Medical Care": 0.15  
}

for feature in weights.keys():
    df[feature + "_norm"] = (df[feature] - df[feature].min()) / (df[feature].max() - df[feature].min())

df["Weighted_Score"] = (
    df["Income_norm"] * weights["Income"] +
    df["Savings_norm"] * weights["Savings"] +
    df["Household Expenditure(Rs.)_norm"] * weights["Household Expenditure(Rs.)"] +
    df["Medical Care_norm"] * weights["Medical Care"]
)

threshold = 0.5
df["Eligible_for_Insurance"] = df["Weighted_Score"] < threshold

print(df[["Income", "Savings", "Household Expenditure(Rs.)", "Medical Care", "Weighted_Score", "Eligible_for_Insurance"]])

    Income  Savings  Household Expenditure(Rs.)  Medical Care  Weighted_Score  \
0    41545  3578.22                    37159.28       1197.98        0.334358   
1    23192  2168.44                    21725.67        781.80        0.094560   
2    29049  2397.84                    27138.11        925.68        0.170979   
3    31373   993.41                    30475.10       1030.58        0.192442   
4    37643   593.63                    36337.87       1226.37        0.270263   
..     ...      ...                         ...           ...             ...   
85   26711   110.72                    27236.87        675.56        0.113922   
86   31339 -1216.23                    32947.77        887.84        0.168999   
87   31029 -2223.61                    32735.92        886.00        0.155333   
88   38609  1681.60                    35301.74       1098.52        0.278899   
89   53959  2332.43                    47068.97       1187.36        0.455584   

    Eligible_for_Insurance 

# Risk-Based Tiering Approach

In [139]:
income_threshold = df['Income'].median()
savings_threshold = df['Savings'].median()
expenditure_ratio_threshold = 0.8  
medical_cost_ratio_threshold = 0.1  

df['High_Risk'] = (df['Income'] < income_threshold) & \
                  (df['Household Expenditure(Rs.)'] / df['Income'] > expenditure_ratio_threshold) & \
                  (df['Savings'] < savings_threshold)

df['Moderate_Risk'] = (df['Income'] >= income_threshold) & \
                      ((df['Medical Care'] / df['Income'] > medical_cost_ratio_threshold) | (df['Savings'] < savings_threshold))

df['Low_Risk'] = (df['Income'] >= income_threshold) & \
                 (df['Savings'] >= savings_threshold) & \
                 (df['Household Expenditure(Rs.)'] / df['Income'] <= expenditure_ratio_threshold) & \
                 (df['Medical Care'] / df['Income'] <= medical_cost_ratio_threshold)

df['Risk_Tier'] = df[['High_Risk', 'Moderate_Risk', 'Low_Risk']].idxmax(axis=1).str.replace('_Risk', '')

print(df[['Income', 'Savings', 'Household Expenditure(Rs.)', 'Medical Care', 'Risk_Tier']])

    Income  Savings  Household Expenditure(Rs.)  Medical Care Risk_Tier
0    41545  3578.22                    37159.28       1197.98      High
1    23192  2168.44                    21725.67        781.80      High
2    29049  2397.84                    27138.11        925.68      High
3    31373   993.41                    30475.10       1030.58      High
4    37643   593.63                    36337.87       1226.37  Moderate
..     ...      ...                         ...           ...       ...
85   26711   110.72                    27236.87        675.56      High
86   31339 -1216.23                    32947.77        887.84      High
87   31029 -2223.61                    32735.92        886.00      High
88   38609  1681.60                    35301.74       1098.52  Moderate
89   53959  2332.43                    47068.97       1187.36  Moderate

[90 rows x 5 columns]
