In [1]:
import os

In [2]:
# Show the kernel's current working directory before it is changed below.
%pwd

'c:\\Users\\KUNAL MEHTA\\Desktop\\Data Science Training\\Projects\\Auto-Insurance-Risk-Profiling\\research'

In [3]:
# Move from the notebook folder up to the project root so the relative
# 'artifacts/...' and config paths used below resolve.
# Guarded on the folder name so that re-running this cell does not keep
# climbing one directory higher on every execution.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
# Confirm the working directory after the chdir above.
%pwd

'c:\\Users\\KUNAL MEHTA\\Desktop\\Data Science Training\\Projects\\Auto-Insurance-Risk-Profiling'

In [5]:
import pandas as pd

In [6]:
# Load the scored customers produced by the prediction stage.
# Forward slashes are used because they resolve on every OS; backslashes in a
# non-raw string only worked because '\p' is not a recognised escape sequence.
potential_customers = pd.read_csv('artifacts/predictions/potential_customers_with_predictions.csv')

In [9]:
# Preview the first rows of the loaded predictions.
potential_customers.head()

Unnamed: 0,quote_number,gender,agecat,date_of_birth,credit_score,area,traffic_index,veh_age,veh_body,veh_value,age,claim_probability,claim,claim_amount
0,29323463,F,4,1968-04-14,750.0,C,124.5,2,HBACK,0.741937,49,0.02863,0,184.407315
1,16732441,M,2,1984-10-04,567.0,F,118.0,3,STNWG,1.124605,33,0.390885,1,411.259979
2,68744228,M,4,1962-11-30,378.0,D,111.4,3,HDTOP,1.210154,55,0.805233,1,11090.909096
3,30875047,F,3,1973-05-17,807.0,E,42.0,4,UTE,1.088225,44,0.016767,0,392.195934
4,37049210,M,6,1945-10-02,844.0,C,111.0,1,HBACK,1.088225,72,0.122363,0,771.758385


In [26]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class RiskProfilesConfig:
    """Immutable bundle of settings consumed by the risk-profiling stage."""
    # Output directory of the stage.
    root_dir: Path
    # CSV of customers with model predictions; read as the stage input.
    potential_customers_with_predictions_data_path: Path
    # Destination CSV for the frame with risk profiles attached.
    risk_profiles_path: Path
    # Stage parameters, looked up by key (thresholds and weights).
    params: dict

In [11]:
from AutoInsurance.constants import *
from AutoInsurance.utils.common import read_yaml, create_directories, save_json

In [30]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load config, params and schema, then create the artifacts root.

        Args:
            config_filepath: YAML file holding paths and directory settings.
            params_filepath: YAML file holding stage parameters.
            schema_filepath: YAML file holding the data schema.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_risk_profiles_config(self) -> RiskProfilesConfig:
        """Assemble the risk-profiling settings and create its output directory.

        Returns:
            A RiskProfilesConfig whose path fields are wrapped in ``Path`` and
            whose ``params`` is the ``RiskProfiles`` section of the params file.
        """
        stage_settings = self.config.risk_profiles
        stage_params = self.params.RiskProfiles

        create_directories([stage_settings.root_dir])

        return RiskProfilesConfig(
            root_dir=Path(stage_settings.root_dir),
            potential_customers_with_predictions_data_path=Path(
                stage_settings.potential_customers_with_predictions_data_path
            ),
            risk_profiles_path=Path(stage_settings.risk_profiles_path),
            params=stage_params,
        )

In [14]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib

In [33]:
class RiskProfiling:
    """Turn predicted claim probability and amount into customer risk groups.

    The pipeline (see ``process_data``) min-max scales the two predictions,
    buckets each of them by configured quantiles, weights the scaled values by
    bucket, sums them into one score and buckets that score into risk groups.
    """

    _LEVELS = ['Low', 'Medium', 'High']
    _NO_CLAIM = 'No Claim'

    def __init__(self, config: "RiskProfilesConfig"):
        """Keep the stage config and a shortcut to its parameter mapping.

        The annotation is quoted so that defining this class does not depend
        on the config class already existing in the namespace.
        """
        self.config = config
        self.params = config.params

    def load_data(self):
        """Read the customers-with-predictions CSV named in the config."""
        return pd.read_csv(Path(self.config.potential_customers_with_predictions_data_path))

    def save_data(self, data: pd.DataFrame):
        """Write ``data`` (without its index) to the configured output CSV."""
        data.to_csv(Path(self.config.risk_profiles_path), index=False)

    def normalize_data(self, data: pd.DataFrame):
        """Add min-max scaled copies of the two prediction columns.

        Adds ``normalized_claim_probability`` and ``normalized_claim_amount``
        to ``data`` in place and persists the fitted scaler with joblib.

        Returns:
            The same frame, with the two new columns.
        """
        scaler = MinMaxScaler()
        data[['normalized_claim_probability', 'normalized_claim_amount']] = scaler.fit_transform(
            data[['claim_probability', 'claim_amount']]
        )

        # joblib.dump needs a file path. The previous hard-coded target,
        # 'artifacts/risk_profiles', had no file name and ignored the config;
        # the scaler now goes to a named file inside the stage directory.
        joblib.dump(scaler, Path(self.config.root_dir) / 'scaler.joblib')

        return data

    @staticmethod
    def _cut_by_quantiles(values: pd.Series, thresholds, upper, labels):
        """Bucket ``values`` into bins ``[0, quantiles..., upper]``.

        ``include_lowest=True`` keeps a value exactly equal to 0 inside the
        first bucket; without it such a value falls outside every bin (NaN).
        """
        edges = [0] + values.quantile(thresholds).tolist() + [upper]
        return pd.cut(values, bins=edges, labels=labels, include_lowest=True)

    @staticmethod
    def _weights_for(labels: pd.Series, weights):
        """Translate bucket labels into their configured numeric weights.

        Raises:
            KeyError: if a label (including a missing one) has no weight.
        """
        # Work on plain objects and a plain dict so the lookup does not depend
        # on categorical dtype or on the concrete mapping type of the params.
        plain_labels = labels.astype(object)
        looked_up = plain_labels.map(dict(weights))
        unresolved = looked_up.isna()
        if unresolved.any():
            unknown = plain_labels[unresolved].unique().tolist()
            raise KeyError(f"no weight configured for risk profile label(s): {unknown}")
        return looked_up.astype(float)

    def segment_by_claim_probability(self, data: pd.DataFrame):
        """Add ``risk_profile_probability`` (Low/Medium/High) to ``data``.

        Bin edges are 0, the quantiles listed in
        ``params['claim_probability_thresholds']`` and 1.
        """
        data['risk_profile_probability'] = self._cut_by_quantiles(
            data['claim_probability'],
            self.params['claim_probability_thresholds'],
            1,
            self._LEVELS,
        )
        return data

    def segment_by_claim_amount(self, data: pd.DataFrame):
        """Return a frame with ``risk_profile_cost`` added.

        Rows with ``claim == 1`` are bucketed Low/Medium/High by the quantiles
        in ``params['claim_amount_thresholds']`` (computed over those rows
        only); every other row is labelled 'No Claim'.
        """
        customers_with_claims = data[data['claim'] == 1].copy()

        if customers_with_claims.empty:
            # With no claims there are no quantiles to cut on (the bin edges
            # would be NaN), so every row is simply 'No Claim'.
            return data.assign(
                risk_profile_cost=pd.Categorical(
                    [self._NO_CLAIM] * len(data),
                    categories=self._LEVELS + [self._NO_CLAIM],
                )
            )

        customers_with_claims['risk_profile_cost'] = self._cut_by_quantiles(
            customers_with_claims['claim_amount'],
            self.params['claim_amount_thresholds'],
            customers_with_claims['claim_amount'].max(),
            self._LEVELS,
        )
        # Index-aligned left join: rows without a claim get NaN here and are
        # relabelled just below.
        data = data.merge(customers_with_claims[['risk_profile_cost']], left_index=True, right_index=True, how='left')
        if self._NO_CLAIM not in data['risk_profile_cost'].cat.categories:
            data['risk_profile_cost'] = data['risk_profile_cost'].cat.add_categories(self._NO_CLAIM)
        data['risk_profile_cost'] = data['risk_profile_cost'].fillna(self._NO_CLAIM)
        return data

    def apply_dynamic_weighting(self, data: pd.DataFrame):
        """Add bucket-weighted versions of the two normalized predictions.

        ``weighted_probability_score`` and ``weighted_cost_score`` are the
        normalized values multiplied by the weight configured for the row's
        bucket (``params['weights_probability']`` / ``params['weights_cost']``).

        Raises:
            KeyError: if a row's bucket label has no configured weight.
        """
        probability_weights = self._weights_for(
            data['risk_profile_probability'], self.params['weights_probability']
        )
        cost_weights = self._weights_for(
            data['risk_profile_cost'], self.params['weights_cost']
        )

        data['weighted_probability_score'] = data['normalized_claim_probability'] * probability_weights
        data['weighted_cost_score'] = data['normalized_claim_amount'] * cost_weights
        return data

    def calculate_combined_risk_score(self, data: pd.DataFrame):
        """Add ``dynamic_combined_risk_score`` as the sum of both weighted scores."""
        data['dynamic_combined_risk_score'] = data['weighted_probability_score'] + data['weighted_cost_score']
        return data

    def segment_into_risk_groups(self, data: pd.DataFrame):
        """Add ``risk_group`` (Low/Medium/High Risk) from the combined score.

        Bin edges are 0, the quantiles in ``params['risk_score_thresholds']``
        and the maximum score. A score of exactly 0 lands in 'Low Risk'.
        """
        data['risk_group'] = self._cut_by_quantiles(
            data['dynamic_combined_risk_score'],
            self.params['risk_score_thresholds'],
            data['dynamic_combined_risk_score'].max(),
            ['Low Risk', 'Medium Risk', 'High Risk'],
        )
        return data

    def process_data(self):
        """Run the full pipeline: load, score, segment, save, and preview.

        Returns:
            The final frame that was written to the configured output CSV.
        """
        data = self.load_data()
        data = self.normalize_data(data)
        data = self.segment_by_claim_probability(data)
        data = self.segment_by_claim_amount(data)
        data = self.apply_dynamic_weighting(data)
        data = self.calculate_combined_risk_score(data)
        data = self.segment_into_risk_groups(data)
        self.save_data(data)
        print(data.head())
        return data


In [34]:
# Run the risk-profiling stage end to end. Failures propagate with their
# original traceback; the earlier try/except only re-raised the same exception.
config = ConfigurationManager()
risk_profiles_config = config.get_risk_profiles_config()
predictions_config = RiskProfiling(config = risk_profiles_config)
# Bind the result so the cell does not also echo the whole frame;
# process_data already prints a preview.
risk_profiles = predictions_config.process_data()

[2024-05-24 01:25:17,133: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-24 01:25:17,141: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-24 01:25:17,146: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-05-24 01:25:17,147: INFO: common: created directory at: artifacts]
[2024-05-24 01:25:17,149: INFO: common: created directory at: artifacts/risk_profiles]
   quote_number gender  agecat date_of_birth  credit_score area  \
0      29323463      F       4    1968-04-14         750.0    C   
1      16732441      M       2    1984-10-04         567.0    F   
2      68744228      M       4    1962-11-30         378.0    D   
3      30875047      F       3    1973-05-17         807.0    E   
4      37049210      M       6    1945-10-02         844.0    C   

   traffic_index  veh_age veh_body  veh_value  ...  claim  claim_amount  \
0          124.5        2    HBACK   0.741937  ...      0    184.407315   
1          118.0        3   