# Risk Score Definition

In [1]:
!pip install pandas openpyxl



In [2]:
!pip install numpy



In [3]:
import pandas as pd
import numpy as np

In [4]:
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Load the merged damage table from CSV into a DataFrame
damage_all = pd.read_csv(filepath_or_buffer="../mergedData/damage_all.csv")

In [6]:
# PREMIUM_AMOUNT must be numeric before any arithmetic: cast it to float64
damage_all['PREMIUM_AMOUNT'] = damage_all['PREMIUM_AMOUNT'].astype('float64')

# NET_PREMIUM_AMOUNT is defined as 60% of PREMIUM_AMOUNT
damage_all['NET_PREMIUM_AMOUNT'] = damage_all['PREMIUM_AMOUNT'].mul(0.6)

In [7]:
# PIPE_PREMIUM_AMOUNT = NET_PREMIUM_AMOUNT scaled by a per-division factor.
# The factor is looked up from the DIVISION_LEVEL_02 column.
# Lookup table: division code -> factor applied to the net premium
devisionPercentages = {'VGV': 0.43, 'VHV': 0.11, 'H&H': 0.07, 'W&W': 0.32}

# Rows whose division code is not in the table get NaN from .map(),
# and therefore a NaN PIPE_PREMIUM_AMOUNT.
damage_all['PIPE_PREMIUM_AMOUNT'] = (
    damage_all['DIVISION_LEVEL_02']
    .map(devisionPercentages)
    .mul(damage_all['NET_PREMIUM_AMOUNT'])
)

In [8]:
# Calculate RISK_SCORE as EXPENSE / PIPE_PREMIUM_AMOUNT
damage_all['RISK_SCORE'] = damage_all['EXPENSE'] / damage_all['PIPE_PREMIUM_AMOUNT']
# Edge cases of the division:
#   - EXPENSE = 0 yields a score of 0
#   - PIPE_PREMIUM_AMOUNT = 0 yields +/-inf (a non-zero expense divided by zero)
# Neither is treated as a usable score, so both are stored as missing (NaN).
damage_all['RISK_SCORE'] = damage_all['RISK_SCORE'].replace([0, np.inf, -np.inf], np.nan)

In [9]:
# Preview the first rows that have a risk score, limited to the inputs and the result
damage_all.loc[
    damage_all['RISK_SCORE'].notna(),
    ['EXPENSE', 'PREMIUM_AMOUNT', 'NET_PREMIUM_AMOUNT', 'PIPE_PREMIUM_AMOUNT', 'RISK_SCORE'],
].head()

Unnamed: 0,EXPENSE,PREMIUM_AMOUNT,NET_PREMIUM_AMOUNT,PIPE_PREMIUM_AMOUNT,RISK_SCORE
47,269.51,1533.84,920.304,395.73072,0.681044
57,545.5,1328.0,796.8,342.624,1.592124
59,660.45,188.73,113.238,48.69234,13.563735
88,2885.25,1253.37,752.022,323.36946,8.922457
92,3047.1,510.9,306.54,131.8122,23.11698


In [10]:
# Problem: RISK_SCORE is missing (NaN) for many rows.
# Printed one per line: total rows, rows without a score, rows with a score.
print(
    damage_all.shape[0],
    damage_all['RISK_SCORE'].isna().sum(),
    damage_all['RISK_SCORE'].notna().sum(),
    sep='\n',
)

252407
234557
17850


In [11]:
# Possible reason: contract_column_add has far fewer rows than all the other
# contract tables combined, so merging leaves many rows without a premium amount.
# Number of rows with a missing PREMIUM_AMOUNT:
print(len(damage_all) - damage_all['PREMIUM_AMOUNT'].count())

228999


In [12]:
# Write the enriched frame back to CSV without the index column.
# Note: this is the same path the data was read from, so the input file is overwritten.
damage_all.to_csv(path_or_buf='../mergedData/damage_all.csv', index=False)

In [13]:
# Re-read the saved CSV from disk if needed
damage_all = pd.read_csv(filepath_or_buffer="../mergedData/damage_all.csv")