# Risk Score Definition

In [1]:
!pip install pandas openpyxl



In [2]:
!pip install numpy



In [3]:
import pandas as pd
import numpy as np

In [4]:
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Load damage_all from the CSV file in the mergedData directory
damage_all = pd.read_csv(filepath_or_buffer="mergedData/damage_all.csv")

In [6]:
# Cast PREMIUM_AMOUNT to a floating-point column
damage_all['PREMIUM_AMOUNT'] = damage_all['PREMIUM_AMOUNT'].astype('float64')

# NET_PREMIUM_AMOUNT is 60% of PREMIUM_AMOUNT
damage_all['NET_PREMIUM_AMOUNT'] = damage_all['PREMIUM_AMOUNT'].mul(0.6)

In [7]:
# Calculate PIPE_PREMIUM_AMOUNT from the division code stored in DIVISION_LEVEL_02
# Factor applied to the net premium, keyed by division code
devisionPercentages = dict([
    ('VGV', 0.43),
    ('VHV', 0.11),
    ('H&H', 0.07),
    ('W&W', 0.32),
])

# PIPE_PREMIUM_AMOUNT = NET_PREMIUM_AMOUNT * factor of the row's division.
# Division codes that are not keys of the mapping map to NaN.
damage_all['PIPE_PREMIUM_AMOUNT'] = damage_all['NET_PREMIUM_AMOUNT'].mul(
    damage_all['DIVISION_LEVEL_02'].map(devisionPercentages)
)

In [8]:
# Calculate RISK_SCORE as PIPE_PREMIUM_AMOUNT / EXPENSE
damage_all['RISK_SCORE'] = damage_all['PIPE_PREMIUM_AMOUNT'] / damage_all['EXPENSE']
# When expense = 0 the division returns +inf, or -inf for a negative premium.
# Instead return risk score = 0 for both signs.
damage_all['RISK_SCORE'] = damage_all['RISK_SCORE'].replace([float('inf'), float('-inf')], 0)
# 0 / 0 yields NaN rather than inf, so it is not caught by the replace above.
# Zero it only where the premium is known, so rows with a missing premium stay NaN.
damage_all.loc[
    damage_all['EXPENSE'].eq(0) & damage_all['PIPE_PREMIUM_AMOUNT'].notna(),
    'RISK_SCORE',
] = 0

In [9]:
# Preview the premium/expense columns for rows whose RISK_SCORE is not NaN
damage_all.loc[
    damage_all['RISK_SCORE'].notna(),
    ['EXPENSE', 'PREMIUM_AMOUNT', 'NET_PREMIUM_AMOUNT', 'PIPE_PREMIUM_AMOUNT', 'RISK_SCORE'],
].head()

Unnamed: 0,EXPENSE,PREMIUM_AMOUNT,NET_PREMIUM_AMOUNT,PIPE_PREMIUM_AMOUNT,RISK_SCORE
1,0.0,199.2,119.52,51.3936,0.0
6,0.0,190.67,114.402,49.19286,0.0
8,0.0,331.12,198.672,21.85392,0.0
22,0.0,203.84,122.304,52.59072,0.0
26,0.0,699.6,419.76,180.4968,0.0


In [10]:
# Problem: RISK_SCORE contains a lot of NAs
# Printed in order: total rows, rows with NA risk score, rows with a risk score
print(damage_all.shape[0])
print(damage_all['RISK_SCORE'].isnull().sum())
print(damage_all['RISK_SCORE'].count())

252407
229000
23407


In [11]:
# Possible reason: contract_column_add has far fewer rows than all the other contract tables combined,
# so merging the tables leaves a lot of NAs in the premium amount
print(damage_all['PREMIUM_AMOUNT'].isnull().sum())

228999


In [12]:
# Write the dataset to CSV, leaving out the index column
damage_all.to_csv(path_or_buf='damage_all.csv', index=False)

In [14]:
# Reload file if necessary
# NOTE(review): this reads "mergedData/damage_all.csv", whereas the to_csv call in this
# notebook writes 'damage_all.csv' — confirm which file is meant to be reloaded
damage_all = pd.read_csv(filepath_or_buffer="mergedData/damage_all.csv")