## Package Imports

In [1]:
import pandas as pd
import numpy as np
from copy import copy
import configparser
import snowflake.connector
import os
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.hazmat.primitives.asymmetric import dsa
from cryptography.hazmat.primitives import serialization

## Snowflake Connection (via Will Fraher's SF account)

In [2]:
# # Initializing Will's connection to Snowflake (using MTE ticket and public/private key)
# snowflake_pass = "will_f_password"
# cbg_query_folder = os.path.join(os.getcwd(), 'TDST Notebook Rework', 'CBG_Queries') # Need to add Will's file path

# with open("/tech/appl/default/user/wf96460e/rsa_snow.p8", "rb") as key: # Need to add Will's key location here
#     private_key= serialization.load_pem_private_key(
#         key.read(),
#         password= snowflake_pass.encode(),
#         backend=default_backend()
#     )

# pkb = private_key.private_bytes(
#     encoding=serialization.Encoding.DER,
#     format=serialization.PrivateFormat.PKCS8,
#     encryption_algorithm=serialization.NoEncryption())

# ctx = snowflake.connector.connect(
#     user='wf96460',
#     account='hfsg_prod.us-east-1',
#     private_key=pkb,
#     database='user_db',
#     schema='AD1_WF96460'
#     )

In [3]:
## Setting up Snowflake Connection
# The key passphrase is read from a config file instead of being written in the notebook.
config = configparser.ConfigParser()
config.read('/tech/appl/default/user/pa08042e/toad.cfg')
snowflake_pass = config['SNOWFLAKE']['secret_passphrase']

# Folder (under the current working directory) holding the *.sql files run further down.
cbg_query_folder = os.path.join(os.getcwd(), 'CBG_Queries')

# Read the encrypted PEM key, then decrypt it with the passphrase.
with open("/tech/appl/default/user/pa08042e/rsa_snow.p8", "rb") as key:
    encrypted_pem = key.read()

private_key = serialization.load_pem_private_key(
    encrypted_pem,
    password=snowflake_pass.encode(),
    backend=default_backend(),
)

# Re-serialize the key as unencrypted DER / PKCS8 bytes for the connector call below.
pkb = private_key.private_bytes(
    encoding=serialization.Encoding.DER,
    format=serialization.PrivateFormat.PKCS8,
    encryption_algorithm=serialization.NoEncryption(),
)

# `ctx` is the shared connection used by every query cell; it is closed after the last query.
connection_settings = {
    'user': 'pa08042',
    'account': 'hfsg_prod.us-east-1',
    'private_key': pkb,
    'database': 'user_db',
    'schema': 'AD1_PA08042',
}
ctx = snowflake.connector.connect(**connection_settings)

## Modeled Relativities (provided manually by Kaitlyn DeBrusk)

In [4]:
modeled_relativities = pd.read_excel(
    "MVP2.1 xCAT and CAT Territory Factors.xlsx",
    sheet_name="Main",
    engine="openpyxl",
)

# Ensures that CBG is the index and every CBG is 12 digits long:
# an 11-character code gets a "0" put back in front.
fips_codes = modeled_relativities['FIPS'].astype(str)
modeled_relativities['FIPS'] = [
    f'0{code}' if len(code) == 11 else code for code in fips_codes
]
modeled_relativities = modeled_relativities.set_index('FIPS')

# Renaming columns for raw data tab: every column whose name starts with "P" is tagged as modeled.
modeled_relativities = modeled_relativities.rename(
    columns={
        name: name + " - Modeled"
        for name in modeled_relativities.columns
        if name[0] == "P"
    }
)

# The master table starts out as the modeled relativities; later cells join onto it.
new_master = modeled_relativities

In [5]:
# Display the modeled relativities (indexed by FIPS) for a visual check
modeled_relativities

Unnamed: 0_level_0,State Abbr,P1 - Modeled,P2 - Modeled,P3 - Modeled,P4 - Modeled,P5 - Modeled,P6 - Modeled,P7 - Modeled,P8 - Modeled,P9 - Modeled,P10 - Modeled,P11 - Modeled
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
010970066004,AL,1.150129,0.762409,1.122,0.981192,0.784495,0.670,0.295,1.02,1.463,0.603711,1.00
010970073001,AL,1.119196,2.581505,1.122,0.923545,0.818486,0.926,0.277,0.96,1.422,0.637864,1.00
010970072022,AL,1.150129,2.946460,1.122,0.796766,0.767079,0.879,0.361,0.87,1.462,1.430014,1.00
010970073003,AL,1.150129,2.946460,1.122,0.886456,0.778971,0.926,0.361,0.87,1.422,0.727258,1.00
010970073004,AL,1.150129,0.789650,1.122,1.006800,0.858062,0.926,0.295,1.02,1.422,0.733585,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...
560330005003,WY,1.554342,1.882253,0.977,0.703784,0.690169,1.114,0.436,0.63,0.858,1.000000,0.13
560330005001,WY,1.503514,1.794011,0.974,0.870131,0.698026,0.878,0.584,0.73,0.617,1.000000,0.32
560050007003,WY,1.470032,1.563246,0.975,0.765703,0.700705,0.867,0.719,1.07,0.528,1.000000,0.50
560050007002,WY,1.457235,1.539810,0.975,0.817121,0.719308,0.858,0.658,0.94,0.495,1.000000,0.44


## Current Relativities (pulled from Smartsheets automatically)

In [6]:
def process_one_smartsheet(filename):
    """Build a table of 'Building' territory factors keyed by Census Block Group.

    Reads the "Territory" and "TerritoryDefinitions" sheets of the workbook
    (both with the first two rows skipped), keeps the territory rows whose
    type is 'Building', and attaches them to every Census Block Group that is
    mapped to the same territory.

    Parameters
    ----------
    filename : path of the Excel workbook to read.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Census Block Group'; columns are the factor columns left
        in the "Territory" sheet once 'ColKey1', 'Type' and 'Territory' are
        removed.
    """
    excel_options = dict(engine="openpyxl", skiprows=2)
    factors = pd.read_excel(filename, sheet_name="Territory", **excel_options)
    definitions = pd.read_excel(filename, sheet_name="TerritoryDefinitions", **excel_options)

    # Territory factors: the row labelled 0 is discarded, then the second
    # column is forward-filled so every row carries a value before it is
    # renamed to 'Type'.
    factors = factors.drop(0)
    type_column = factors.columns[1]
    factors[type_column] = factors[type_column].ffill()
    factors = (
        factors
        .rename(columns={type_column: 'Type', 'Unnamed: 2': 'Territory'})
        .drop(columns=['ColKey1'])
    )
    factors['Territory'] = factors['Territory'].astype(float)
    building_factors = factors[factors['Type'] == 'Building']

    # Territory definitions: only the second column (renamed to
    # 'Census Block Group') and the territory it maps to are needed.
    cbg_column = definitions.columns[1]
    definitions = (
        definitions[[cbg_column, 'Territory']]
        .rename(columns={cbg_column: 'Census Block Group'})
    )

    # One output row per (territory factor row, CBG in that territory) pair.
    return (
        building_factors
        .merge(definitions, left_on='Territory', right_on='Territory')
        .drop(columns=['Type', 'Territory'])
        .set_index('Census Block Group')
    )

In [7]:
# all_merged = []
# for file in os.listdir('Smartsheets'):
#     # print(f"Processing {file}...")
#     all_merged.append(process_one_smartsheet(f'Smartsheets/{file}'))

# all_merged_df = pd.concat(all_merged, axis=0)
# all_merged_df.to_csv('current_relativities.csv')

In [8]:
# Load the cached current relativities; the first column (the CBG) is read as text.
dtype_spec = {0: str}
current_relativities = pd.read_csv("current_relativities.csv", dtype=dtype_spec)

# Fix CBGs with the first digit omitted: an 11-character code gets its "0" back.
cbg_codes = current_relativities['Census Block Group'].astype(str)
current_relativities['Census Block Group'] = cbg_codes.where(
    cbg_codes.str.len() != 11, '0' + cbg_codes
)

# drop_duplicates() returns a new frame, so the result must be assigned back
# (the bare call used previously removed nothing). It runs after the padding
# so rows that only differed by the missing leading zero are also collapsed.
current_relativities = current_relativities.drop_duplicates()
current_relativities = current_relativities.set_index('Census Block Group')

# Renaming columns for raw data tab: every column whose name starts with "PG"
# is tagged as a current factor; other columns keep their names.
current_relativities = current_relativities.rename(
    columns={
        name: name + " - Current"
        for name in current_relativities.columns
        if name[0:2] == "PG"
    }
)

In [9]:
# Left join on the index: every row of new_master is kept and the current relativity columns are added
new_master = new_master.join(current_relativities, how='left')

In [10]:
# Display the current relativities (indexed by Census Block Group) for a visual check
current_relativities

Unnamed: 0_level_0,PG1 - Current,PG2 - Current,PG3 - Current,PG4 - Current,PG5 - Current,PG6 - Current,PG7 - Current,PG8 - Current,PG9 - Current,PG10 - Current,PG11 - Current,PG12 - Current
Census Block Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
020130001001,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.00,1.000,0.0,0.00,0
020130001002,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.00,1.000,0.0,0.00,0
020130001003,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.00,1.000,0.0,0.00,0
020160001001,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.00,1.000,0.0,0.00,0
020160002001,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.00,1.000,0.0,0.00,0
...,...,...,...,...,...,...,...,...,...,...,...,...
560459511001,1.527,1.523,0.970,0.566,0.593,0.870,0.797,1.34,0.451,0.0,0.50,0
560459511002,1.554,1.468,0.970,0.560,0.614,0.945,0.866,1.44,0.475,0.0,0.32,0
560459513001,1.370,1.424,0.971,0.596,0.619,0.887,1.154,1.44,0.473,0.0,0.32,0
560459513002,1.062,1.412,0.971,0.559,0.579,0.925,0.866,1.44,0.497,0.0,0.32,0


## EASI Census Data (Snowflake Query File: EASI_CBG.sql)

In [11]:
# EASI data is currently loaded from a CSV export. The Snowflake query that
# would produce it is kept below, commented out.
# with open(f'{cbg_query_folder}/EASI_CBG.sql', 'r') as EASI_query:
#     cs = ctx.cursor()
#     try:
#         cs.execute(EASI_query.read())
#         EASI_data = cs.fetch_pandas_all()
#     finally:
#         cs.close()
EASI_data = pd.read_csv("EASI query for Raw Data tab.csv")
# ctx.close()

In [12]:
# Ensures that CBG is the index and a string when merging to new_master:
# every "A" character is removed from the CBG text before it becomes the index.
EASI_data = (
    EASI_data
    .assign(CBG=EASI_data['CBG'].str.replace('A',''))
    .set_index('CBG')
)

In [13]:
# Merge with EASI data
# Left join on the index: rows of new_master without a matching CBG in EASI_data get missing values
new_master = new_master.join(EASI_data, how='left')

In [14]:
# Display the EASI table (indexed by CBG) for a visual check
EASI_data

Unnamed: 0_level_0,County Name,Square Miles,Population Density,Population,Average Household Size,Urban Population,Rural Population,Male Population,Female Population,Population Aged 18 to 24,...,Education Attainment Associates Degree,Education Attainment Some College,Population Forecast 20242019,Household Growth 20192010,Household Forecast 20242019,Housing Vacant Units,Housing Owner Occupied,Housing Renter Occupied,Housing Median Rent,Housing Median Value Owner Households
CBG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
060590637021,"Orange, CA",0.141756,10052.484551,1425,2.71,1302,0,725,577,118,...,142,333,2.25,9.07,2.77,25,111,319,1330,527778
060590757011,"Orange, CA",0.264705,5757.352525,1524,2.98,1436,0,730,706,129,...,105,210,2.69,6.28,2.95,9,426,52,750,625000
060590994101,"Orange, CA",0.105409,11052.187195,1165,2.66,1071,0,520,551,97,...,133,158,2.75,9.00,2.98,15,140,260,1223,578313
060590992144,"Orange, CA",0.132024,6566.987820,867,1.69,817,0,342,475,8,...,20,317,2.88,6.40,3.11,23,451,33,2114,91724
060590992274,"Orange, CA",0.186519,12465.218021,2325,3.33,2196,0,1080,1116,227,...,133,357,2.88,6.06,3.14,22,348,312,1403,490517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
510030113032,"Albemarle, VA",1.256084,1141.643393,1434,1.74,1268,0,557,711,153,...,97,153,3.97,14.98,5.07,46,115,606,1106,210000
510150706002,"Augusta, VA",2.361233,484.916143,1145,2.15,859,231,533,557,378,...,12,13,1.05,5.68,2.56,25,148,222,255,207692
490111270032,"Davis, UT",8.919972,355.494389,3171,3.00,2559,98,1334,1323,230,...,173,419,5.68,20.68,6.65,55,807,78,1319,241026
490351139063,"Salt Lake, UT",0.150296,9674.242827,1454,3.03,1292,0,635,657,144,...,60,158,5.57,13.35,6.20,39,250,177,575,131538


## Home Operational Loss Data (Snowflake Query File: HOL_CBG.sql)

In [15]:
# Getting Home Operational Loss data from Snowflake Connector
with open(f'{cbg_query_folder}/HOL_CBG.sql', 'r') as HOL_query:
    hol_sql = HOL_query.read()

cs = ctx.cursor()
try:
    cs.execute(hol_sql)
    home_loss_data = cs.fetch_pandas_all()
finally:
    # The cursor is released even if the query fails; the connection stays open for later cells.
    cs.close()

# Missing values (including a missing CBG id) become 0 before the id is
# turned into an integer, then into text with an omitted leading zero restored.
home_loss_data = home_loss_data.fillna(0)
cbg_text = home_loss_data['GEOCD_CNSUS_BLOCK_GRP_ID'].astype(int).astype(str)
home_loss_data['GEOCD_CNSUS_BLOCK_GRP_ID'] = [
    f'0{code}' if len(code) == 11 else code for code in cbg_text
]

# Renaming columns for raw data tab: index by CBG and tag every remaining column.
home_loss_data = (
    home_loss_data
    .set_index('GEOCD_CNSUS_BLOCK_GRP_ID')
    .add_suffix(" - Prevail")
)

In [16]:
# Left join on the index: adds the "- Prevail" loss columns to every row of new_master
new_master = new_master.join(home_loss_data, how='left')

In [17]:
# Display the home loss table for a visual check
home_loss_data

Unnamed: 0_level_0,Earned Exposures - Prevail,Earned Premium - Prevail,Capped Total Ult. Loss xCAT xHail - Prevail,Capped Fire Ult. Loss - Prevail,Capped Lightning Ult. Loss - Prevail,Capped Liability Ult. Loss - Prevail,Capped Theft Ult. Loss - Prevail,Capped Water Non-Weather Ult. Loss - Prevail,Capped Water Weather Ult. Loss - Prevail,Capped Wind Ult. Loss - Prevail,...,Lightning Loss Cost - Prevail,Liability Loss Cost - Prevail,Theft Loss Cost - Prevail,Water Non-Weather Loss Cost - Prevail,Water Weather Loss Cost - Prevail,Wind Loss Cost - Prevail,Hail Loss Cost - Prevail,Other Loss Cost - Prevail,Total Loss Cost xCAT xHail - Prevail,Total Loss Ratio xCAT xHail - Prevail
GEOCD_CNSUS_BLOCK_GRP_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,43426.408191662871,39142716.58689000,1.160220e+07,7347.7074,10838.88,656477.338,91822.8,7.559864e+06,2.101359e+06,159920.805,...,0.249592,15.117007,2.114446,174.084487,48.388958,3.68257,20.497104,23.362984,267.169243,0.296408
110010096011,10.352149999985,6403.86785000,0.000000e+00,0.0000,0.00,0.000,0.0,0.000000e+00,0.000000e+00,0.000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
340170188002,27.135983333350,7092.95195000,0.000000e+00,0.0000,0.00,0.000,0.0,0.000000e+00,0.000000e+00,0.000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
484391219053,81.102566666646,11363.99502000,0.000000e+00,0.0000,0.00,0.000,0.0,0.000000e+00,0.000000e+00,0.000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
391535021022,195.400900000030,198832.35283000,0.000000e+00,0.0000,0.00,0.000,0.0,0.000000e+00,0.000000e+00,0.000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
080010085435,0.054800000000,18.41096000,0.000000e+00,0.0000,0.00,0.000,0.0,0.000000e+00,0.000000e+00,0.000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
132270506003,0.021916666666,58.32328000,0.000000e+00,0.0000,0.00,0.000,0.0,0.000000e+00,0.000000e+00,0.000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
420950179022,0.038350000000,24.77808000,0.000000e+00,0.0000,0.00,0.000,0.0,0.000000e+00,0.000000e+00,0.000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
290718009012,0.098633333334,354.47672000,0.000000e+00,0.0000,0.00,0.000,0.0,0.000000e+00,0.000000e+00,0.000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000


## Home Flow Data (Snowflake Query File: FLOW_CBG.sql)

In [18]:
# Getting Home Flow data from Snowflake Connector
with open(f'{cbg_query_folder}/FLOW_CBG.sql', 'r') as FLOW_query:
    flow_sql = FLOW_query.read()

cs = ctx.cursor()
try:
    cs.execute(flow_sql)
    home_flow_data = cs.fetch_pandas_all()
finally:
    # The cursor is released even if the query fails; the connection stays open for later cells.
    cs.close()

# Offline alternative to the query above:
# home_flow_data = pd.read_csv("TDST Flow Query.csv", dtype={'GEOCD_CNSUS_BLOCK_GRP_ID': str})

# Renaming columns for raw data tab: keep the first row per CBG, index by CBG,
# and tag every remaining column.
home_flow_data = (
    home_flow_data
    .drop_duplicates(subset='GEOCD_CNSUS_BLOCK_GRP_ID')
    .set_index('GEOCD_CNSUS_BLOCK_GRP_ID')
    .add_suffix(" - Prevail")
)
home_flow_data

Unnamed: 0_level_0,Issue Rate - Prevail,HIG / SRP - Prevail,AQP - Prevail,AIP - Prevail,Quote Volume - Prevail,Response Volume - Prevail
GEOCD_CNSUS_BLOCK_GRP_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
,0.54,1.10,950.80,781.03,5852.0,65650.0
230110101002,0.25,1.25,897.00,518.00,4.0,8.0
110010015001,0.00,1.41,2315.00,,1.0,10.0
132150020001,0.25,1.22,2371.13,2212.00,8.0,20.0
220330026011,0.00,0.14,465.50,,2.0,4.0
...,...,...,...,...,...,...
380170009043,,,,,,
421010330007,0.00,,,,0.0,2.0
340170174002,,,,,,
511552105002,0.00,,,,0.0,1.0


In [19]:
# Remove rows that are exact repeats of an earlier row, CBG included.
# DataFrame.drop_duplicates() compares column values only and ignores the
# index, so on its own it would also discard distinct CBGs whose values
# happen to be identical. Flagging duplicates on reset_index() makes the CBG
# part of the comparison; the boolean mask is applied by position.
new_master = new_master[~new_master.reset_index().duplicated().to_numpy()]

In [20]:
# Left join on the index: adds the "- Prevail" flow columns to every row of new_master
new_master = new_master.join(home_flow_data, how='left')
# Display only: rename_axis returns a new frame that is not assigned here,
# so the index name of new_master itself is unchanged by this line.
new_master.rename_axis('CBG')

Unnamed: 0_level_0,State Abbr,P1 - Modeled,P2 - Modeled,P3 - Modeled,P4 - Modeled,P5 - Modeled,P6 - Modeled,P7 - Modeled,P8 - Modeled,P9 - Modeled,...,Hail Loss Cost - Prevail,Other Loss Cost - Prevail,Total Loss Cost xCAT xHail - Prevail,Total Loss Ratio xCAT xHail - Prevail,Issue Rate - Prevail,HIG / SRP - Prevail,AQP - Prevail,AIP - Prevail,Quote Volume - Prevail,Response Volume - Prevail
CBG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
010970066004,AL,1.150129,0.762409,1.122,0.981192,0.784495,0.670,0.295,1.02,1.463,...,,,,,0.00,,,,0.0,5.0
010970073001,AL,1.119196,2.581505,1.122,0.923545,0.818486,0.926,0.277,0.96,1.422,...,,,,,,,,,,
010970072022,AL,1.150129,2.946460,1.122,0.796766,0.767079,0.879,0.361,0.87,1.462,...,,,,,0.00,,,,0.0,1.0
010970073003,AL,1.150129,2.946460,1.122,0.886456,0.778971,0.926,0.361,0.87,1.422,...,,,,,0.00,,213.00,,1.0,2.0
010970073004,AL,1.150129,0.789650,1.122,1.006800,0.858062,0.926,0.295,1.02,1.422,...,,,,,0.00,,,,0.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560330005003,WY,1.554342,1.882253,0.977,0.703784,0.690169,1.114,0.436,0.63,0.858,...,,,,,0.00,1.52,3507.00,,1.0,8.0
560330005001,WY,1.503514,1.794011,0.974,0.870131,0.698026,0.878,0.584,0.73,0.617,...,,,,,0.00,,,,0.0,2.0
560050007003,WY,1.470032,1.563246,0.975,0.765703,0.700705,0.867,0.719,1.07,0.528,...,,,,,0.00,,,,0.0,4.0
560050007002,WY,1.457235,1.539810,0.975,0.817121,0.719308,0.858,0.658,0.94,0.495,...,0.0,0.0,0.0,0.0,1.00,0.99,285.00,285.00,1.0,3.0


## AAL Data (Snowflake Query File: AAL_CBG.sql)

In [21]:
# Getting AAL data from Snowflake Connector
with open(f'{cbg_query_folder}/AAL_CBG.sql', 'r') as AAL_query:
    aal_sql = AAL_query.read()

cs = ctx.cursor()
try:
    cs.execute(aal_sql)
    AAL_data = cs.fetch_pandas_all()
finally:
    # The cursor is released even if the query fails; the connection stays open for the next query.
    cs.close()

# Add AALs in there: index by CBG, tag every column, then left join onto the master table.
AAL_data = AAL_data.set_index('CBG').add_suffix(" - Prevail")
new_master = new_master.join(AAL_data, how='left')

## ADL Data (Snowflake Query File: ADL_CBG.sql)

In [22]:
# Getting ADL data from Snowflake Connector
with open(f'{cbg_query_folder}/ADL_CBG.sql', 'r') as ADL_query:
    cs = ctx.cursor()
    try:
        cs.execute(ADL_query.read())
        ADL_data = cs.fetch_pandas_all()
    finally:
        # Release the cursor whether or not the query succeeded
        cs.close()
# The Snowflake connection is closed here; the query cells above need it open
ctx.close()

In [23]:
# Index the ADL table by CBG and tag every remaining column for the raw data tab
ADL_data = ADL_data.set_index("CBG").add_suffix(" - Prevail")
ADL_data

Unnamed: 0_level_0,Fire Earned Premium - Prevail,Lightning Earned Premium - Prevail,Liability Earned Premium - Prevail,Theft Earned Premium - Prevail,Water Non-Weather Earned Premium - Prevail,Water Weather Earned Premium - Prevail,Wind Earned Premium - Prevail,Hail Earned Premium - Prevail,Other Earned Premium - Prevail,Hurricane Earned Premium - Prevail,...,Liability Incurred Loss Ratio - Prevail,Theft Incurred Loss Ratio - Prevail,Water Non-Weather Incurred Loss Ratio - Prevail,Water Weather Incurred Loss Ratio - Prevail,Wind Incurred Loss Ratio - Prevail,Hail Incurred Loss Ratio - Prevail,Other Incurred Loss Ratio - Prevail,Hurricane Incurred Loss Ratio - Prevail,Wildfire Incurred Loss Ratio - Prevail,Unknown Incurred Loss Ratio - Prevail
CBG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
010010202001,3108.10,59.80,448.23,149.41,5629.40,448.41,3138.00,3057.03,909.21,1310.20,...,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,,0E-12
010010203001,3679.83,423.19,363.46,360.10,24048.53,1849.78,8759.81,15157.94,2323.49,16283.48,...,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,,0E-12
010010204001,467.06,18.15,64.02,27.28,2281.51,155.54,815.32,2693.35,357.28,2629.77,...,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,,0E-12
010010204002,329.04,15.96,57.06,29.64,1962.84,178.14,673.98,774.60,121.08,865.98,...,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,,0E-12
010010205001,1415.76,99.96,281.88,147.96,4333.68,371.88,1221.00,3928.32,642.24,3145.32,...,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,,0E-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560419754006,709.24,,88.62,14.70,1244.18,433.86,1238.02,1930.88,549.64,,...,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,,0E-12,0E-12
560430003011,1615.32,,315.98,60.20,1595.44,505.54,1814.54,6027.00,361.34,,...,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,,0E-12,0E-12
560430003013,11955.48,,1964.82,254.67,4408.71,1418.37,5118.00,7909.38,738.48,,...,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,,0E-12,0E-12
560430003021,1378.08,,84.87,28.17,992.25,261.00,822.51,1775.07,136.08,,...,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,0E-12,,0E-12,0E-12


In [24]:
# Left join on the index: adds the "- Prevail" ADL columns to every row of new_master
new_master = new_master.join(ADL_data, how='left')

In [25]:
# Fixing data types of premium fields
earned_premium_columns = [
    'Fire Earned Premium - Prevail',
    'Hail Earned Premium - Prevail',
    'Liability Earned Premium - Prevail',
    'Lightning Earned Premium - Prevail',
    'Other Earned Premium - Prevail',
    'Theft Earned Premium - Prevail',
    'Water Non-Weather Earned Premium - Prevail',
    'Water Weather Earned Premium - Prevail',
    'Wind Earned Premium - Prevail'
]

# Convert and replace the columns in one whole-column assignment.
# On pandas 2.x, assigning through `.loc[:, column]` writes into the existing
# column, so a column that was object-dtype stays object-dtype even though its
# values were converted; `new_master[columns] = ...` swaps in the float columns.
# A premium of 0 becomes NaN (np.nan rather than pd.NA, which would turn the
# columns back into object dtype) so the loss ratios computed from these
# columns come out as NaN instead of a division by zero.
new_master[earned_premium_columns] = (
    new_master[earned_premium_columns].astype(float).replace(0, np.nan)
)

## Adding Ultimate Loss Ratios

In [26]:
# Add premium-level loss ratios: for each peril,
# capped ultimate loss divided by that peril's earned premium.
loss_ratio_perils = [
    'Fire',
    'Hail',
    'Liability',
    'Lightning',
    'Other',
    'Theft',
    'Water Non-Weather',
    'Water Weather',
    'Wind',
]
for peril_name in loss_ratio_perils:
    capped_loss = new_master[f'Capped {peril_name} Ult. Loss - Prevail']
    earned_premium = new_master[f'{peril_name} Earned Premium - Prevail']
    new_master[f'{peril_name} Ult. Loss Ratio - Prevail'] = capped_loss / earned_premium

## DTC Data (provided manually by Kaitlyn DeBrusk)

In [27]:
# DUMMY DTC FACTORS
# REPLACE THIS WITH THE REAL ONES WHEN KAITLYN GIVES THEM TO YOU
# Placeholder: one column per peril group PG1..PG12, every value 1.0,
# on the same index as new_master.
DTC = pd.DataFrame(
    1.0,
    index=new_master.index,
    columns=[f"PG{peril} - DTC" for peril in range(1,13)],
)

In [28]:
# Copy every DTC column into new_master under the same column name
new_master[DTC.columns] = DTC

In [29]:
# Display the assembled master table for a visual check
new_master

Unnamed: 0_level_0,State Abbr,P1 - Modeled,P2 - Modeled,P3 - Modeled,P4 - Modeled,P5 - Modeled,P6 - Modeled,P7 - Modeled,P8 - Modeled,P9 - Modeled,...,PG3 - DTC,PG4 - DTC,PG5 - DTC,PG6 - DTC,PG7 - DTC,PG8 - DTC,PG9 - DTC,PG10 - DTC,PG11 - DTC,PG12 - DTC
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
010970066004,AL,1.150129,0.762409,1.122,0.981192,0.784495,0.670,0.295,1.02,1.463,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
010970073001,AL,1.119196,2.581505,1.122,0.923545,0.818486,0.926,0.277,0.96,1.422,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
010970072022,AL,1.150129,2.946460,1.122,0.796766,0.767079,0.879,0.361,0.87,1.462,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
010970073003,AL,1.150129,2.946460,1.122,0.886456,0.778971,0.926,0.361,0.87,1.422,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
010970073004,AL,1.150129,0.789650,1.122,1.006800,0.858062,0.926,0.295,1.02,1.422,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560330005003,WY,1.554342,1.882253,0.977,0.703784,0.690169,1.114,0.436,0.63,0.858,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
560330005001,WY,1.503514,1.794011,0.974,0.870131,0.698026,0.878,0.584,0.73,0.617,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
560050007003,WY,1.470032,1.563246,0.975,0.765703,0.700705,0.867,0.719,1.07,0.528,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
560050007002,WY,1.457235,1.539810,0.975,0.817121,0.719308,0.858,0.658,0.94,0.495,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Post-Processing and Uploading to CSV

In [30]:
# It's called: We do a little post-processing
# Fill every missing value with 0, then let pandas re-infer the dtype of object columns.
new_master = new_master.fillna(0).infer_objects(copy=False)

# Drop the last four characters of each county name (e.g. the ", CA" in "Orange, CA").
# NOTE(review): this runs after fillna(0); the .str accessor yields NaN for
# non-string values, so rows whose County Name was filled with 0 above appear
# to end up missing again - confirm whether that is intended.
new_master['County Name'] = new_master['County Name'].str.slice(stop=-4)

  new_master = new_master.fillna(0)


In [31]:
# Columns that lead the exported table, in this order; every other column of
# new_master is appended after them by the code that follows.
ordered_columns = ['County Name', 'State Abbr', 'Population', 'Earned Premium - Prevail', 'Earned Exposures - Prevail', 'Capped Total Ult. Loss xCAT xHail - Prevail', 'PG1 - Current', 'PG2 - Current', 'PG3 - Current', 'PG4 - Current',
       'PG5 - Current', 'PG6 - Current', 'PG7 - Current', 'PG8 - Current',
       'PG9 - Current', 'PG10 - Current', 'PG11 - Current',
       'PG12 - Current', 'P1 - Modeled', 'P2 - Modeled', 'P3 - Modeled',
       'P4 - Modeled', 'P5 - Modeled', 'P6 - Modeled', 'P7 - Modeled',
       'P8 - Modeled', 'P9 - Modeled', 'P10 - Modeled', 'P11 - Modeled',
        'Square Miles','Population Density', 'Average Household Size',
       'Urban Population', 'Rural Population', 'Male Population',
       'Female Population', 'Population Aged 18 to 24',
       'Population Aged 25 to 34', 'Population Aged 35 to 44',
       'Population Aged 45 to 54', 'Population Aged 55 to 64',
       'Population Aged 65 to 74', 'Population Aged 75 to 84',
       'Population Aged 85 Years and Over',
       'Householder Aged Under 25 Years',
       'Householder Aged 25 to 34 Years',
       'Householder Aged 35 to 44 Years',
       'Householder Aged 45 to 54 Years',
       'Householder Aged 55 to 64 Years',
       'Householder Aged 65 to 74 Years',
       'Householder Aged 75 to 84 Years',
       'Householder Aged 85 Years and Over', 'Householder Income Median',
       'Education Attainment Doctorate Degree',
       'Education Attainment Professional Degree',
       'Education Attainment Masters Degree',
       'Education Attainment Bachelors Degree',
       'Education Attainment Associates Degree',
       'Education Attainment Some College',
       'Population Forecast 20242019', 'Household Growth 20192010',
       'Household Forecast 20242019', 'Housing Vacant Units',
       'Housing Owner Occupied', 'Housing Renter Occupied',
       'Housing Median Rent', 'Housing Median Value Owner Households']

# Columns not named in ordered_columns keep their existing relative order
# and are placed after the ordered ones.
leading_columns = set(ordered_columns)
non_ordered_columns = [
    column_name
    for column_name in new_master.columns
    if column_name not in leading_columns
]
new_order = ordered_columns + non_ordered_columns

# Apply the column order and name the index 'CBG' for the exported file.
new_master = new_master[new_order].rename_axis('CBG')

In [32]:
# Sorted dataframe by CBG
# Sort rows by the index (the CBG code) so the exported file is in a stable order
new_master = new_master.sort_index()

In [33]:
# Write the final table, index included, to the working directory
new_master.to_csv("raw_data_cbg.csv")