# Get location rate

Author: Mo Al Elew

Input a Michigan address and get its corresponding location rate data

**What notebook does/produces:**

Implements a function that reads in an insurer's rate table and filters for a given address's location

**Issue/problem being solved:**

There are multiple insurers that we are analyzing in their own siloed environments. I need a single notebook where I can quickly look up the location rate for a given address.

**Strategy to solve:**

1. Geocode address using Geocodio API
2. Pull insurer rate table GIS file
3. Execute `within` operation on the geocoded point and the rate table geographies
4. Calculate the percentile of the location rate to contextualize it and enable inter-insurer comparison  
5. Filter for relevant columns
6. Concatenate the insurers' data


**Notes**

- Address CSV should be in the format: `Name, Street, City, State, Zip`

In [None]:
import geopandas as gpd
import pandas as pd
from geocodio import GeocodioClient
from scipy import stats
from shapely import intersection
from shapely.geometry import Point
from shapely.ops import nearest_points

# Constants

In [None]:
# CSV of addresses to look up; the notebook reads its "Name", "Street", "City",
# "State" and "Zip" columns.
ADDRESSES_FP = "../00_misc/inputs/address_to_pull.csv"
# API_KEY is deliberately left commented out; uncomment and fill it in before
# the Geocodio client is created, since that cell reads API_KEY.
# API_KEY = ""  # insert GeoCodio API key for coordinate geocoding

In [None]:
# Columns kept (in this order) in each lookup result; get_location_rate silently
# skips any of these that are missing from a given insurer's rate table.
EXPORT_COLUMNS = [
    "name",
    "address",
    "company",
    "location_effect",
    "effect_percentile",
    "geo_id",
    "geo_name",
    "white_pct",
    "black_pct",
    "median_income",
    "bg_median_income",
    "geography_factor_id",
    "geography_type",
]

# Insurer name -> path of that insurer's rate-table GeoJSON file. The keys are
# the valid values for the `insurer` argument used throughout this notebook.
INSURER_MAP_FPS = {
    "AllState": "../02_allstate/outputs/allstate_auto_gis.geojson",
    "Auto Club Group": "../03_auto_club_group/outputs/autoclubgroup_auto_gis.geojson",
    "Liberty Mutual": "../04_liberty_mutual/outputs/libertymutual_auto_gis.geojson",
    "State Farm": "../05_state_farm/outputs/statefarm_auto_gis.geojson",
    "Citizens": "../07_citizens/outputs/citizens_auto_gis.geojson",
}

# Read rate maps

In [None]:
# Load every insurer's rate-table GIS file once up front, keyed by insurer name.
INSURER_GDFS = {
    insurer_name: gpd.read_file(map_fp)
    for insurer_name, map_fp in INSURER_MAP_FPS.items()
}

# Read addresses

In [None]:
# Read the address list with every column kept as a string.
DF_ADDRESSES = pd.read_csv(ADDRESSES_FP, dtype=str)

# Assemble "Street, City, State Zip" strings to send to the geocoder.
address_series = (
    DF_ADDRESSES["Street"]
    .str.cat(DF_ADDRESSES["City"], sep=", ")
    .str.cat(DF_ADDRESSES["State"], sep=", ")
    .str.cat(DF_ADDRESSES["Zip"], sep=" ")
)

# Map each entry's name to its full address string, then display the mapping.
address_dict = dict(zip(DF_ADDRESSES["Name"], address_series))
address_dict

# Geocodio client

In [None]:
# Shared Geocodio API client used by geocode_address_to_point. API_KEY must be
# defined before this cell runs (it is commented out in the constants cell).
geocodio_client = GeocodioClient(API_KEY)

# Helper Functions

In [None]:
def read_insurer_rate_table_map(insurer):
    """Read the rate-table GIS file registered for ``insurer``.

    ``insurer`` must be a key of ``INSURER_MAP_FPS``; an unknown name raises
    ``KeyError``. Returns whatever ``gpd.read_file`` produces for that path.
    """
    map_fp = INSURER_MAP_FPS[insurer]
    return gpd.read_file(map_fp)

In [None]:
def geocode_address_to_point(address):
    """Geocode ``address`` with the Geocodio client and return it as a point.

    Parameters:
        address: Address string passed to ``geocodio_client.geocode``.

    Returns:
        A ``shapely.geometry.Point`` built as ``(lng, lat)`` from the first
        geocoding result.

    Raises:
        IndexError: If the geocoder returns no results for ``address``.
    """
    results = geocodio_client.geocode(address)["results"]
    if not results:
        # Same exception type the bare ``[0]`` lookup would raise, but the
        # message now names the address that could not be geocoded.
        raise IndexError(f"No geocoding results for address: {address!r}")

    location = results[0]["location"]
    # Pass plain scalars in x=longitude, y=latitude order; wrapping each
    # coordinate in a list is not a portable Point constructor form.
    return Point(location["lng"], location["lat"])

In [None]:
def percentile_of_value(val, series, round_precision=1, percentile_kind="strict"):
    """Return the percentile rank of ``val`` within ``series``, rounded.

    Parameters:
        val: Score whose percentile rank is wanted.
        series: Values ``val`` is ranked against.
        round_precision: Number of decimal places kept in the result.
        percentile_kind: ``kind`` argument forwarded to
            ``scipy.stats.percentileofscore`` ("rank", "weak", "strict" or
            "mean"). The default "strict" counts only values strictly below
            ``val``.

    Returns:
        The percentile rank (0-100) rounded to ``round_precision`` places.
    """
    return round(
        # Honor the caller's choice of kind instead of always using "strict".
        stats.percentileofscore(series, val, kind=percentile_kind),
        round_precision,
    )

In [None]:
def nearest_geometries(point, point_geometries):
    """Return the location on the union of ``point_geometries`` nearest to ``point``.

    ``point_geometries`` is merged into a single geometry through its
    ``unary_union`` attribute, and shapely's ``nearest_points`` is then used to
    find the spot on that merged geometry closest to ``point``.
    """
    merged = point_geometries.unary_union
    # nearest_points yields one point per input geometry, in argument order;
    # the second entry is the one lying on the merged geometry.
    return nearest_points(point, merged)[1]


def nearest_rate_unit(gdf_rates, point):
    """Return the row(s) of ``gdf_rates`` whose geometry is closest to ``point``.

    Rows are selected by minimum distance rather than by testing geometries for
    equality with a nearest point. An equality test only matches when the rate
    units are themselves points; for polygon rate units it never matches and
    the fallback would come back empty.

    Parameters:
        gdf_rates: GeoDataFrame of rate units.
        point: Shapely geometry to measure distances from.

    Returns:
        A copy of the row(s) at the minimum distance from ``point``. The result
        is empty when ``gdf_rates`` has no rows.
    """
    # NOTE(review): distances are planar in the layer's CRS units; geopandas
    # may warn if the layer is in a geographic CRS.
    distances = gdf_rates.geometry.distance(point)
    return gdf_rates[distances == distances.min()].copy()

In [None]:
def get_location_rate(address, insurer, name=None):
    """Look up an insurer's location rate data for a single address.

    The address is geocoded to a point, and the rate unit containing that point
    is selected from the insurer's rate table. If no unit contains the point,
    the lookup falls back to ``nearest_rate_unit``.

    Parameters:
        address: Address string to geocode.
        insurer: Key of ``INSURER_GDFS`` identifying the rate table to search.
        name: Optional label stored in the result's ``name`` column.

    Returns:
        The matching row(s), restricted to the ``EXPORT_COLUMNS`` that are
        present. ``effect_percentile`` is only added when exactly one row
        matched.

    Raises:
        KeyError: If ``insurer`` is not a known insurer.
    """
    # Fail fast, before spending a geocoding API call, when the insurer is
    # unknown; previously this only printed and then hit a bare KeyError later.
    if insurer not in INSURER_GDFS:
        raise KeyError(
            f"Insurer argument must be present in INSURER_MAP_FPS such as {list(INSURER_GDFS)}"
        )

    gdf_rate_table = INSURER_GDFS[insurer]
    point = geocode_address_to_point(address)
    gdf_point_entry = gdf_rate_table[gdf_rate_table.geometry.contains(point)].copy()

    # The point fell outside every rate unit: use the closest one instead.
    if gdf_point_entry.empty:
        gdf_point_entry = nearest_rate_unit(gdf_rate_table, point)

    # Rank the matched location effect against the insurer's whole rate table.
    if len(gdf_point_entry) == 1:
        effect = gdf_point_entry.iloc[0]["location_effect"]
        gdf_point_entry["effect_percentile"] = percentile_of_value(
            effect, gdf_rate_table["location_effect"]
        )

    gdf_point_entry["name"] = name
    gdf_point_entry["address"] = address

    return gdf_point_entry[
        [col for col in EXPORT_COLUMNS if col in gdf_point_entry.columns]
    ]

In [None]:
# One combined frame per insurer, each covering every address in address_dict.
dfs = [
    pd.concat(
        [
            get_location_rate(street_address, company, label)
            for label, street_address in address_dict.items()
        ]
    )
    for company in INSURER_MAP_FPS
]

In [None]:
# Stack all insurers' results and keep the sorted order. Previously the sorted
# frame was only displayed and then discarded, so the exported CSV was unsorted.
df_export = pd.concat(dfs).sort_values(["name", "company"])
df_export

In [None]:
# Write the combined lookup results to CSV, leaving out the DataFrame index.
df_export.to_csv(path_or_buf="./outputs/location_rates_pulled.csv", index=False)