# Analysis excluding crime-related coverages

In [1]:
import geopandas as gpd
import pandas as pd

# Constants and helper functions

In [2]:
# Path to the GeoJSON file that is read into `GDF` with `gpd.read_file`.
DATA_FP = "./outputs/statefarm_auto_gis.geojson"

In [3]:
# Rate columns treated as crime-related; every other `rate_*` column is summed
# into the non-crime premium.
CRIME_RELATED_COVERAGES = ["rate_comp_fctr"]

# Labels for the quantile bins, ordered from the lowest premium bin to the
# highest. The number of labels also sets how many bins `pd.qcut` creates.
RATE_Q_LABELS = [
    "lowest effect", "middle low", "median", "middle high", "highest effect",
]

# Columns averaged within each `geo_id`.
GEOID_GROUP_BY_COLS = [
    # premiums
    "generic_location_based_premium",
    "non_crime_generic_premium",
    # demographics
    "white_tot",
    "black_tot",
    "tot_pop",
    "density",
]

# Columns summed within each quantile bin.
QUANTILE_GROUP_BY_COLS = [
    "black_tot",
    "white_tot",
    "tot_pop",
]

In [4]:
# Default number of decimal places: used as the default `precision` of
# `prptn_to_pct` and when rounding `non_crime_location_effect`.
ROUNDING_PRECISION = 2

In [5]:
def get_rate_columns(df_rate_table):
    """Return the column names of ``df_rate_table`` that start with ``"rate_"``.

    The names are returned as a list, in the same order as they appear in
    ``df_rate_table.columns``.
    """
    rate_columns = []
    for column_name in df_rate_table.columns:
        if column_name.startswith("rate_"):
            rate_columns.append(column_name)
    return rate_columns


def prptn_to_pct(val, precision=ROUNDING_PRECISION):
    """Convert a proportion (e.g. ``0.057``) into a percentage (``5.7``).

    Parameters
    ----------
    val
        Proportion(s) to convert. Anything that supports ``* 100`` and the
        built-in ``round`` works: a plain number or a pandas Series/DataFrame.
    precision : int
        Number of decimal places of the *proportion* to keep. The returned
        percentage therefore carries ``precision - 2`` decimal places.

    Returns
    -------
    The percentage, in the same kind of container as ``val``.
    """
    # Scale first and round last. Rounding before the multiplication lets
    # binary floating-point noise creep back in afterwards
    # (e.g. round(0.07, 2) * 100 == 7.000000000000001).
    return round(val * 100, precision - 2)

# Read data

In [6]:
# Load the GeoJSON and expose `total_pop` under the additional name `tot_pop`,
# which is the column name the group-by column lists refer to.
GDF = gpd.read_file(DATA_FP).assign(tot_pop=lambda frame: frame["total_pop"])

# Process

In [7]:
# Work on a copy so the frame loaded from disk stays untouched.
gdf = GDF.copy()

# Every rate column except the ones flagged as crime-related.
non_crime_coverages = [
    rate_col
    for rate_col in get_rate_columns(GDF)
    if rate_col not in CRIME_RELATED_COVERAGES
]

In [8]:
# Print the column names, non-null counts and dtypes of the working copy.
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 175436 entries, 0 to 175435
Data columns (total 51 columns):
 #   Column                          Non-Null Count   Dtype   
---  ------                          --------------   -----   
 0   company                         175436 non-null  object  
 1   vehicle_type                    175436 non-null  object  
 2   factor_circumvented             175436 non-null  object  
 3   factor_name                     175436 non-null  object  
 4   geography_factor                175436 non-null  object  
 5   geography_type                  175436 non-null  object  
 6   geography_factor_id             175436 non-null  object  
 7   latitude                        175436 non-null  float64 
 8   longitude                       175436 non-null  float64 
 9   county_fips                     175435 non-null  object  
 10  tract_fips                      0 non-null       object  
 11  block_group_fips                0 non-null       object  

In [9]:
# Premium without the crime-related coverages: row-wise sum of the remaining
# rate columns, rounded to a whole number.
non_crime_premium = gdf[non_crime_coverages].sum(axis=1).round()
gdf["non_crime_generic_premium"] = non_crime_premium

# Express each row's non-crime premium relative to the median non-crime premium.
median_non_crime_premium = gdf["non_crime_generic_premium"].median()
gdf["non_crime_location_effect"] = (
    gdf["non_crime_generic_premium"] / median_non_crime_premium
).round(ROUNDING_PRECISION)

# Analysis

## Rate quantiles

I average the generic rate for each ZCTA (`geo_id`).

## Average rate

I average the rates by geographic ID (`geo_id`) to avoid double counting.

In [10]:
# One row per `geo_id`, holding the mean of each selected column.
gdf_groupby_geo_id = gdf.groupby("geo_id")[GEOID_GROUP_BY_COLS].mean()

# Bin each premium into equal-sized quantiles, one bin per label.
n_bins = len(RATE_Q_LABELS)
for quantile_col, premium_col in (
    ("effect_quantile", "generic_location_based_premium"),
    ("non_crime_effect_quantile", "non_crime_generic_premium"),
):
    gdf_groupby_geo_id[quantile_col] = pd.qcut(
        gdf_groupby_geo_id[premium_col],
        q=n_bins,
        labels=RATE_Q_LABELS,
    )

This calculates (group subset in quantile / total group population).

For example: TK% of Black residents live in the top fifth of rates.

In [11]:
# Sum each group's column within every overall-premium quantile.
gdf_groupby_quantiles = (
    gdf_groupby_geo_id
    .groupby("effect_quantile", observed=False)[QUANTILE_GROUP_BY_COLS]
    .sum()
)

print("This calculates (group subset in quantile / total group population)")
# Column totals across all quantiles; dividing by them turns the counts into
# each quantile's share of that column's total.
group_totals = gdf_groupby_quantiles.sum(axis=0)
group_shares = gdf_groupby_quantiles.div(group_totals, axis=1)
df_distribution = prptn_to_pct(group_shares, 3)
df_distribution

This calculates (group subset in quantile / total group population)


Unnamed: 0_level_0,black_tot,white_tot,tot_pop
effect_quantile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
lowest effect,5.7,25.3,22.5
middle low,6.1,23.4,20.4
median,9.9,21.8,19.9
middle high,16.3,20.6,20.5
highest effect,61.9,8.9,16.7


In [12]:
# Sum each group's column within every non-crime-premium quantile.
gdf_groupby_quantiles = (
    gdf_groupby_geo_id
    .groupby("non_crime_effect_quantile", observed=False)[QUANTILE_GROUP_BY_COLS]
    .sum()
)

print("This calculates (group subset in quantile / total group population)")
# Column totals across all quantiles; dividing by them turns the counts into
# each quantile's share of that column's total.
group_totals = gdf_groupby_quantiles.sum(axis=0)
group_shares = gdf_groupby_quantiles.div(group_totals, axis=1)
df_distribution = prptn_to_pct(group_shares, 3)
df_distribution

This calculates (group subset in quantile / total group population)


Unnamed: 0_level_0,black_tot,white_tot,tot_pop
non_crime_effect_quantile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
lowest effect,3.6,25.3,21.7
middle low,5.6,22.9,19.9
median,11.8,22.5,21.2
middle high,17.5,20.3,20.5
highest effect,61.5,9.0,16.7
