Miscellaneous helper functions and constants imported into other notebooks

In [1]:
import json

import altair as alt
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from bs4 import BeautifulSoup as Soup
from scipy import stats

In [2]:
# Default column labels for the stacked quintile charts defined in this file:
# the first label names the id column, the other five name the groups.
CHART_COLS = ["Effect quintile"] + [
    "Lowest",
    "Lower middle",
    "Median",
    "Upper middle",
    "Highest",
]

# Functions

In [3]:
# Adapted from: https://stackabuse.com/calculating-spearmans-rank-correlation-coefficient-in-python-with-pandas/
def display_correlation(df, method="pearson"):
    """Draw an annotated correlation heatmap for ``df`` and return the matrix.

    Args:
        df: DataFrame whose pairwise column correlations are computed.
        method: Correlation method forwarded to ``DataFrame.corr``; it is also
            used as the prefix of the plot title.

    Returns:
        The correlation matrix returned by ``df.corr(method=method)``.
    """
    corr_matrix = df.corr(method=method)

    # A new 10x6 figure is opened first so the heatmap lands on its own canvas.
    plt.figure(figsize=(10, 6))
    # Color scale is pinned to [-1, 1], the full range of a correlation value.
    sns.heatmap(corr_matrix, vmin=-1, vmax=1, annot=True)
    plt.title(f"{method} correlation")

    return corr_matrix


def plot_data_corr(df, title, color="green"):
    """Scatter the first column of ``df`` against each of the other columns.

    The correlation heatmap is drawn first via ``display_correlation`` (default
    method), then one scatter panel per remaining column is laid out in a
    single row. Each panel title is ``title[i]`` followed by the correlation
    between column 0 and column ``i``.

    Args:
        df: DataFrame with at least two columns. Column 0 supplies the x
            values, the x-axis label and the reference for ``r``.
        title: Sequence of strings indexed by column position; ``title[i]``
            labels the panel for ``df.columns[i]`` (index 0 is not used).
        color: Marker color forwarded to ``Axes.scatter``.
    """
    r = display_correlation(df)
    n_columns = len(df.columns)

    # squeeze=False always yields a 2-D grid of Axes. With the default, a single
    # panel comes back as a bare Axes object and indexing it raises.
    fig, axes = plt.subplots(
        nrows=1, ncols=n_columns - 1, figsize=(14, 3), squeeze=False
    )

    # Take x by position so the plotted data agrees with the x label and with
    # r.values[0, i], both of which refer to column 0 (not a column named "X").
    x_label = df.columns[0]
    x_values = df.iloc[:, 0]

    for i in range(1, n_columns):
        panel = axes[0, i - 1]
        panel.scatter(x_values, df.iloc[:, i], color=color)
        panel.title.set_text(title[i] + "\n r = " + "{:.2f}".format(r.values[0, i]))
        panel.set(xlabel=x_label, ylabel=df.columns[i])

    fig.subplots_adjust(wspace=0.7)
    plt.show()

In [4]:
def percentile_of_score(value, series, kind="strict", round_precision=1):
    """Return the rounded percentile rank of ``value`` within ``series``.

    Args:
        value: Score whose percentile rank is wanted.
        series: Values the score is ranked against.
        kind: Forwarded unchanged to ``scipy.stats.percentileofscore``.
        round_precision: Number of decimals passed to ``round``.

    Returns:
        The result of ``stats.percentileofscore`` rounded to
        ``round_precision`` decimals.
    """
    percentile = stats.percentileofscore(series, value, kind=kind)
    return round(percentile, round_precision)

In [5]:
def prptn_to_pct(val, precision=3):
    """Convert a proportion (e.g. ``0.123``) to a percentage (``12.3``).

    Args:
        val: Proportion to convert.
        precision: Decimal places kept on the *proportion*; the returned
            percentage therefore keeps ``precision - 2`` decimal places.

    Returns:
        ``val`` expressed as a percentage, rounded.
    """
    if precision is None:
        # round(val, None) rounds to the nearest integer; keep that behavior.
        return round(val) * 100
    # Scale first, then round. Rounding the proportion and multiplying by 100
    # afterwards reintroduces float error, e.g. 0.57 * 100 -> 56.99999999999999.
    return round(val * 100, precision - 2)

In [6]:
def _datawrapper_table(df_distribution, index_name, column_labels, insurer):
    """Relabel, transpose and tag a distribution table, leaving the input intact.

    Args:
        df_distribution: Source table; it must have exactly
            ``len(column_labels)`` columns.
        index_name: Name given to the source index (it becomes the name of the
            column axis after the transpose).
        column_labels: New column labels; they become the row labels of the
            returned table.
        insurer: Value written to every row of the added ``"Insurer"`` column.

    Returns:
        A new, transposed DataFrame with an extra ``"Insurer"`` column.
    """
    # Work on a copy: renaming the index/columns of the caller's frame would
    # silently change it for every later cell that reuses the same object.
    table = df_distribution.copy()
    table.index = table.index.set_names([index_name])
    table.columns = column_labels
    table = table.transpose()
    table["Insurer"] = insurer
    return table


def datawrapper_race_distribution(df_distribution, insurer):
    """Reshape a three-column race distribution table and tag it with ``insurer``.

    The columns are relabelled "Black", "White", "Total", the index is named
    "race", and the table is transposed before the "Insurer" column is added.
    The input DataFrame is not modified.
    """
    return _datawrapper_table(
        df_distribution,
        "race",
        ["Black", "White", "Total"],
        insurer,
    )


def datawrapper_income_distribution(df_distribution, insurer):
    """Reshape a five-column income distribution table and tag it with ``insurer``.

    The columns are relabelled with the five income labels below, the index is
    named "income", and the table is transposed before the "Insurer" column is
    added. The input DataFrame is not modified.
    """
    # NOTE(review): "Highest incomes" is plural unlike its siblings; it is kept
    # as-is because downstream charts may key on the exact label - confirm.
    income_labels = [
        "Lowest income",
        "Lower income",
        "Middle income",
        "Higher income",
        "Highest incomes",
    ]
    return _datawrapper_table(df_distribution, "income", income_labels, insurer)


def datawrapper_pop_density_distribution(df_distribution, insurer):
    """Reshape a five-column population density table and tag it with ``insurer``.

    The columns are relabelled with the five density labels below, the index is
    named "Population density", and the table is transposed before the
    "Insurer" column is added. The input DataFrame is not modified.
    """
    density_labels = [
        "Lowest density",
        "Lower density",
        "Middle density",
        "Higher density",
        "Highest density",
    ]
    return _datawrapper_table(
        df_distribution, "Population density", density_labels, insurer
    )

In [7]:
def stacked_hbar(
    df,
    x,
    y,
    color,
    color_order="descending",
    sort_y="descending",
    height=500,
    width=700,
    title="Chart",
    order="ascending",
):
    """Build a stacked bar chart with Altair.

    Args:
        df: Data handed to ``alt.Chart``.
        x: Specification for the x channel, passed to ``alt.X``.
        y: Specification for the y channel, passed to ``alt.Y``.
        color: Specification for the color channel, passed to ``alt.Color``.
        color_order: Not used by the chart; accepted so that existing callers
            passing it keep working.
        sort_y: Sort specification forwarded to ``alt.Y(..., sort=sort_y)``.
        height: Chart height forwarded to ``.properties``.
        width: Chart width forwarded to ``.properties``.
        title: Chart title forwarded to ``.properties``.
        order: Specification for the order channel, passed to ``alt.Order``.

    Returns:
        The configured Altair chart.
    """
    bars = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(x),
            y=alt.Y(y, sort=sort_y),
            color=alt.Color(color),
            order=alt.Order(order),
        )
    )
    # Width must come from ``width``; it used to be tied to ``height``, which
    # made the ``width`` argument a no-op.
    return bars.properties(height=height, width=width, title=title)


def stacked_race_hbar(df_distribution, title="Chart"):
    """Chart a race distribution table as stacked bars, one bar per group.

    The index of ``df_distribution`` is turned into the "Effect quintile"
    column and the three remaining columns are labelled as Black, White and
    Total population. The table is then melted to long form and handed to
    ``stacked_hbar``.

    Args:
        df_distribution: Table whose ``reset_index()`` has exactly four columns.
        title: Chart title forwarded to ``stacked_hbar``.

    Returns:
        Whatever ``stacked_hbar`` returns for the reshaped data.
    """
    # Fixed top-to-bottom order of the bars on the y axis.
    group_order = ["Total population", "White population", "Black population"]

    wide = df_distribution.reset_index()
    wide.columns = [
        "Effect quintile",
        "Black population",
        "White population",
        "Total population",
    ]

    tidy = wide.melt(
        id_vars="Effect quintile",
        var_name="Population group",
        value_name="Percent",
    )
    tidy["Effect quintile"] = tidy["Effect quintile"].astype("category")

    return stacked_hbar(
        tidy,
        x="Percent",
        y="Population group",
        color="Effect quintile",
        sort_y=group_order,
        title=title,
        order="color_Effect_quintile_sort_index:Q",
    )


def stacked_income_hbar(df_distribution, title="Chart", columns=CHART_COLS):
    """Chart an income distribution table as stacked bars, one bar per group.

    Args:
        df_distribution: Table to chart; its index is moved into a column with
            ``reset_index()``.
        title: Chart title forwarded to ``stacked_hbar``.
        columns: Labels assigned to the reset-index table. When falsy, the
            existing labels are kept; either way an "Effect quintile" column
            must be present for the melt.

    Returns:
        Whatever ``stacked_hbar`` returns for the reshaped data.
    """
    # Fixed order of the bars on the y axis.
    group_order = ["Lowest", "Lower middle", "Median", "Upper middle", "Highest"]

    wide = df_distribution.reset_index()
    if columns:
        wide.columns = columns

    tidy = wide.melt(
        id_vars="Effect quintile",
        var_name="Income group",
        value_name="Percent",
    )
    tidy["Effect quintile"] = tidy["Effect quintile"].astype("category")

    return stacked_hbar(
        tidy,
        x="Percent",
        y="Income group",
        color="Effect quintile",
        sort_y=group_order,
        title=title,
        order="color_Effect_quintile_sort_index:Q",
    )


def stacked_population_density_hbar(df_distribution, title="Chart", columns=CHART_COLS):
    """Chart a population density table as stacked bars, one bar per group.

    Args:
        df_distribution: Table to chart; its index is moved into a column with
            ``reset_index()``.
        title: Chart title forwarded to ``stacked_hbar``.
        columns: Labels assigned to the reset-index table. When falsy, the
            existing labels are kept; either way an "Effect quintile" column
            must be present for the melt.

    Returns:
        Whatever ``stacked_hbar`` returns for the reshaped data.
    """
    # Fixed order of the bars on the y axis.
    group_order = ["Lowest", "Lower middle", "Median", "Upper middle", "Highest"]

    wide = df_distribution.reset_index()
    if columns:
        wide.columns = columns

    tidy = wide.melt(
        id_vars="Effect quintile",
        var_name="Population density group",
        value_name="Percent",
    )
    tidy["Effect quintile"] = tidy["Effect quintile"].astype("category")

    return stacked_hbar(
        tidy,
        x="Percent",
        y="Population density group",
        color="Effect quintile",
        sort_y=group_order,
        title=title,
        order="color_Effect_quintile_sort_index:Q",
    )

# Copy source files

Copy map source files

In [2]:
# !mkdir ./map_files/
# !cp ../02_allstate/outputs/allstate_auto_clean.geojson ./map_files/
# !cp ../03_auto_club_group/outputs/autoclub_auto_clean.geojson ./map_files
# !cp ../04_liberty_mutual/outputs/libertymutual_auto_gis.geojson ./map_files
# !cp ../05_state_farm/outputs/statefarm_auto_clean_gis.zip ./map_files
# !cp ../06_citizens/outputs/citizens_auto_clean.geojson ./map_files

# Appendix

Retain functions that are no longer in use