In [19]:
import pandas as pd
from pathlib import Path

In [20]:
DATA_PATH = Path("data_out/ma2018_plan_county_year_inner.csv")
if not DATA_PATH.exists():
    # Fail early with a pointer to the notebook that produces the input file.
    raise FileNotFoundError("Run hwk1_code.ipynb first to create data_out/ma2018_plan_county_year_inner.csv")

# low_memory=False lets pandas infer each column's dtype from the whole file
# instead of chunk by chunk, avoiding mixed-type columns.
ma2018 = pd.read_csv(DATA_PATH, low_memory=False)

# Basic cleaning / types.
# Coerce BEFORE dropping rows: errors="coerce" turns unparseable values into NaN,
# so a non-null but non-numeric planid only shows up as missing in planid_num.
ma2018["planid_num"] = pd.to_numeric(ma2018["planid"], errors="coerce")
ma2018["avg_enrollment"] = pd.to_numeric(ma2018["avg_enrollment"], errors="coerce")

# Drop rows lacking any field needed to identify or classify a plan.
# planid_num is included so that rows whose planid failed numeric parsing do not
# reach the key construction below, where they would all become "<contract>-<NA>"
# and be counted as a single spurious plan.
ma2018 = ma2018.dropna(subset=["contractid", "planid", "planid_num", "plan_type"]).copy()

# Define a "plan" as contractid + planid (same plan can appear in many counties).
# The Int64 cast renders the id as "1" rather than "1.0" in the key.
ma2018["plan_key"] = ma2018["contractid"].astype(str) + "-" + ma2018["planid_num"].astype("Int64").astype(str)

In [21]:
# TABLE 1: Plan count by plan type (2018)
# Counts distinct plan_key values within each plan_type; nunique already ignores
# repeated keys, so no explicit de-duplication step is needed first.

table1 = (
    ma2018.groupby("plan_type")["plan_key"]
    .nunique()
    .reset_index(name="2018")
    .rename(columns={"plan_type": "Type"})
    .sort_values("2018", ascending=False)  # largest plan types first
)

table1

Unnamed: 0,Type,2018
1,HMO/HMOPOS,2678
2,Local PPO,966
5,National PACE,254
7,Regional PPO,109
0,1876 Cost,101
4,Medicare-Medicaid Plan HMO/HMOPOS,54
6,PFFS,50
3,MSA,5


In [22]:
# TABLE 2: Plan count by plan type (2018) excluding SNP, EGHP, and 800-series plans

# A row is excluded when ANY of the three conditions holds, so the kept rows are
# the negation of their union. The snp/eghp flags are compared case-insensitively
# against "YES"; planid_num 800..899 (both ends inclusive) marks the 800-series.
df2 = ma2018.loc[
    ~(
        ma2018["snp"].astype(str).str.upper().eq("YES")
        | ma2018["eghp"].astype(str).str.upper().eq("YES")
        | ma2018["planid_num"].between(800, 899, inclusive="both")
    )
].copy()

# Same tabulation as Table 1, on the filtered rows: distinct plan_key per plan_type.
table2 = (
    df2.groupby("plan_type")["plan_key"]
    .nunique()
    .reset_index(name="2018")
    .rename(columns={"plan_type": "Type"})
    .sort_values("2018", ascending=False)  # largest plan types first
)

table2

Unnamed: 0,Type,2018
1,HMO/HMOPOS,1569
2,Local PPO,569
5,National PACE,254
0,1876 Cost,93
4,Medicare-Medicaid Plan HMO/HMOPOS,54
7,Regional PPO,49
6,PFFS,46
3,MSA,3


In [24]:
# TABLE 3: Mean avg_enrollment by plan type (2018), computed on df2
# NOTE(review): the mean is taken over every row of df2 in each plan_type group,
# not over distinct plans — presumably one row per plan-county; confirm this is
# the intended unit of averaging.

table3 = (
    df2.groupby("plan_type")["avg_enrollment"]
    .mean()
    .reset_index(name="2018")
    .rename(columns={"plan_type": "Type"})
    .sort_values("2018", ascending=False)
    # Round only after sorting so the ordering reflects the unrounded means.
    .assign(**{"2018": lambda ranked: ranked["2018"].round(2)})
)

table3

Unnamed: 0,Type,2018
4,Medicare-Medicaid Plan HMO/HMOPOS,892.21
1,HMO/HMOPOS,285.25
7,Regional PPO,146.48
0,1876 Cost,116.17
2,Local PPO,68.15
6,PFFS,53.33
5,National PACE,45.04
3,MSA,33.8
