# Opioids Project

Ra'Kira Nelson and Alexa Fahrer

In [1]:
import pandas as pd

pd.set_option("mode.copy_on_write", True)

## Prescriptions

In [2]:
prescriptions_raw = pd.read_parquet("ids590_opioids_by_drug_county_year.parquet")

In [3]:
prescriptions = prescriptions_raw.copy()
prescriptions

Unnamed: 0,buyer_state,buyer_county,year,drug_name,mme_conversion_factor,calc_base_wt_in_gm
0,AK,ANCHORAGE,2006,BUPRENORPHINE,30.0,125.154336
1,AK,ANCHORAGE,2006,BUPRENORPHINE,75.0,0.216367
2,AK,ANCHORAGE,2006,CODEINE,0.15,13362.99003
3,AK,ANCHORAGE,2006,DIHYDROCODEINE,0.25,4.27264
4,AK,ANCHORAGE,2006,FENTANYL,100.0,476.051283
...,...,...,...,...,...,...
563626,WY,WESTON,2019,METHADONE,4.0,25.4904
563627,WY,WESTON,2019,MORPHINE,1.0,332.57952
563628,WY,WESTON,2019,OXYCODONE,1.5,621.053064
563629,WY,WESTON,2019,OXYMORPHONE,3.0,9.63468


In [4]:
prescriptions["mme_conversion_factor"] = (
    prescriptions["mme_conversion_factor"].to_numpy().astype("float64")
)
prescriptions["calc_base_wt_in_gm"] = (
    prescriptions["calc_base_wt_in_gm"].to_numpy().astype("float64")
)
prescriptions["buyer_county"] = prescriptions["buyer_county"].str.upper().str.strip()
prescriptions["buyer_state"] = prescriptions["buyer_state"].str.upper().str.strip()

prescriptions = prescriptions[
    ~prescriptions["buyer_state"].isin(["PR", "VI", "GU", "MP", "AS", "PW"])
]

In [5]:
prescriptions

Unnamed: 0,buyer_state,buyer_county,year,drug_name,mme_conversion_factor,calc_base_wt_in_gm
0,AK,ANCHORAGE,2006,BUPRENORPHINE,30.00,125.154336
1,AK,ANCHORAGE,2006,BUPRENORPHINE,75.00,0.216367
2,AK,ANCHORAGE,2006,CODEINE,0.15,13362.990030
3,AK,ANCHORAGE,2006,DIHYDROCODEINE,0.25,4.272640
4,AK,ANCHORAGE,2006,FENTANYL,100.00,476.051283
...,...,...,...,...,...,...
563626,WY,WESTON,2019,METHADONE,4.00,25.490400
563627,WY,WESTON,2019,MORPHINE,1.00,332.579520
563628,WY,WESTON,2019,OXYCODONE,1.50,621.053064
563629,WY,WESTON,2019,OXYMORPHONE,3.00,9.634680


## Deaths

In [6]:
deaths_dfs = {}
for year in range(2003, 2016):
    key = f"deaths_{year}"
    url = (
        "https://media.githubusercontent.com/media/nickeubank/ids540_opioid_data/"
        f"refs/heads/main/vitalstatistics/Underlying%20Cause%20of%20Death%2C%20{year}.txt"
    )

    df = pd.read_csv(url, sep="\t", skipfooter=15, engine="python")
    df = df.drop(columns=["Notes"])
    deaths_dfs[key] = df

deaths = pd.concat(
    [deaths_dfs[f"deaths_{year}"].assign(year=year) for year in range(2006, 2016)],
    ignore_index=True,
)

In [7]:
deaths["Year"] = pd.to_numeric(deaths["Year"], errors="coerce").astype("Int64")
# deaths[["County Name", "State"]] = deaths["County"].str.split(",", n=1, expand=True)
# deaths["County Name"] = deaths["County Name"].str.strip()
# deaths["State"] = deaths["State"].str.strip()
# deaths["County Name"] = (
#    deaths["County Name"].str.replace(r"\bCounty\b", "", regex=True).str.strip()
# )
deaths = deaths[
    deaths["Drug/Alcohol Induced Cause"]
    == "Drug poisonings (overdose) Unintentional (X40-X44)"
]
deaths["fips"] = deaths["County Code"].astype(str).str.zfill(5)
deaths = deaths.drop(columns=["Year Code", "year", "County Code"])

In [8]:
deaths

Unnamed: 0,County,Year,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,fips
1,"Baldwin County, AL",2006,Drug poisonings (overdose) Unintentional (X40-...,D1,11.0,01003
12,"Chilton County, AL",2006,Drug poisonings (overdose) Unintentional (X40-...,D1,13.0,01021
39,"Jefferson County, AL",2006,Drug poisonings (overdose) Unintentional (X40-...,D1,55.0,01073
55,"Mobile County, AL",2006,Drug poisonings (overdose) Unintentional (X40-...,D1,23.0,01097
60,"Montgomery County, AL",2006,Drug poisonings (overdose) Unintentional (X40-...,D1,12.0,01101
...,...,...,...,...,...,...
44778,"Waukesha County, WI",2015,Drug poisonings (overdose) Unintentional (X40-...,D1,34,55133
44784,"Winnebago County, WI",2015,Drug poisonings (overdose) Unintentional (X40-...,D1,22,55139
44794,"Fremont County, WY",2015,Drug poisonings (overdose) Unintentional (X40-...,D1,10,56013
44800,"Laramie County, WY",2015,Drug poisonings (overdose) Unintentional (X40-...,D1,13,56021


## FIPS

In [9]:
fips = pd.read_excel("US_FIPS_Codes.xls", skiprows=1)

In [10]:
fips["fips"] = fips["FIPS State"].astype(str).str.zfill(2) + fips["FIPS County"].astype(
    str
).str.zfill(3)

us_state_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "District of Columbia": "DC",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
}

fips["state_abbrev"] = fips["State"].map(us_state_abbrev)
fips["County Name"] = fips["County Name"].str.upper().str.strip()
fips["state_abbrev"] = fips["state_abbrev"].str.upper().str.strip()

fips["County Name"] = (
    fips["County Name"]
    .str.upper()
    .str.strip()
    .str.replace(r"^ST[.\s]+", "SAINT ", regex=True)
)

fips

Unnamed: 0,State,County Name,FIPS State,FIPS County,fips,state_abbrev
0,Alabama,AUTAUGA,1,1,01001,AL
1,Alabama,BALDWIN,1,3,01003,AL
2,Alabama,BARBOUR,1,5,01005,AL
3,Alabama,BIBB,1,7,01007,AL
4,Alabama,BLOUNT,1,9,01009,AL
...,...,...,...,...,...,...
3137,Wyoming,SWEETWATER,56,37,56037,WY
3138,Wyoming,TETON,56,39,56039,WY
3139,Wyoming,UINTA,56,41,56041,WY
3140,Wyoming,WASHAKIE,56,43,56043,WY


In [11]:
prescriptions["buyer_county"] = prescriptions["buyer_county"].replace(
    {
        # Alaska
        "PETERSBURG": "WRANGELL PETERSBURG",
        "PRINCE OF WALES HYDER": "PRINCE WALES KETCHIKAN",
        "SKAGWAY": "SKAGWAY HOONAH ANGOON",
        "WRANGELL": "WRANGELL PETERSBURG",
        # Georgia / Illinois
        "DEKALB": "DE KALB",
        # Illinois
        "DUPAGE": "DU PAGE",
        # Indiana
        "ST JOSEPH": "SAINT JOSEPH",
        # Louisiana
        "ST JOHN THE BAPTIST": "SAINT JOHN THE BAPTIST",
        # Missouri
        "SAINTE GENEVIEVE": "STE GENEVIEVE",
        # Mississippi
        "DESOTO": "DE SOTO",
        # Virginia
        "SALEM": "SALEM CITY",
    }
)

In [12]:
prescriptions_fips_merged = pd.merge(
    prescriptions,
    fips[["state_abbrev", "County Name", "fips"]],
    left_on=["buyer_state", "buyer_county"],
    right_on=["state_abbrev", "County Name"],
    how="left",
    indicator=True,
    validate="m:1",
)
prescriptions_fips_merged

prescriptions_merged = prescriptions_fips_merged.drop(
    columns=["state_abbrev", "County Name", "_merge"]
).copy()

In [13]:
print(prescriptions_fips_merged["_merge"].value_counts())

_merge
both          553223
left_only        674
right_only         0
Name: count, dtype: int64


In [14]:
unmatched = prescriptions_fips_merged[
    prescriptions_fips_merged["_merge"] == "left_only"
]
unmatched[["buyer_state", "buyer_county"]].drop_duplicates().sort_values(
    ["buyer_state", "buyer_county"]
)

Unnamed: 0,buyer_state,buyer_county
24515,AR,MONTGOMERY
256805,MO,DE KALB
337393,NJ,SALEM CITY
433595,TN,DE KALB


## Population

In [15]:
pop_1 = pd.read_csv("co-est00int-tot.csv", encoding="latin1")
pop_2 = pd.read_csv("co-est2020.csv", encoding="latin1")

In [16]:
p1_sub = pop_1[
    [
        "STATE",
        "COUNTY",
        "STNAME",
        "CTYNAME",
        "POPESTIMATE2006",
        "POPESTIMATE2007",
        "POPESTIMATE2008",
        "POPESTIMATE2009",
    ]
].copy()
p2_sub = pop_2[
    [
        "STATE",
        "COUNTY",
        "STNAME",
        "CTYNAME",
        "POPESTIMATE2010",
        "POPESTIMATE2011",
        "POPESTIMATE2012",
        "POPESTIMATE2013",
        "POPESTIMATE2014",
        "POPESTIMATE2015",
    ]
].copy()
pop_merged = p1_sub.merge(
    p2_sub, on=["STATE", "COUNTY", "STNAME", "CTYNAME"], how="inner"
)
pop_merged["fips"] = pop_merged["STATE"].astype(str).str.zfill(2) + pop_merged[
    "COUNTY"
].astype(str).str.zfill(3)

In [17]:
pop_merged

Unnamed: 0,STATE,COUNTY,STNAME,CTYNAME,POPESTIMATE2006,POPESTIMATE2007,POPESTIMATE2008,POPESTIMATE2009,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,fips
0,1,0,Alabama,Alabama,4628981,4672840,4718206,4757938,4785514,4799642,4816632,4831586,4843737,4854803,01000
1,1,1,Alabama,Autauga County,51328,52405,53277,54135,54761,55229,54970,54747,54922,54903,01001
2,1,3,Alabama,Baldwin County,168121,172404,175827,179406,183121,186579,190203,194978,199306,203101,01003
3,1,5,Alabama,Barbour County,27861,27757,27808,27657,27325,27344,27172,26946,26768,26300,01005
4,1,7,Alabama,Bibb County,22099,22438,22705,22941,22858,22736,22657,22510,22541,22553,01007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3183,56,37,Wyoming,Sweetwater County,39749,41470,42358,44133,43580,44000,45032,45189,44996,44780,56037
3184,56,39,Wyoming,Teton County,20014,20472,20988,21232,21298,21422,21643,22335,22801,23083,56039
3185,56,41,Wyoming,Uinta County,19709,20171,20613,21054,21090,20901,21008,20969,20835,20777,56041
3186,56,43,Wyoming,Washakie County,7979,8169,8229,8423,8531,8451,8410,8417,8277,8282,56043


In [18]:
year_cols = [c for c in pop_merged.columns if c.startswith("POPESTIMATE")]
pop_long = pop_merged.melt(
    id_vars=["fips", "STNAME", "CTYNAME"],
    value_vars=year_cols,
    var_name="pop_var",
    value_name="population",
)
pop_long["Year"] = pop_long["pop_var"].str.extract(r"(\d{4})").astype(int)
pop_long = pop_long.drop(columns=["pop_var"])
pop_long

Unnamed: 0,fips,STNAME,CTYNAME,population,Year
0,01000,Alabama,Alabama,4628981,2006
1,01001,Alabama,Autauga County,51328,2006
2,01003,Alabama,Baldwin County,168121,2006
3,01005,Alabama,Barbour County,27861,2006
4,01007,Alabama,Bibb County,22099,2006
...,...,...,...,...,...
31875,56037,Wyoming,Sweetwater County,44780,2015
31876,56039,Wyoming,Teton County,23083,2015
31877,56041,Wyoming,Uinta County,20777,2015
31878,56043,Wyoming,Washakie County,8282,2015


## Merging

In [19]:
deaths_with_pop = pd.merge(
    deaths,
    pop_long[["fips", "Year", "population"]],
    on=["fips", "Year"],
    how="left",
    validate="m:1",
    indicator=True,
)
print(deaths_with_pop["_merge"].value_counts())

_merge
both          6324
left_only        5
right_only       0
Name: count, dtype: int64


In [None]:
prescriptions_with_pop = pd.merge(
    prescriptions_merged,
    pop_long[["fips", "Year", "population"]].rename(columns={"Year": "year"}),
    on=["fips", "year"],
    how="left",
    validate="m:1",
    indicator=True,
)
print(prescriptions_with_pop["_merge"].value_counts())

_merge
both          396751
left_only     157146
right_only         0
Name: count, dtype: int64
