In [10]:
# Dependencies and Setup
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import scipy.stats as sts
import seaborn
import pingouin as pg
from pingouin import pairwise_ttests

### Import opioid mortality rate dataset

In [11]:
# Location of the Health04 "S" table, relative to this notebook.
health04_S_file_path = "../Resources/Health04/Health04_S.csv"

# Parse the CSV, then build `mortality_df` as a DataFrame from the parsed table.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which looks like
# a row index saved into the file — consider index_col=0 if nothing relies on it.
mortality_data = pd.read_csv(filepath_or_buffer=health04_S_file_path)
mortality_df = pd.DataFrame(data=mortality_data)

# Preview the first three rows.
mortality_df.head(n=3)

Unnamed: 0,G_STATEFP,STATEFP,st_abb,st_name,opPrscRt19,prMisuse19,odMortRt14,odMortRt15,odMortRt16,odMortRt17,odMortRt18,odMortRt19,odMortRtAv
0,G01,1,AL,Alabama,39.1,4.55,15.2,15.7,16.2,18.0,16.6,16.3,16.33
1,G02,2,AK,Alaska,85.8,3.99,16.8,16.0,16.8,20.2,14.6,17.8,17.03
2,G04,4,AZ,Arizona,80.9,3.8,18.2,19.0,20.3,22.2,23.8,26.8,21.72


### Import dataset with prescription rate

In [12]:
# Location of the Health04 "C" table, relative to this notebook.
health04_C_file_path = "../Resources/Health04/Health04_C.csv"

# Parse the CSV into `script_data`.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which looks like
# a row index saved into the file — consider index_col=0 if nothing relies on it.
script_data = pd.read_csv(filepath_or_buffer=health04_C_file_path)

# Preview the first three rows.
script_data.head(n=3)

Unnamed: 0,G_COUNTYFP,COUNTYFP,st_abb,cnty_name,opPrscRt,odMortRt14,odMortRt15,odMortRt16,odMortRt17,odMortRt18,odMortRtAv
0,G01001,1001,AL,Autauga County,101.3,10.0,10.8,12.5,12.2,13.2,11.74
1,G01003,1003,AL,Baldwin County,67.6,17.7,19.6,17.5,16.6,16.6,17.6
2,G01005,1005,AL,Barbour County,27.2,9.9,12.2,12.4,12.8,14.6,12.38


In [3]:
### Create a dataframe with state abbreviation and opioid mortality rate from mortality_df

# Only 'st_abb' needs a new name ('STATE' is the join key used by the merges
# further down); 'odMortRtAv' keeps its original name.
mort_df = mortality_df[['st_abb', 'odMortRtAv']].rename(columns={'st_abb': 'STATE'})
mort_df.head(n=5)

### Import region dataset

# Location of the state-to-region lookup table, relative to this notebook.
region_file_path = "../Resources/Geographic/state_region.csv"
region_data = pd.read_csv(filepath_or_buffer=region_file_path)

# Keep the three columns needed here and rename them to upper-case names, so the
# state code column is called 'STATE' like the join key in the other frames.
region_df = (
    region_data[['State Code', 'Region', 'Division']]
    .rename(columns={
        'State Code': 'STATE',
        'Region': 'REGION',
        'Division': 'DIVISION',
    })
)
region_df.head(n=5)

### Merge the mortality and region DataFrames

# Inner join on the shared 'STATE' column: a state is kept only if it appears
# in both the region table and the mortality table.
mort_region_df = region_df.merge(mort_df, how="inner", on="STATE")
mort_region_df.head(n=3)

### Import treatment facility dataset

# Location of the N-SSATS 2020 public-use CSV, relative to this notebook.
nssats_file_path = "../Resources/NSSATS/NSSATS_PUF_2020_CSV.csv"

# low_memory=False disables pandas' chunked parsing, so each column's dtype is
# inferred from the whole file instead of chunk by chunk.
facility_data = pd.read_csv(filepath_or_buffer=nssats_file_path, low_memory=False)
facility_df = pd.DataFrame(data=facility_data)

# Preview the first three rows of the parsed file.
facility_data.head(n=3)

# (rows, columns) of the facility table.
facility_df.shape

### Remove jurisdictions not included in the Health04 dataset

# Jurisdiction codes to drop from the facility data before merging.
rejected_state_codes = ['PR', 'ZZ']

# Boolean mask: True for facility rows whose STATE is one of the rejected codes.
facility_rejected_state_s = facility_df.STATE.isin(rejected_state_codes)

# Filter exactly once, driven by `rejected_state_codes`. The earlier version
# computed the same filter three times, displayed whole frames, and finally
# rebuilt `fac_df` from hard-coded 'PR'/'ZZ' comparisons chained through two
# `.loc` calls, where the second mask was built on the unfiltered frame.
fac_df = facility_df.loc[~facility_rejected_state_s]

# Sanity check: number of distinct jurisdictions remaining after the filter.
fac_df.STATE.nunique()

### Merge mortality and facility dfs

# Inner join on 'STATE': each facility row is paired with the region and
# mortality columns of its state; rows without a match on either side drop out.
df = mort_region_df.merge(fac_df, how="inner", on="STATE")
print(df.shape)
df.head(n=3)