In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import pysal as ps
%matplotlib inline
%config IPCompleter.greedy=True



In [2]:
def _read_tsv(path):
    """Read one tab-separated text file into a DataFrame."""
    return pd.read_csv(path, sep='\t')


# County-level tables.
df = _read_tsv('fent_county_year.txt')
fent_all_years = _read_tsv('all_fentanyl_by_county.txt')
fent_13_17 = _read_tsv('all_fent_by_county_13-17.txt')

# National-level tables.
national_fent = _read_tsv('national_fent.txt')
national_all_opioids = _read_tsv('grouped_by_year_not_county_all_opioids.txt')
national_fent_cocaine = _read_tsv('fent_and_cocaine.txt')

# State-level table.
fent_by_state = _read_tsv('fent_state.txt')

In [3]:
def _tidy(frame, column_names, drop_first=('Notes',), drop_last=()):
    """Strip helper columns and incomplete rows, then rename the columns.

    Steps, in this order:
      1. drop the ``drop_first`` columns,
      2. drop every row that still contains a NaN,
      3. drop the ``drop_last`` columns (these therefore still take part in
         the NaN check of step 2),
      4. rename the remaining columns positionally to ``column_names``.

    Returns the new DataFrame; ``frame`` itself is not modified.
    """
    trimmed = frame.drop(list(drop_first), axis='columns').dropna()
    if drop_last:
        trimmed = trimmed.drop(list(drop_last), axis='columns')
    trimmed.columns = column_names
    return trimmed


_COUNTY_COLUMNS = ['county', 'county_code', 'deaths', 'population', 'crude_rate', 'aa_rate']
_YEARLY_COLUMNS = ['year', 'deaths', 'population', 'crude_rate', 'aa_rate']

# County x year table: 'Year Code' is removed only after the NaN filter.
df = _tidy(
    df,
    ['county', 'county_code', 'year', 'deaths', 'population', 'crude_rate', 'aa_rate'],
    drop_last=('Year Code',),
)

# County tables aggregated over years (no year columns present).
fent_all_years = _tidy(fent_all_years, list(_COUNTY_COLUMNS))
fent_13_17 = _tidy(fent_13_17, list(_COUNTY_COLUMNS))

# National tables, one row per year.
national_fent = _tidy(national_fent, list(_YEARLY_COLUMNS), drop_last=('Year Code',))
national_all_opioids = _tidy(
    national_all_opioids,
    ['year', 'deaths', 'population', 'crude_rate'],
    drop_last=('Year Code',),
)
national_fent_cocaine = _tidy(national_fent_cocaine, list(_YEARLY_COLUMNS), drop_last=('Year Code',))

# State x year table: here 'Year Code' is removed together with 'Notes',
# i.e. before the NaN filter.
fent_by_state = _tidy(
    fent_by_state,
    ['state', 'state_code', 'year', 'deaths', 'population', 'crude_rate', 'aa_rate'],
    drop_first=('Notes', 'Year Code'),
)

In [4]:
# Alaska and Hawaii are removed because they make the map ugly.
def _state_suffix(county_label):
    """Return the last two characters of a county label (e.g. 'AL' in 'Mobile County, AL')."""
    return county_label[-2:]


_SKIPPED_ABBREVIATIONS = ['AK', 'HI']
_SKIPPED_STATE_NAMES = ['Alaska', 'Hawaii']

# County-level tables: derive the state abbreviation, then filter on it.
df['state'] = df['county'].apply(_state_suffix)
df = df[~df['state'].isin(_SKIPPED_ABBREVIATIONS)]

fent_13_17['state'] = fent_13_17['county'].apply(_state_suffix)
fent_13_17 = fent_13_17[~fent_13_17['state'].isin(_SKIPPED_ABBREVIATIONS)]

# State-level table: keep years after 2012; states are spelled out in full here.
fent_by_state_13on = fent_by_state[fent_by_state['year'] > 2012]
fent_by_state_13on = fent_by_state_13on[~fent_by_state_13on['state'].isin(_SKIPPED_STATE_NAMES)]

In [5]:
# Discard county-year rows whose death count is reported as 'Missing'.
df = df[df['deaths'] != 'Missing']

In [6]:
# Remove every row for 'Bedford city, VA'.
# NOTE(review): presumably it lacks rows for some years, which would break the
# per-year columns built from df further down - confirm.
df = df[df['county'] != 'Bedford city, VA']

In [7]:
# For now, suppressed (and missing) entries are counted as 0.
def convert_suppressed(entry):
    """Convert one raw death-count entry to an int.

    The placeholder strings 'Suppressed' and 'Missing' become 0; anything
    else is passed through ``int()``.
    """
    if entry == 'Suppressed' or entry == 'Missing':
        return 0
    return int(entry)
    
# Turn the raw 'deaths' column of every loaded table into integers.
for _table in (
    df,
    fent_all_years,
    fent_13_17,
    national_fent,
    national_all_opioids,
    national_fent_cocaine,
    fent_by_state,
):
    _table['deaths'] = _table['deaths'].apply(convert_suppressed)

In [60]:
# Build hdf: one row per county (taken from the 2017 rows of df) with one
# deaths column per year 2013-2017 (deaths13 ... deaths17).
# .copy() so the new columns are added to an independent frame rather than to
# a slice of df.
hdf = df.loc[df['year'] == 2017, ['county', 'county_code', 'state']].copy()
hdf = hdf.reset_index(drop=True)

# Attach each year's counts by county_code instead of by row position.
# Positional assignment (list(...)) is only correct when every year lists
# exactly the same counties in exactly the same order; matching on the key
# gives the same values in that case and NaN (instead of a length error or a
# silent misalignment) when a county has no row for a given year.
# Series.map needs county_code to be unique within a year and raises otherwise.
for _year in range(2013, 2018):
    _deaths_by_code = df.loc[df['year'] == _year].set_index('county_code')['deaths']
    hdf['deaths%d' % (_year % 100)] = hdf['county_code'].map(_deaths_by_code)

In [61]:
def clean_df(df):
    """Return a cleaned copy of a raw county-level table.

    Drops the 'Notes' column, removes rows containing NaN, renames the five
    remaining columns positionally, and converts the 'deaths' column with
    ``convert_suppressed``.
    """
    tidy = df.drop('Notes', axis='columns').dropna()
    tidy.columns = ['county', 'county_code', 'deaths', 'population', 'crude_rate']
    tidy['deaths'] = tidy['deaths'].apply(convert_suppressed)
    return tidy
# One county-level table per excluded year (df_no_13 ... df_no_17), each
# loaded from its own tab-separated file and cleaned with clean_df.
df_no_13, df_no_14, df_no_15, df_no_16, df_no_17 = [
    clean_df(pd.read_csv(_path, sep='\t'))
    for _path in (
        'fent_county_14_17.txt',
        'fent_13151617.txt',
        'fent_13141617.txt',
        'fent_13141517.txt',
        'fent_13141516.txt',
    )
]

In [65]:
# Attach to hdf, matched on county name, the 'deaths' value of the 2013-17
# table (total_deaths) and of each table that leaves one year out
# (deaths_no_13 ... deaths_no_17).
_death_sources = [
    ('total_deaths', fent_13_17),
    ('deaths_no_13', df_no_13),
    ('deaths_no_14', df_no_14),
    ('deaths_no_15', df_no_15),
    ('deaths_no_16', df_no_16),
    ('deaths_no_17', df_no_17),
]
for _column, _source in _death_sources:
    # validate='many_to_one' makes pandas raise if a county name occurs more
    # than once in the source table. Without it such a duplicate would add
    # rows to the merge result and the values would be attached to the wrong
    # counties without any error.
    _merged = hdf.merge(
        _source[['county', 'deaths']],
        on='county',
        how='left',
        validate='many_to_one',
    )
    # A validated left merge has exactly one row per hdf row, in hdf's order,
    # so the values can be assigned positionally regardless of hdf's index.
    hdf[_column] = _merged['deaths'].values

In [68]:
def _infer_year_deaths(row, year_suffix):
    """Shared logic behind interpolate_13 ... interpolate_17.

    Parameters
    ----------
    row : object exposing ``deaths<yy>``, ``deaths_no_<yy>`` and
        ``total_deaths`` as attributes (e.g. a row of hdf).
    year_suffix : str
        Two-digit year, e.g. '13'.

    Returns
    -------
    * ``row.deaths<yy>`` when it is positive;
    * otherwise ``row.total_deaths - row.deaths_no_<yy>`` when the
      count without that year is positive;
    * otherwise 0.
    """
    reported = getattr(row, 'deaths' + year_suffix)
    if reported > 0:
        return reported
    # The single-year count is 0 (suppressed entries were set to 0 earlier),
    # so recover it from the total minus the total excluding this year.
    excluding_year = getattr(row, 'deaths_no_' + year_suffix)
    if excluding_year > 0:
        return row.total_deaths - excluding_year
    return 0


def interpolate_13(row):
    """Deaths for 2013: reported value, else total minus the no-2013 count, else 0."""
    return _infer_year_deaths(row, '13')


def interpolate_14(row):
    """Deaths for 2014: reported value, else total minus the no-2014 count, else 0."""
    return _infer_year_deaths(row, '14')


def interpolate_15(row):
    """Deaths for 2015: reported value, else total minus the no-2015 count, else 0."""
    return _infer_year_deaths(row, '15')


def interpolate_16(row):
    """Deaths for 2016: reported value, else total minus the no-2016 count, else 0."""
    return _infer_year_deaths(row, '16')


def interpolate_17(row):
    """Deaths for 2017: reported value, else total minus the no-2017 count, else 0."""
    return _infer_year_deaths(row, '17')

hdf['deaths13'] = hdf.apply(interpolate_13, axis = 'columns')
hdf['deaths14'] = hdf.apply(interpolate_14, axis = 'columns')
hdf['deaths15'] = hdf.apply(interpolate_15, axis = 'columns')
hdf['deaths16'] = hdf.apply(interpolate_16, axis = 'columns')
hdf['deaths17'] = hdf.apply(interpolate_17, axis = 'columns')

In [71]:
def interpolate_17_2(row):
    """Unfinished alternative to interpolate_17.

    The original definition had no body, which is a SyntaxError and stops the
    notebook from running top to bottom.
    TODO: implement the intended logic or delete this cell.
    """
    raise NotImplementedError('interpolate_17_2 has not been implemented')

25900

In [70]:
# Show the counties whose five yearly counts do not add up to total_deaths.
_yearly_sum = (
    hdf['deaths13']
    + hdf['deaths14']
    + hdf['deaths15']
    + hdf['deaths16']
    + hdf['deaths17']
)
hdf[_yearly_sum.ne(hdf['total_deaths'])]

Unnamed: 0,county,county_code,state,deaths13,deaths14,deaths15,deaths16,deaths17,total_deaths,deaths_no_13,deaths_no_14,deaths_no_15,deaths_no_16,deaths_no_17
4,"Blount County, AL",1009.0,AL,0,2,2,0,0,13,13,11,11,0,0
48,"Mobile County, AL",1097.0,AL,2,1,3,3,0,14,12,13,11,11,0
57,"St. Clair County, AL",1115.0,AL,0,3,1,5,0,15,15,12,14,10,0
62,"Tuscaloosa County, AL",1125.0,AL,1,0,0,1,0,11,10,0,11,10,0
75,"Mohave County, AZ",4015.0,AZ,0,0,1,1,0,11,0,0,10,10,11
97,"Craighead County, AR",5031.0,AR,0,0,0,0,0,10,0,0,0,10,0
99,"Crittenden County, AR",5035.0,AR,0,0,0,0,0,10,10,10,0,0,0
107,"Garland County, AR",5051.0,AR,0,0,0,0,0,10,0,0,0,0,0
168,"Humboldt County, CA",6023.0,CA,1,0,3,2,3,13,12,0,10,11,10
173,"Lake County, CA",6033.0,CA,0,3,2,2,0,13,13,10,11,11,0


In [58]:
# NOTE(review): check_df is not assigned in any cell visible in this notebook,
# and this cell's execution count (58) precedes the cells above it. On a fresh
# kernel this line would raise NameError - define check_df or remove the cell.
check_df

Unnamed: 0,county,county_code,state,deaths13,deaths14,deaths15,deaths16,deaths17,d1617,d151617,d14151617,total_deaths
0,"Autauga County, AL",1001.0,AL,0,0,0,0,0,0,0,0,0
1,"Baldwin County, AL",1003.0,AL,1,5,4,0,0,10,14,19,20
2,"Barbour County, AL",1005.0,AL,0,0,0,0,0,0,0,0,0
3,"Bibb County, AL",1007.0,AL,0,0,0,0,0,0,0,0,0
4,"Blount County, AL",1009.0,AL,0,2,0,0,0,0,11,13,13
5,"Bullock County, AL",1011.0,AL,0,0,0,0,0,0,0,0,0
6,"Butler County, AL",1013.0,AL,0,0,0,0,0,0,0,0,0
7,"Calhoun County, AL",1015.0,AL,0,0,0,0,0,0,0,0,0
8,"Chambers County, AL",1017.0,AL,0,0,0,0,0,0,0,0,0
9,"Cherokee County, AL",1019.0,AL,0,0,0,0,0,0,0,0,0
