In [None]:
import os
import pprint

import pandas as pd
from sklearn.feature_selection import mutual_info_regression

import censusgeocode as cg

In [None]:
# Directory that holds the input CSV files. Override it with the
# OPIOD_SOS_DATA_DIR environment variable; the original author's local path is
# kept as the default so existing setups continue to work.
# Later cells build file names with plain string concatenation
# (DATA_DIR + DATA_SET + '.csv'), so os.path.join(..., '') is used to guarantee
# a trailing path separator.
DATA_DIR = os.path.join(
    os.environ.get('OPIOD_SOS_DATA_DIR', '/Users/karenblakemore/opiod-sos/data/'),
    '',
)

# Columns of the combined per-census-tract feature table assembled below.
column_names = ['Census_Tract_Number',        # FIPS Census Tract Number
                'Mortality_Rate',             # Age-adjusted (15 years & older) Mortality Rate per 100,000
                'Frequent_Mental_Distress',   # Predicted prevalence of Frequent Mental Distress in adults
                'Fair_Poor_Health',           # Predicted prevalence of Fair or Poor Health Status among adults
                'Frequent_Physical_Distress', # Predicted prevalence of physical distress in adults
                'Delayed_Medical_Care',       # Predicted prevalence of delayed medical care because of cost in adults
                'Life_Expectancy',            # Estimated life expectancy at birth
                'Takeback_Locations'          # Number of medication take back locations
               ]

# Combined feature table; starts empty and is filled in by the following cells.
pdf = pd.DataFrame()

In [None]:
# Load the drug poisoning / overdose mortality data set (one row per census tract).
DATA_SET = 'Drug_Poisoning_or_Overdose_involving_Rx_Opioid_Analgesic_or_Heroin_Mortality_Rate_Census_Tract'
pdf_mortality_rate = pd.read_csv(DATA_DIR + DATA_SET + '.csv')
display(pdf_mortality_rate.head())

# Build the feature table from the raw frame instead of assigning columns into
# the existing `pdf`: this makes the cell idempotent, so re-running it always
# produces the same tract-indexed table regardless of the previous state of `pdf`.
pdf = (
    pdf_mortality_rate[['TRACT_FIPS', 'POD_DEATH_ADJRATE']]
    .rename(columns={
        'TRACT_FIPS': 'Census_Tract_Number',
        'POD_DEATH_ADJRATE': 'Mortality_Rate',  # age-adjusted mortality rate per 100,000
    })
    .set_index('Census_Tract_Number')
    .fillna(0)  # tracts without a reported rate are given a rate of 0
)
display(pdf.head(20))

In [None]:
# Frequent mental distress in adults: CDPHE community-level estimates per census tract.
DATA_SET = 'Mental_Health_in_Adults__CDPHE_Community_Level_Estimates_Census_Tracts'
pdf_mental_health = (
    pd.read_csv(DATA_DIR + DATA_SET + '.csv')
    .rename(columns={'MNTLD': 'Frequent_Mental_Distress'})
)
display(pdf_mental_health.head())

# Left-join on the tract index. Dropping the column first (if present) keeps the
# cell re-runnable; a second plain join would raise "columns overlap".
pdf = (
    pdf.drop(columns=['Frequent_Mental_Distress'], errors='ignore')
    .join(pdf_mental_health.set_index('FIPS')['Frequent_Mental_Distress'])
)
display(pdf.head())

In [None]:
# Fair or poor health status among adults: CDPHE community-level estimates per census tract.
DATA_SET = 'Health_Status_in_Adults__CDPHE_Community_Level_Estimates_Census_Tracts'
# Named after its content rather than reusing `pdf_mental_health`, which
# belongs to the mental-health data set.
pdf_health_status = (
    pd.read_csv(DATA_DIR + DATA_SET + '.csv')
    .rename(columns={'FPHLTH': 'Fair_Poor_Health'})
)
display(pdf_health_status.head())

# Left-join on the tract index. Dropping the column first (if present) keeps the
# cell re-runnable; a second plain join would raise "columns overlap".
pdf = (
    pdf.drop(columns=['Fair_Poor_Health'], errors='ignore')
    .join(pdf_health_status.set_index('FIPS')['Fair_Poor_Health'])
)
# Report how many tracts have no value for each column collected so far.
print(pdf.isna().sum())
display(pdf.head())

In [None]:
# Frequent physical distress in adults: CDPHE community-level estimates per census tract.
DATA_SET = 'Physical_Health_in_Adults__CDPHE_Community_Level_Estimates_Census_Tracts'
# Named after its content rather than reusing `pdf_mental_health`, which
# belongs to the mental-health data set.
pdf_physical_health = (
    pd.read_csv(DATA_DIR + DATA_SET + '.csv')
    .rename(columns={'PHYSH': 'Frequent_Physical_Distress'})
)
display(pdf_physical_health.head())

# Left-join on the tract index. Dropping the column first (if present) keeps the
# cell re-runnable; a second plain join would raise "columns overlap".
pdf = (
    pdf.drop(columns=['Frequent_Physical_Distress'], errors='ignore')
    .join(pdf_physical_health.set_index('FIPS')['Frequent_Physical_Distress'])
)
display(pdf.head())

In [None]:
# Delayed medical care because of cost in adults: CDPHE community-level estimates per census tract.
DATA_SET = 'Delayed_Medical_Care_in_Adults___CDPHE_Community_Level_Estimates_Census_Tracts'
# Named after its content rather than reusing `pdf_mental_health`, which
# belongs to the mental-health data set.
pdf_delayed_care = (
    pd.read_csv(DATA_DIR + DATA_SET + '.csv')
    .rename(columns={'DELMCC': 'Delayed_Medical_Care'})
)
display(pdf_delayed_care.head())

# Left-join on the tract index. Dropping the column first (if present) keeps the
# cell re-runnable; a second plain join would raise "columns overlap".
pdf = (
    pdf.drop(columns=['Delayed_Medical_Care'], errors='ignore')
    .join(pdf_delayed_care.set_index('FIPS')['Delayed_Medical_Care'])
)
display(pdf.head())

In [None]:
# Estimated life expectancy per census tract (column LE_20102015 of the source file).
DATA_SET = 'Colorado_Life_Expectancy_by_Census_Tract_Published_by_NAPHSISUSALEEP_20102015'
# Named after its content rather than reusing `pdf_mental_health`, which
# belongs to the mental-health data set.
pdf_life_expectancy = (
    pd.read_csv(DATA_DIR + DATA_SET + '.csv')
    .rename(columns={'LE_20102015': 'Life_Expectancy'})
)
display(pdf_life_expectancy.head())

# Left-join on the tract index. Dropping the column first (if present) keeps the
# cell re-runnable; a second plain join would raise "columns overlap".
pdf = (
    pdf.drop(columns=['Life_Expectancy'], errors='ignore')
    .join(pdf_life_expectancy.set_index('FIPS')['Life_Expectancy'])
)
# Tracts without a life-expectancy value are imputed with the mean over the
# tracts that have one.
pdf['Life_Expectancy'] = pdf['Life_Expectancy'].fillna(pdf['Life_Expectancy'].mean())
display(pdf.head())

In [None]:
# Medication take-back locations (one row per location with Longitude/Latitude).
DATA_SET = 'take-back-program'
pdf_take_back = pd.read_csv(DATA_DIR + DATA_SET + '.csv')

display(pdf_take_back.head())


def _tract_geoid(row):
    """Return, as an int, the GEOID of the first census tract reported for a row.

    The row's 'Longitude' and 'Latitude' values are passed to
    ``censusgeocode.coordinates``; the first entry under 'Census Tracts' is used.
    NOTE(review): this is presumably one remote lookup per row, so the cell can
    be slow for large inputs - consider caching the result.
    """
    result = cg.coordinates(x=row['Longitude'], y=row['Latitude'])
    return int(result['Census Tracts'][0]['GEOID'])


# Despite its name, this column holds the tract GEOID of each location; the
# per-tract location counts are derived from it just below.
pdf_take_back['Takeback_Locations'] = pdf_take_back.apply(_tract_geoid, axis=1)

# Number of take-back locations per tract. The Series is renamed explicitly
# because pandas >= 2.0 names the value_counts() result 'count', which made the
# later ['Takeback_Locations'] lookup raise KeyError.
pdf_take_back_counts = (
    pdf_take_back['Takeback_Locations']
    .value_counts()
    .rename('Takeback_Locations')
    .rename_axis('Census_Tract_Number')
    .to_frame()
)

display(pdf_take_back_counts.head())

# Left-join on the tract index. Dropping the column first (if present) keeps the
# cell re-runnable; a second plain join would raise "columns overlap".
pdf = (
    pdf.drop(columns=['Takeback_Locations'], errors='ignore')
    .join(pdf_take_back_counts['Takeback_Locations'])
)
# Tracts that do not appear in the counts have no take-back location.
pdf['Takeback_Locations'] = pdf['Takeback_Locations'].fillna(0)
display(pdf.head())


In [None]:
# Remaining missing values per column (mutual_info_regression rejects NaN input).
print(pdf.isna().sum())

# Target: tract mortality rate. Features: every other column of the table.
y = pdf['Mortality_Rate']
X = pdf.drop(columns=['Mortality_Rate'])

# The estimator adds small random noise to continuous features, so a fixed seed
# is required for the scores to be reproducible across runs.
# NOTE(review): all features are treated as continuous (default
# discrete_features='auto' on dense input); 'Takeback_Locations' is a count -
# confirm whether it should be flagged as discrete.
RANDOM_STATE = 0
mi = mutual_info_regression(X.values, y, random_state=RANDOM_STATE)

# (feature name, mutual information) pairs, most informative first.
feature_importance = sorted(zip(X.columns, mi), key=lambda pair: pair[1], reverse=True)

pprint.pprint(feature_importance)