In [1]:
import pandas as pd
import numpy as np

# Adjust notebook settings to widen the notebook
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:90% !important;}</style>"))
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [2]:
hpsa = pd.read_csv("./Data/HPSA_Cleaned.csv")
nsduh = pd.read_csv("./Data/nsduh_data_cleaned.csv")
grants = pd.read_csv("./Data/grants_per_county_cbsa.csv")

In [3]:
print('HPSA shape:{}'.format(hpsa.shape))
print('Grants shape:{}'.format(grants.shape))
print('NSDUH shape:{}'.format(nsduh.shape))

HPSA shape:(27829, 52)
Grants shape:(2329, 11)
NSDUH shape:(214505, 98)


In [4]:
grants = grants.rename(columns={'Award Year':'Year', 'Complete County Name':'County Name'})
grants.drop(['centraloutlyingcounty', 'centraloutlyingcounty', 'metropolitanmicropolitanstatis'], axis=1, inplace=True)

In [5]:
# grants = grants.groupby(['Year','PDEN10'])\
#                 .agg({'Total Active Grant Financial Assistance':['median','mean'],
#                      'Mental Health Assistance':['mean']}) # mental health median, q25, q75 are 0

In [6]:
# # flatten hierarchical index 
# grants.columns = [' '.join(col).strip() for col in grants.columns.values]

# grants = grants.reset_index()

### HPSA

In [7]:
# drop unrecognized hpsas
hpsa = hpsa[~hpsa['metropolitanmicropolitanstatis'].isnull()]

In [8]:
hpsa['HPSA Withdrawn Year'] = pd.to_datetime(hpsa['Withdrawn Date']).dt.year

In [9]:
hpsa['HPSA Designation Year'] = pd.to_datetime(hpsa['HPSA Designation Date']).dt.year

In [10]:
# remove repetitive columns & not useful columns for Tableau visualizations
hpsa.drop(['Common State County FIPS Code', 
         'Common State FIPS Code',
         'Common State Name', 
         'County Equivalent Name', 
         'Common State Abbreviation',
         'HPSA Metropolitan Indicator Code', 
         'Primary State FIPS Code', 
         'Primary State Name',
         'State Abbreviation', 
         'State FIPS Code', 
         'State Name', 
         'Common County Name', 
         'Metropolitan Indicator',
         'HPSA Status Code',  
         'Rural Status Code',  
         'HPSA Component State Abbreviation',
         'HPSA Population Type Code', 
         'County Equivalent Name New',
         'centraloutlyingcounty'], axis=1, inplace=True)

## NSDUH

In [11]:
nsduh = nsduh.rename(columns={'Population_Density_2010':'PDEN10'})

## Merge all datasets (NSDUH, HPSA, Grants)

In [12]:
# merge HPSA and Grant by CBSA identifiers
hpsa_grants = hpsa.merge(grants, how='left', left_on=['PDEN10', 'HPSA Designation Year' ], right_on=['PDEN10', 'Year'])

In [14]:
# rename columns names
hpsa_grants.rename({
    'countycountyequivalent': 'County Name', 
    'statename': 'State Name',
    'cbsacode' : 'CBSA Code',
    'metropolitanmicropolitanstatis' : 'Metro or Micro'}, axis=1, inplace=True)

In [15]:
hpsa_grants_15to19 = hpsa_grants.loc[(hpsa_grants['HPSA Designation Year']>=2015) & (hpsa_grants['HPSA Designation Year']<=2019)]

In [24]:
hpsa_grants_15to19[['HPSA Designation Date','Withdrawn Date']] = hpsa_grants_15to19[['HPSA Designation Date','Withdrawn Date']].apply(pd.to_datetime) #if conversion required
hpsa_grants_15to19['Days Before Withdrawn'] = (hpsa_grants_15to19['Withdrawn Date'] - hpsa_grants_15to19['HPSA Designation Date']).dt.days

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [41]:
hpsa_grants_15to19['Year'] = pd.to_numeric(hpsa_grants_15to19['Year'], downcast='integer')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [42]:
hpsa_grants_15to19.to_csv('./Data/hpsa_grant_15to19.csv',index=False)

In [29]:
hpsa_concise = hpsa_grants_15to19.groupby(['PDEN10', 'Year'], as_index=False).agg(
    {'Total Active Grant Financial Assistance':'mean',
     'Mental Health Assistance':'mean',
     'HPSA Designation Population' : 'mean',
     'Days Before Withdrawn':'mean'})

In [30]:
hpsa_concise['Year'] = pd.to_numeric(hpsa_concise['Year'], downcast='integer')

In [31]:
hpsa_concise.head(2)

Unnamed: 0,PDEN10,Year,Total Active Grant Financial Assistance,Mental Health Assistance,HPSA Designation Population,Days Before Withdrawn
0,1,2015,1427335.0,0.0,104441.792208,968.099237
1,1,2016,1160565.0,2064.130137,225875.290152,1041.714455


In [34]:
# merge with nsduh dataset 
final_df = nsduh.merge(hpsa_concise, how='left', left_on=['PDEN10', 'Year' ], right_on=['PDEN10', 'Year'])

In [38]:
final_df.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'Identifier', 'Year',
       'Inpatient_Past_Year', 'Outpatient_Past_Year',
       'Prescription_Treatment_Past_Year', 'Any_Treatment_Past_Year',
       'Treatment_Type_Past_Year', 'Perceived_Unmet_Need',
       ...
       'Total_Income_Family',
       'Participated_In_One_Or_More_Government_Assistance_Programs',
       'Total_Income_Family_Recode', 'Poverty_Level', 'PDEN10',
       'County_Metro_NonMetro_Status',
       'Total Active Grant Financial Assistance', 'Mental Health Assistance',
       'HPSA Designation Population', 'Days Before Withdrawn'],
      dtype='object', length=102)

In [39]:
final_df.drop(['Unnamed: 0', 'Unnamed: 0.1'], axis=1, inplace=True)

In [40]:
## save merged HPSA, NSDUH, Grant by CBSA columns dataset in ./Data folder 
final_df.to_csv('./Data/final_df_tableau.csv',index=False)