In [16]:
import pandas as pd
import numpy as np

# Adjust notebook settings to widen the notebook
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:95% !important;}</style>"))
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [17]:
hpsa = pd.read_csv("./Data/HPSA_Cleaned.csv")
nsduh = pd.read_csv("./Data/nsduh_data_cleaned.csv")
grants = pd.read_csv("./Data/grants_per_county_cbsa.csv")

In [18]:
print('HPSA shape:{}'.format(hpsa.shape))
print('Grants shape:{}'.format(grants.shape))
print('NSDUH shape:{}'.format(nsduh.shape))

HPSA shape:(27829, 52)
Grants shape:(2329, 11)
NSDUH shape:(214505, 98)


In [19]:
grants.head(2)

Unnamed: 0,Complete County Name,State Name,Award Year,Total Active Grant Financial Assistance,Mental Health Assistance,countycountyequivalent,centraloutlyingcounty,statename,cbsacode,metropolitanmicropolitanstatis,PDEN10
0,Accomack County,Virginia,2017,52817.0,0.0,,,,,,3
1,Accomack County,Virginia,2018,4973949.0,0.0,,,,,,3


In [20]:
grants = grants.rename(columns={'Award Year':'Year'})

### HPSA

In [21]:
# drop unrecognized hpsas
hpsa = hpsa[~hpsa['metropolitanmicropolitanstatis'].isnull()]

In [22]:
hpsa['HPSA Withdrawn Year'] = pd.to_datetime(hpsa['Withdrawn Date']).dt.year

In [23]:
hpsa['HPSA Designation Year'] = pd.to_datetime(hpsa['HPSA Designation Date']).dt.year

In [26]:
# remove repetitive columns & not useful columns for Tableau visualizations
hpsa.drop(['Common State County FIPS Code', 
         'Common State FIPS Code',
         'Common State Name', 
         'County Equivalent Name', 
         'Common State Abbreviation',
         'HPSA Metropolitan Indicator Code', 
         'Primary State FIPS Code', 
         'Primary State Name',
         'State Abbreviation', 
         'State FIPS Code', 
         'State Name', 
         'Common County Name', 
         'Metropolitan Indicator',
         'HPSA Status Code',  
         'Rural Status Code',  
         'HPSA Component State Abbreviation',
         'HPSA Population Type Code', 
         'County Equivalent Name New',
         'centraloutlyingcounty'], axis=1, inplace=True)

## NSDUH

In [28]:
nsduh = nsduh.rename(columns={'Population_Density_2010':'PDEN10'})

## Merge all datasets (NSDUH, HPSA, Grants)

In [29]:
hpsa.head(2)

Unnamed: 0,Withdrawn Date,HPSA Name,HPSA Component Name,HPSA Component Type Code,HPSA Component Type Description,HPSA Designation Population Type Description,HPSA Type Code,State and County Federal Information Processing Standard Code,U.S. - Mexico Border 100 Kilometer Indicator,U.S. - Mexico Border County Indicator,Common Region Name,County or County Equivalent Federal Information Processing Standard Code,HPSA Designation Last Update Date,HPSA Designation Date,HPSA Status,HPSA Geography Identification Number,HPSA Score,Primary State Abbreviation,Primary HHS Region Name,Designation Type,HPSA ID,HPSA Designation Population,Rural Status,HPSA Degree of Shortage,HPSA FTE,HPSA Population Type,HPSA Shortage,DaysBeforeWithdrawn,countycountyequivalent,statename,cbsacode,metropolitanmicropolitanstatis,PDEN10,HPSA Withdrawn Year,HPSA Designation Year
0,,Stanley Correctional Institution,Stanley Correctional Institution,UNK,Unknown,Correctional Facility,PRSN,55017,N,N,Region 5,17,08/02/2018,07/21/2003,Designated,POINT,15,WI,Region 5,Correctional Facility,7551065910,2885.0,Non-Rural,6,0.6,,0.84,,Chippewa County,Wisconsin,20740.0,Metropolitan Statistical Area,1,,2003
1,07/02/2018,Rock County,Rock,SCTY,Single County,Geographic Population,Hpsa Geo HN,55105,N,N,Region 5,105,07/02/2018,04/09/2014,Withdrawn,55105,13,WI,Region 5,High Needs Geographic HPSA,755105,156639.0,Partially Rural,Not applicable,8.5,Geographic Population,0.74,1545 days,Rock County,Wisconsin,27500.0,Metropolitan Statistical Area,1,2018.0,2014


In [30]:
grants.head(2)

Unnamed: 0,Complete County Name,State Name,Year,Total Active Grant Financial Assistance,Mental Health Assistance,countycountyequivalent,centraloutlyingcounty,statename,cbsacode,metropolitanmicropolitanstatis,PDEN10
0,Accomack County,Virginia,2017,52817.0,0.0,,,,,,3
1,Accomack County,Virginia,2018,4973949.0,0.0,,,,,,3


In [11]:
# merge HPSA and Grant by CBSA identifiers
hpsa_grants = hpsa.merge(grants, how='left', left_on=['countycountyequivalent', 'statename', 'cbsacode', 'statename', 'PDEN10' ], right_on=['countycountyequivalent', 'statename', 'cbsacode', 'statename', 'PDEN10'])

In [12]:
hpsa_grants.columns

Index(['Withdrawn Date', 'HPSA Name', 'Common State County FIPS Code',
       'Common State FIPS Code', 'Common State Name', 'County Equivalent Name',
       'HPSA Component Name', 'HPSA Component Type Code',
       'HPSA Component Type Description',
       'HPSA Designation Population Type Description',
       'Common State Abbreviation', 'HPSA Metropolitan Indicator Code',
       'HPSA Type Code', 'Primary State FIPS Code', 'Primary State Name',
       'State Abbreviation',
       'State and County Federal Information Processing Standard Code',
       'State FIPS Code', 'State Name',
       'U.S. - Mexico Border 100 Kilometer Indicator',
       'U.S. - Mexico Border County Indicator', 'HPSA Status Code',
       'Common Region Name',
       'County or County Equivalent Federal Information Processing Standard Code',
       'Common County Name', 'Metropolitan Indicator',
       'HPSA Designation Last Update Date', 'HPSA Designation Date',
       'HPSA Status', 'HPSA Geography Identifica

In [15]:
# remove repetitive columns & not useful columns for Tableau visualizations
hpsa_grants.drop(['Common State County FIPS Code', 
                 'Common State FIPS Code',
                 'Common State Name', 
                 'County Equivalent Name', 
                 'Common State Abbreviation',
                 'HPSA Metropolitan Indicator Code', 
                 'Primary State FIPS Code', 
                 'Primary State Name',
                 'State Abbreviation', 
                 'State FIPS Code', 
                 'State Name', 
                 'Common County Name', 
                 'Metropolitan Indicator',
                 'HPSA Status Code',  
                 'Rural Status Code',  
                 'HPSA Component State Abbreviation',
                 'HPSA Population Type Code', 
                 'County Equivalent Name New',
                 'Complete County Name',
                 'centraloutlyingcounty'], axis=1, inplace=True)
 
# rename columns names
hpsa_grants.rename({
    'countycountyequivalent': 'County Name', 
    'statename': 'State Name',
    'cbsacode' : 'CBSA Code',
    'metropolitanmicropolitanstatis' : 'Metro or Micro'}, axis=1, inplace=True)

KeyError: "['Common State County FIPS Code' 'Common State FIPS Code'\n 'Common State Name' 'County Equivalent Name' 'Common State Abbreviation'\n 'HPSA Metropolitan Indicator Code' 'Primary State FIPS Code'\n 'Primary State Name' 'State Abbreviation' 'State FIPS Code' 'State Name'\n 'Common County Name' 'Metropolitan Indicator' 'HPSA Status Code'\n 'Rural Status Code' 'HPSA Component State Abbreviation'\n 'HPSA Population Type Code' 'County Equivalent Name New'\n 'Complete County Name' 'centraloutlyingcounty'] not found in axis"

In [None]:
HPSA_Grant.columns

In [None]:
## save merged HPSA and Grant by CBSA columns dataset in ./Data folder 
HPSA_Grant.to_csv('./Data/hpsa_grant_cbsa.csv',index=False)

In [None]:
df = HPSA_Grant[[
    'PDEN10', 'Metro or Micro', 'County Name', 'State Name', 'Rural Status',
    'HPSA Name', 'HPSA Component Name',
    'HPSA Component Type Code', 'HPSA Component Type Description',
    'HPSA Designation Population Type Description', 'HPSA Type Code',
    'State and County Federal Information Processing Standard Code',
    'U.S. - Mexico Border 100 Kilometer Indicator',
    'U.S. - Mexico Border County Indicator', 'Common Region Name',
    'County or County Equivalent Federal Information Processing Standard Code',
    'HPSA Status', 'HPSA Geography Identification Number', 'HPSA Score',
    'Primary State Abbreviation', 'Primary HHS Region Name',
    'Designation Type', 'HPSA ID', 'HPSA Designation Population',
    'HPSA Degree of Shortage', 'HPSA FTE', 'HPSA Population Type',
    'HPSA Shortage', 'HPSA Designation Last Update Date', 'HPSA Designation Date',
    'Withdrawn Date', 'DaysBeforeWithdrawn',
    'CBSA Code', 'Total Active Grant Financial Assistance', 'Mental Health Assistance']]

In [None]:
df.head(3)

In [None]:
# df.groupby(['PDEN10'], as_index=False).agg(
#     {'County Name':'count',
#      'Total Active Grant Financial Assistance':'mean',
#     'State Name': 'count',
#     'Rural Status' : 'count'}
# )

In [None]:
df_final[df_final['Total Active Grant Financial Assistance q25'].isnull()][['Year','PDEN10']]

In [None]:
# save merged df
df_final.to_csv("./Data/NSDUH_HPSA_GRANT_finaldata.csv",index=False)