In [120]:
import pandas as pd

In [46]:
# Read County as text in both service-area extracts so the codes are not
# parsed as numbers.
countyAsText = {'County': str}
svArea20142016 = pd.read_csv('service-area-2014-2016.csv', dtype=countyAsText)
svArea2017 = pd.read_csv('service-area-2017.csv', dtype=countyAsText)


In [48]:
colsToKeep = [
    'BusinessYear',
    'StateCode',
    'ServiceAreaId',
    'ServiceAreaName',
    'CoverEntireState',
    'County',
    'IssuerId']


def filterSvArea(df, cols=tuple(colsToKeep)):
    """Return the individual-market, non-dental rows of a service-area frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Service-area records. Must contain ``MarketCoverage``,
        ``DentalOnlyPlan`` and every column named in ``cols``.
    cols : sequence of str, optional
        Columns to keep. The default is a snapshot of ``colsToKeep`` taken when
        this function is defined, so later reassignments of that global name
        do not change what the function returns.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left unmodified) holding only rows where
        ``MarketCoverage == 'Individual'`` and ``DentalOnlyPlan == 'No'``,
        restricted to ``cols``. A trailing ``.0`` is stripped from ``County``.
    """
    keep = (df.MarketCoverage == 'Individual') & (df.DentalOnlyPlan == 'No')
    # Copy so the clean-up below never writes into the caller's frame.
    filtered = df.loc[keep, list(cols)].copy()
    if 'County' in filtered.columns:
        # Raw, end-anchored pattern with regex=True spelled out: newer pandas
        # treats the pattern as a literal by default, and an unanchored
        # pattern would also remove ".0" from the middle of a value.
        filtered['County'] = filtered['County'].str.replace(r'\.0$', '', regex=True)
    return filtered
   

In [49]:
# Filter each extract on its own, then stack the results into one frame.
svAreaParts = [filterSvArea(svArea20142016), filterSvArea(svArea2017)]
svAreas = pd.concat(svAreaParts)

In [119]:
# Distinct state codes that appear in the 2017 rows.
svAreas.loc[svAreas.BusinessYear == 2017, 'StateCode'].unique()

array(['AL', 'AR', 'GA', 'IL', 'IN', 'KY', 'ME', 'MI', 'MO', 'MT', 'NH',
       'NM', 'NV', 'OK', 'SC', 'TX', 'VA', 'WI', 'DE', 'HI', 'IA', 'KS',
       'NE', 'OH', 'OR', 'PA', 'SD', 'UT', 'WV'], dtype=object)

- Filter down to the issuer/service-area records that are flagged as covering an entire state.

In [51]:
colsToKeep = ['BusinessYear', 'StateCode', 'IssuerId']
# Rows whose service area is flagged as covering the whole state.
isStateWide = svAreas.CoverEntireState == "Yes"
# Grouping on the key columns collapses repeats, leaving one row per
# (year, state, issuer); the counts themselves are discarded.
entireState = (
    svAreas[isStateWide]
    .groupby(colsToKeep)
    .count()
    .reset_index()[colsToKeep]
)
entireState.head()

Unnamed: 0,BusinessYear,StateCode,IssuerId
0,2014,AK,38344
1,2014,AR,75293
2,2014,DE,13537
3,2014,DE,76168
4,2014,DE,81914


Read in the FIPS codes. To facilitate a join later, we duplicate the counties so that each county has four records in the data -- one for each year of health insurance data we have (2014-2017).

In [68]:
# Years of service-area data; every county gets one row per year.
businessYears = [2014, 2015, 2016, 2017]

# NOTE(review): `dtypes` and `colNames` are not defined in the cells shown here;
# they must come from an earlier cell -- confirm this survives Restart & Run All.
fipsRaw = pd.read_csv('fips-codes.csv', header=None, dtype=dtypes).rename(columns=colNames)
# County key = state FIPS followed by county FIPS (both read as strings).
fipsRaw['County'] = fipsRaw.StateFP.str.cat(fipsRaw.CountyFP)
numCounties = fipsRaw.shape[0]

# Build one tagged copy of the county list per year and stack them. This avoids
# label-slicing with .loc, whose end label is inclusive and therefore touched
# one row too many on every pass.
fipsCodes = pd.concat(
    [fipsRaw.assign(BusinessYear=year) for year in businessYears]
).reset_index()  # keeps the per-copy row number in an 'index' column, as before

In [66]:
# Peek at the first five rows of the year-expanded county table.
fipsCodes.head(5)

Unnamed: 0,index,StateCode,StateFP,CountyFP,CountyName,ClassFP,County,BusinessYear
0,0,AL,1,1,Autauga County,H1,1001,2014
1,1,AL,1,3,Baldwin County,H1,1003,2014
2,2,AL,1,5,Barbour County,H1,1005,2014
3,3,AL,1,7,Bibb County,H1,1007,2014
4,4,AL,1,9,Blount County,H1,1009,2014


Join the counties from the `fipsCodes` dataset with `entireState`. This way, we can associate an issuer that covers an entire state with each of the counties in that state.

In [73]:
# Inner join: every state-wide issuer is paired with each county of its state
# for the matching year.
entireStateByCounties = pd.merge(
    entireState,
    fipsCodes,
    how='inner',
    on=['BusinessYear', 'StateCode'],
)

Get the service areas/issuers that cover specific counties.

In [52]:
colsToKeep = ['BusinessYear', 'StateCode', 'IssuerId', 'County']
# Only rows that name a specific county.
hasCounty = svAreas.County.notnull()
# Grouping collapses repeats to one row per (year, state, county, issuer);
# the counts themselves are discarded by the column selection.
countyGroups = svAreas[hasCounty].groupby(['BusinessYear', 'StateCode', 'County', 'IssuerId'])
countyIssuer = countyGroups.count().reset_index()[colsToKeep]
countyIssuer.head()

Unnamed: 0,BusinessYear,StateCode,IssuerId,County
0,2014,AR,62141,5005
1,2014,AR,70525,5005
2,2014,AR,62141,5007
3,2014,AR,70525,5007
4,2014,AR,62141,5009


Convert fips codes to five digits to facilitate joining.

In [80]:
def fipsToFive(fips):
    """Left-pad the string ``fips`` with zeros to a width of five characters.

    Strings that are already five or more characters long are returned
    unchanged.
    """
    return fips.rjust(5, '0')
# Pad every county code to five characters so it can be matched on County later.
countyIssuer['County'] = countyIssuer['County'].map(fipsToFive)

Concatenate the issuers who cover an entire state with those who cover specific counties.

In [89]:
# Stack county-specific issuers on top of the state-wide issuers (expanded to
# one row per county), using the same four columns for both.
stateWideRows = entireStateByCounties[colsToKeep]
entireAndCounties = pd.concat([countyIssuer, stateWideRows])
entireAndCounties.head()

Unnamed: 0,BusinessYear,StateCode,IssuerId,County
0,2014,AR,62141,5005
1,2014,AR,70525,5005
2,2014,AR,62141,5007
3,2014,AR,70525,5007
4,2014,AR,62141,5009


Count the number of distinct issuers per county in each year.

In [95]:
# Number of distinct issuers for each (year, state, county).
issuerCounts = (
    entireAndCounties
    .groupby(['BusinessYear', 'StateCode', 'County'])['IssuerId']
    .nunique()
    .reset_index()
    .rename(columns={'IssuerId': 'NumIssuers'})
)

In [105]:
# Column means over the 2014 rows. numeric_only=True restricts the mean to the
# numeric columns (BusinessYear, NumIssuers); without it, newer pandas tries to
# average the string StateCode/County columns and raises instead of skipping them.
issuerCounts[issuerCounts.BusinessYear == 2014].mean(numeric_only=True)

BusinessYear    2014.000000
NumIssuers         2.055024
dtype: float64

In [112]:
# Write the per-county issuer counts to disk without the row index.
issuerCountsPath = 'issuerCounts.csv'
issuerCounts.to_csv(issuerCountsPath, index=False)