In [1]:
import pandas as pd

In [2]:
# Source of the land area table:
# https://www.census.gov/library/publications/2011/compendia/usa-counties-2011.html#LND
# FIPS is read as text so codes keep their leading zeros.
df = pd.read_csv(
    filepath_or_buffer='county_land_area_2010.csv',
    dtype={'FIPS': str},
)
df

Unnamed: 0,Areaname,FIPS,land_area_sqm
0,UNITED STATES,00000,3531905.43
1,ALABAMA,01000,50645.33
2,"Autauga, AL",01001,594.44
3,"Baldwin, AL",01003,1589.78
4,"Barbour, AL",01005,884.88
...,...,...,...
3193,"Sweetwater, WY",56037,10426.65
3194,"Teton, WY",56039,3995.38
3195,"Uinta, WY",56041,2081.26
3196,"Washakie, WY",56043,2238.55


In [3]:
# Give the two renamed columns their working names.
column_renames = {
    'Areaname': 'state',
    'land_area_sqm': 'land_area_sqmi',
}
df = df.rename(columns=column_renames)

In [4]:
# Rows whose FIPS code ends in '000' (the non-county summary rows).
is_summary_row = df['FIPS'].str.endswith('000')
df_states = df[is_summary_row]
df_states

Unnamed: 0,state,FIPS,land_area_sqmi
0,UNITED STATES,0,3531905.43
1,ALABAMA,1000,50645.33
69,ALASKA,2000,570640.95
99,ARIZONA,4000,113594.08
115,ARKANSAS,5000,52035.48
191,CALIFORNIA,6000,155779.22
250,COLORADO,8000,103641.89
315,CONNECTICUT,9000,4842.36
324,DELAWARE,10000,1948.54
328,DISTRICT OF COLUMBIA,11000,61.05


In [5]:
# From here on the name column is called 'county_state'.
df = df.rename({'state': 'county_state'}, axis='columns')

In [6]:
# Keep only the rows whose FIPS code does not end in '000'.
ends_in_000 = df['FIPS'].str.endswith('000')
df = df[~ends_in_000]
df

Unnamed: 0,county_state,FIPS,land_area_sqmi
2,"Autauga, AL",01001,594.44
3,"Baldwin, AL",01003,1589.78
4,"Barbour, AL",01005,884.88
5,"Bibb, AL",01007,622.58
6,"Blount, AL",01009,644.78
...,...,...,...
3193,"Sweetwater, WY",56037,10426.65
3194,"Teton, WY",56039,3995.38
3195,"Uinta, WY",56041,2081.26
3196,"Washakie, WY",56043,2238.55


In [8]:
# Summary statistics of the numeric column(s); a minimum of 0 here would point
# at zero-area rows, which are filtered out further down.
df.describe()

Unnamed: 0,land_area_sqmi
count,3146.0
mean,1122.665928
std,3609.86202
min,0.0
25%,430.61
50%,615.38
75%,923.7725
max,145504.79


In [10]:
# Lookup used to rewrite FIPS codes (and thereby counties) into the codes Tableau recognizes.
# Background: https://github.com/spearitual/Updated-FIPS-codes-for-Tableau
#
# key   = FIPS code to be replaced
# value = FIPS code it is replaced with
# Several keys share a value; those rows end up under one code and have their
# land areas summed by the groupby that follows the swap.
FIPS_swapper = {
    '51540': '51003',
    '51580': '51005',
    '51820': '51015',
    '51680': '51031',
    '51640': '51035',
    '51730': '51053',
    '51610': '51059',
    '51840': '51069',
    '51595': '51081',
    '51690': '51089',
    '51830': '51095',
    '15005': '15009',
    '51750': '51121',
    '51590': '51143',
    '51670': '51149',
    '51685': '51153',
    '51775': '51161',
    '51678': '51163',
    '51660': '51165',
    '51620': '51175',
    '51630': '51177',
    '51520': '51191',
    '51720': '51195',
    '51735': '51199',
    '51790': '51015',
    '51570': '51053',
    '51600': '51059',
    '51683': '51153',
    '51530': '51163',
    '09013': '09170',
    '09001': '09190',
    '02010': '02016',
    '02201': '02198',
    '02231': '02105',
    '02232': '02105',
    '02280': '02195',
    '02901': '02016',
    '02903': '02105',
    '02904': '02185',
    '02905': '02050',
    '02907': '02164',
    '02908': '02066',
    '02910': '02100',
    '02912': '02122',
    '02916': '02290',
    '02919': '02130',
    '02920': '02198',
    '02921': '02122',
    '02922': '02220',
    '02923': '02282',
    '02924': '02240',
    '02925': '02290',
    '02926': '02063',
    '02928': '02275',
    '02929': '02290',
    '15901': '15009',
    '51901': '51003',
    '51903': '51005',
    '51907': '51015',
    '51911': '51031',
    '51913': '51035',
    '51918': '51053',
    '51919': '51059',
    '51921': '51069',
    '51923': '51081',
    '51929': '51089',
    '51931': '51095',
    '51933': '51121',
    '51939': '51143',
    '51941': '51149',
    '51942': '51153',
    '51944': '51161',
    '51945': '51163',
    '51947': '51165',
    '51949': '51175',
    '51951': '51177',
    '51953': '51191',
    '51955': '51195',
    '51958': '51199',
    '55901': '55115',
    '02270': '02158',
    '46113': '46102',
    '51515': '51019',
}

In [11]:
#Update FIPS
# Apply the whole mapping in a single vectorized pass instead of one
# full-column replace per dictionary entry. This gives the same result because
# no replacement value in FIPS_swapper is itself a key, so the order of
# substitution cannot matter. Codes that are not keys are left unchanged.
# assign() builds a new frame rather than writing a column into one that was
# produced by a filter.
df = df.assign(FIPS=df['FIPS'].replace(FIPS_swapper))

In [12]:
# Rows that now share a FIPS code are collapsed into one, adding up their areas.
# Only FIPS and land_area_sqmi remain in the result.
area_by_fips = df.groupby('FIPS')['land_area_sqmi'].sum()
df = area_by_fips.reset_index()

In [14]:
# Discard rows whose land area is exactly zero and renumber the index.
has_area = df['land_area_sqmi'] != 0
df = df[has_area].reset_index(drop=True)
df

Unnamed: 0,FIPS,land_area_sqmi
0,01001,594.44
1,01003,1589.78
2,01005,884.88
3,01007,622.58
4,01009,644.78
...,...,...
3108,56037,10426.65
3109,56039,3995.38
3110,56041,2081.26
3111,56043,2238.55


In [15]:
# Second lookup, rebinding FIPS_swapper: six '09xxx' codes and the codes they
# are recoded to in the cells below.
FIPS_swapper = {
    '09005': '09160',
    '09009': '09140',
    '09003': '09110',
    '09007': '09130',
    '09015': '09150',
    '09011': '09180',
}

In [16]:
# Pull out the rows whose FIPS code is a key of FIPS_swapper.
# The explicit .copy() makes df_CT an independent frame, so recoding its FIPS
# column afterwards is not a write into a slice of df (which pandas flags with
# SettingWithCopyWarning) and can never touch df itself.
df_CT = df.loc[df['FIPS'].isin(list(FIPS_swapper))].copy()
df_CT

Unnamed: 0,FIPS,land_area_sqmi
308,9003,735.1
309,9005,920.56
310,9007,369.3
311,9009,604.51
312,9011,664.88
313,9015,512.91


In [17]:
# Recode every FIPS in df_CT through FIPS_swapper in one vectorized step.
# The replacement is computed from df_CT's own column (not from a mask built on
# df) and bound to a new frame, so it does not depend on df and df_CT sharing
# index labels and nothing is written through a slice. No value in the mapping
# is also a key, so a single dict-based replace yields the same codes as
# substituting the entries one at a time.
df_CT = df_CT.assign(FIPS=df_CT['FIPS'].replace(FIPS_swapper))
df_CT

Unnamed: 0,FIPS,land_area_sqmi
308,9110,735.1
309,9160,920.56
310,9130,369.3
311,9140,604.51
312,9180,664.88
313,9150,512.91


In [18]:
# Stack the recoded rows underneath the existing table with a fresh 0..n-1 index.
# The rows carrying the original codes stay in df as well.
df = pd.concat([df, df_CT], ignore_index=True)
df

Unnamed: 0,FIPS,land_area_sqmi
0,01001,594.44
1,01003,1589.78
2,01005,884.88
3,01007,622.58
4,01009,644.78
...,...,...
3114,09160,920.56
3115,09130,369.30
3116,09140,604.51
3117,09180,664.88


In [19]:
# Hand-entered (FIPS, land_area_sqmi) rows appended at the end of the table.
manual_rows = [
    ['02063', 9529.8],
    ['02066', 24692.1],
    ['09120', 140.2],
]
for manual_row in manual_rows:
    # The next free integer label equals the current row count.
    df.loc[len(df.index)] = manual_row
df

Unnamed: 0,FIPS,land_area_sqmi
0,01001,594.44
1,01003,1589.78
2,01005,884.88
3,01007,622.58
4,01009,644.78
...,...,...
3117,09180,664.88
3118,09150,512.91
3119,02063,9529.80
3120,02066,24692.10


In [20]:
# Order the rows by FIPS code (string comparison); index labels are kept as they are.
df = df.sort_values(by=['FIPS'])
df

Unnamed: 0,FIPS,land_area_sqmi
0,01001,594.44
1,01003,1589.78
2,01005,884.88
3,01007,622.58
4,01009,644.78
...,...,...
3108,56037,10426.65
3109,56039,3995.38
3110,56041,2081.26
3111,56043,2238.55


In [22]:
# Export is disabled: uncomment the line below to write the table to CSV without the index column.
#df.to_csv('county_land_area_2010_cleaned.csv',index=False)