In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from PIL import Image, ImageOps
from plotnine import (ggplot, aes, geom_map, geom_text, geom_label, 
                      ggtitle, element_blank, element_rect, 
                      scale_fill_manual, theme_minimal, theme) 
from pulp import (LpProblem, LpMinimize, LpVariable, lpSum, 
                  PULP_CBC_CMD, GLPK_CMD, LpStatus, value) 

In [2]:
df=pd.read_csv('census.csv')
df.head()


Unnamed: 0,county,population,total_units,occupied_units,vacant_units
0,Adair,7496,3585,3217,368
1,Adams,3704,1888,1614,274
2,Allamakee,14061,7668,5797,1871
3,Appanoose,12317,6306,5320,986
4,Audubon,5674,2787,2498,289


In [3]:
df['county_id']=np.arange(0, 99)
df.head()

Unnamed: 0,county,population,total_units,occupied_units,vacant_units,county_id
0,Adair,7496,3585,3217,368,0
1,Adams,3704,1888,1614,274,1
2,Allamakee,14061,7668,5797,1871,2
3,Appanoose,12317,6306,5320,986,3
4,Audubon,5674,2787,2498,289,4


In [4]:
df.insert(0, 'county_id', df.pop('county_id'))

In [5]:
df.head()

Unnamed: 0,county_id,county,population,total_units,occupied_units,vacant_units
0,0,Adair,7496,3585,3217,368
1,1,Adams,3704,1888,1614,274
2,2,Allamakee,14061,7668,5797,1871
3,3,Appanoose,12317,6306,5320,986
4,4,Audubon,5674,2787,2498,289


In [6]:
shapefile_iowa = gpd.read_file('IA_counties/IA_counties.shp')
shapefile_iowa.head()

Unnamed: 0,STATEFP10,COUNTYFP10,GEOID10,NAME10,NAMELSAD10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,TOTPOP,...,TOTVOT12,PRES12D,PRES12R,PRES12OTH,TOTVOT16,PRES16D,PRES16R,PRES16OTH,CD,geometry
0,19,127,19127,Marshall,Marshall County,1482770678,1803086,42.041691,-92.9814523,40648,...,19064,10257,8472,335,17980,7652,9146,1182,1,"POLYGON ((-92.76679 42.12346, -92.76679 42.122..."
1,19,11,19011,Benton,Benton County,1855117342,5760770,42.0925474,-92.05763,26076,...,14023,6862,6940,221,13844,4678,8232,934,1,"POLYGON ((-91.94773 41.86186, -91.95514 41.861..."
2,19,41,19041,Clay,Clay County,1469139214,13866941,43.079822,-95.1497261,16667,...,8502,3385,4951,166,8617,2249,5877,491,4,"POLYGON ((-95.26926 43.25537, -95.26140 43.255..."
3,19,165,19165,Shelby,Shelby County,1530110414,1486135,41.6790143,-95.3089173,12167,...,6483,2469,3911,103,6370,1662,4362,346,4,"POLYGON ((-95.20902 41.86371, -95.20890 41.863..."
4,19,43,19043,Clayton,Clayton County,2016405612,36586071,42.8409979,-91.3235108,18129,...,9138,4806,4164,168,9129,3237,5317,575,1,"POLYGON ((-91.25080 42.64558, -91.25160 42.645..."


In [7]:
shapefile_iowa[['COUNTYFP10','NAME10','INTPTLAT10', 'INTPTLON10' ]].sort_values

<bound method DataFrame.sort_values of    COUNTYFP10         NAME10   INTPTLAT10    INTPTLON10
0         127       Marshall  +42.0416910  -092.9814523
1         011         Benton  +42.0925474  -092.0576300
2         041           Clay  +43.0798220  -095.1497261
3         165         Shelby  +41.6790143  -095.3089173
4         043        Clayton  +42.8409979  -091.3235108
..        ...            ...          ...           ...
94        111            Lee  +40.6475875  -091.4771574
95        169          Story  +42.0375379  -093.4660934
96        075         Grundy  +42.4033232  -092.7902613
97        155  Pottawattamie  +41.3401835  -095.5449053
98        089         Howard  +43.3653125  -092.3219084

[99 rows x 4 columns]>

In [8]:
df1=df.copy()

dummy=pd.DataFrame()
dummy['county']=shapefile_iowa['NAME10']

In [9]:
dummy['longitude']=shapefile_iowa['INTPTLON10']
dummy['latitude']=shapefile_iowa['INTPTLAT10']

In [10]:
dummy1=dummy.sort_values('county', ascending=True).copy()

In [11]:
df1['latitude']=dummy1['latitude']
df1['longitude']=dummy1['longitude']

In [12]:
df1.head()

Unnamed: 0,county_id,county,population,total_units,occupied_units,vacant_units,latitude,longitude
0,0,Adair,7496,3585,3217,368,42.041691,-92.9814523
1,1,Adams,3704,1888,1614,274,42.0925474,-92.05763
2,2,Allamakee,14061,7668,5797,1871,43.079822,-95.1497261
3,3,Appanoose,12317,6306,5320,986,41.6790143,-95.3089173
4,4,Audubon,5674,2787,2498,289,42.8409979,-91.3235108


In [13]:
map_population_by_county_data = shapefile_iowa.merge(df1, left_on='NAME10', right_on='county',suffixes=('_left', '_right'))
county_populations = np.array(df1['population'])
state_population = sum(county_populations)
df1.sort_values('population', ascending=False).head()

Unnamed: 0,county_id,county,population,total_units,occupied_units,vacant_units,latitude,longitude
76,76,Polk,492401,210184,196891,13293,40.9153392,-91.1869253
56,56,Linn,230299,101230,94751,6479,42.3918601,-95.5074206
81,81,Scott,174669,77771,71628,6143,42.8644496,-91.8393728
51,51,Johnson,152854,65916,61335,4581,41.3307969,-92.6363663
6,6,Black Hawk,131144,58559,54223,4336,42.3907681,-93.709198


In [14]:
map_population_by_county_data

Unnamed: 0,STATEFP10,COUNTYFP10,GEOID10,NAME10,NAMELSAD10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,TOTPOP,...,CD,geometry,county_id,county,population,total_units,occupied_units,vacant_units,latitude,longitude


In [None]:
map_population_by_county = (
    ggplot(map_population_by_county_data)
    + geom_map(aes(fill='population'))
    + geom_label(aes(x='longitude', y='latitude', 
                     label='population',size=2), 
                 show_legend=False)
    + theme_minimal()
    + theme(axis_text_x=element_blank(),
            axis_text_y=element_blank(),
            axis_title_x=element_blank(),
            axis_title_y=element_blank(),
            axis_ticks=element_blank(),
            panel_grid_major=element_blank(),
            panel_grid_minor=element_blank()
           )
    )
map_population_by_county