# Join CSV and Shapefiles

In [1]:
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster



In [2]:
# import shapefile, 500Cities_Tracts
census_tracts = gpd.read_file('../data/location/500Cities_Tracts/500Cities_Tracts_Clip.shp')
print(census_tracts.crs)
census_tracts.head( )

epsg:3857


Unnamed: 0,place2010,tract2010,ST,PlaceName,plctract10,PlcTrPop10,geometry
0,107000,1073000100,1,Birmingham,0107000-01073000100,3042,"POLYGON ((-9653001.056 3974630.899, -9652982.0..."
1,107000,1073000300,1,Birmingham,0107000-01073000300,2735,"POLYGON ((-9657462.853 3968794.996, -9657470.2..."
2,107000,1073000400,1,Birmingham,0107000-01073000400,3338,"POLYGON ((-9657275.613 3972217.160, -9657275.3..."
3,107000,1073000500,1,Birmingham,0107000-01073000500,2864,"POLYGON ((-9660666.407 3968887.964, -9660631.7..."
4,107000,1073000700,1,Birmingham,0107000-01073000700,2577,"POLYGON ((-9661391.097 3968591.429, -9661398.2..."


In [3]:
# Filter rows outside of Nashville
census_tracts = census_tracts[census_tracts.PlaceName == 'Nashville']
census_tracts

Unnamed: 0,place2010,tract2010,ST,PlaceName,plctract10,PlcTrPop10,geometry
23537,4752006,47037010103,47,Nashville,4752006-47037010103,2377,"POLYGON ((-9665101.952 4349117.051, -9665110.6..."
23538,4752006,47037010104,47,Nashville,4752006-47037010104,3070,"POLYGON ((-9663717.025 4341008.540, -9663723.7..."
23539,4752006,47037010105,47,Nashville,4752006-47037010105,4268,"POLYGON ((-9665970.800 4337243.674, -9665968.2..."
23540,4752006,47037010106,47,Nashville,4752006-47037010106,2668,"POLYGON ((-9660650.952 4338992.549, -9660629.5..."
23541,4752006,47037010201,47,Nashville,4752006-47037010201,4041,"POLYGON ((-9656019.726 4341048.317, -9655844.8..."
...,...,...,...,...,...,...,...
23691,4752006,47037019500,47,Nashville,4752006-47037019500,5916,"POLYGON ((-9658131.122 4322531.407, -9658130.5..."
23692,4752006,47037019600,47,Nashville,4752006-47037019600,3764,"POLYGON ((-9651287.086 4323212.955, -9651268.6..."
24585,4752006,47037010301,47,Nashville,4752006-47037010301,0,"MULTIPOLYGON (((-9655155.660 4340929.541, -965..."
24586,4752006,47037980100,47,Nashville,4752006-47037980100,0,"POLYGON ((-9647676.660 4320095.393, -9647190.8..."


In [4]:
# import all_providers location info
all_providers = pd.read_csv('../data/location/all_providers_geocoded.csv')
print(all_providers.shape)
all_providers.head(20)

(2316, 9)


Unnamed: 0.1,Unnamed: 0,full_name,address,address2,city,state,zip,longitude,latitude
0,0,Ronald Brizendine,#6 sixth street suite 205,,Bristol,TN,37620.0,,
1,1,Tayebeh Asad sangabi,1 innis brook lane,,Brentwood,TN,37027.0,-86.731094,35.970486
2,2,Tim Bongartz,1 medical center blvd,,Cookeville,TN,38501.0,-85.50952,36.170155
3,3,Frances Thomason,1 medical center blvd,,Cookeville,TN,385014294.0,-85.50952,36.170155
4,4,Jasmine Olive,1 medical center blvd,,Cookeville,TN,38501.0,-85.50952,36.170155
5,5,Cassondra Horn,1 medical park blvd,,Bristol,TN,376207430.0,,
6,6,Marilyn Goldstein,1 medical park blvd,,Bristol,TN,376207430.0,,
7,7,Barbara Smith,1 medical park blvd,Suite 458-w,Bristol,TN,376207430.0,,
8,8,Mary King,1 medical park blvd,,Bristol,TN,376207430.0,,
9,9,Cynthia Johnson,1 medical park blvd,,Bristol,TN,376207430.0,,


In [5]:
# Filter rows outside of Nashville
all_providers = all_providers[all_providers.city == 'Nashville']
all_providers

Unnamed: 0.1,Unnamed: 0,full_name,address,address2,city,state,zip,longitude,latitude
43,43,Leonard Webster,1005 d.b. todd blvd,,Nashville,TN,37208.0,-86.806170,36.168274
45,45,Carol Ziegler,"1005 db todd blvd, suite 300",Meharry family practice clinic,Nashville,TN,372083599.0,,
46,46,Nitara Carswell,1005 dr d.b. todd jr blvd,,Nashville,TN,372083501.0,-86.806170,36.168274
47,47,Lloyda Williamson,1005 dr db todd jr blvd,,Nashville,TN,372083501.0,-86.806170,36.168274
48,48,Vincent Morelli,1005 dr. d. b. todd blvd,,Nashville,TN,37208.0,,
...,...,...,...,...,...,...,...,...,...
2311,2311,Meredith Coleman,Vumc anesthesiology,1301 medical center drive,Nashville,TN,372325614.0,-86.800220,36.141613
2312,2312,John Heaphy,Vumc dept of oto med ctr east south tower,"1215 21st avenue south, suite 7209",Nashville,TN,372328605.0,-86.800220,36.141613
2313,0,East Public Health Center,1015 East Trinity Lane,,Nashville,TN,,-86.745286,36.204273
2314,1,Woodbine Public Health Center,224 Oriel Avenue,,Nashville,TN,,-86.743627,36.122097


In [6]:
# drop rows w/o longitude and latitude
all_providers = all_providers[all_providers['longitude'].notna()]
all_providers.head()

Unnamed: 0.1,Unnamed: 0,full_name,address,address2,city,state,zip,longitude,latitude
43,43,Leonard Webster,1005 d.b. todd blvd,,Nashville,TN,37208.0,-86.80617,36.168274
46,46,Nitara Carswell,1005 dr d.b. todd jr blvd,,Nashville,TN,372083501.0,-86.80617,36.168274
47,47,Lloyda Williamson,1005 dr db todd jr blvd,,Nashville,TN,372083501.0,-86.80617,36.168274
54,54,Olabisi Akanbi,1005 dr. d.b. todd jr. boulevard,,Nashville,TN,372083599.0,-86.80617,36.168274
73,73,William Worthington,101 hillwood blvd,,Nashville,TN,372052811.0,-86.85317,36.122086


In [7]:
# add geometry to all_providers
all_providers['geometry'] = all_providers.apply(lambda x: Point((float(x.longitude), 
                                                         float(x.latitude))), axis=1)
all_providers.tail(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0.1,Unnamed: 0,full_name,address,address2,city,state,zip,longitude,latitude,geometry
2028,2028,Alvin Powers,7465 mrb iv,"Vanderbilt university medical, diabetes",Nashville,TN,37232.0,-86.80022,36.141613,POINT (-86.80022 36.141613)
2040,2040,Scott Cranford,7629 highway 70 s,,Nashville,TN,372211706.0,-86.95398,36.07984,POINT (-86.95398 36.07984)
2041,2041,Steven Walker,7648 highway 70 s,Suite 16,Nashville,TN,372211742.0,-86.95398,36.07984,POINT (-86.95398 36.07984)
2084,2084,Seth Karp,805 oxford house,1313 21st avenue south,Nashville,TN,372324745.0,-86.80022,36.141613,POINT (-86.80022 36.141613)
2091,2091,George Livingston,810 dominican dr,Suite 211,Nashville,TN,372281906.0,-86.802635,36.18696,POINT (-86.802635 36.18696)
2097,2097,James Moore,8122 sawyer brown rd,Suite 206,Nashville,TN,372211411.0,-86.93957,36.084946,POINT (-86.93956999999997 36.084946)
2167,2167,Kimberly Pace,904 locklayer st,,Nashville,TN,372083122.0,-86.79131,36.1709,POINT (-86.79131 36.1709)
2216,2216,Daniel Wood,95 white bridge rd,Suite 415,Nashville,TN,372051497.0,-86.85763,36.131966,POINT (-86.85763 36.131966)
2298,2298,Howard Curlin,Department of ob gyn,R-1200 medical center north,Nashville,TN,372320001.0,-86.80022,36.141613,POINT (-86.80022 36.141613)
2299,2299,Elizabeth Lio,Department of radiology medical center nort,,Nashville,TN,372320001.0,-86.80022,36.141613,POINT (-86.80022 36.141613)


In [8]:
# create geodataframe from all_providers
all_providers_geo = gpd.GeoDataFrame(all_providers, 
                           crs = census_tracts.crs, 
                           geometry = all_providers['geometry'])

In [9]:
all_providers_geo

Unnamed: 0.1,Unnamed: 0,full_name,address,address2,city,state,zip,longitude,latitude,geometry
43,43,Leonard Webster,1005 d.b. todd blvd,,Nashville,TN,37208.0,-86.806170,36.168274,POINT (-86.80617 36.16827)
46,46,Nitara Carswell,1005 dr d.b. todd jr blvd,,Nashville,TN,372083501.0,-86.806170,36.168274,POINT (-86.80617 36.16827)
47,47,Lloyda Williamson,1005 dr db todd jr blvd,,Nashville,TN,372083501.0,-86.806170,36.168274,POINT (-86.80617 36.16827)
54,54,Olabisi Akanbi,1005 dr. d.b. todd jr. boulevard,,Nashville,TN,372083599.0,-86.806170,36.168274,POINT (-86.80617 36.16827)
73,73,William Worthington,101 hillwood blvd,,Nashville,TN,372052811.0,-86.853170,36.122086,POINT (-86.85317 36.12209)
...,...,...,...,...,...,...,...,...,...,...
2311,2311,Meredith Coleman,Vumc anesthesiology,1301 medical center drive,Nashville,TN,372325614.0,-86.800220,36.141613,POINT (-86.80022 36.14161)
2312,2312,John Heaphy,Vumc dept of oto med ctr east south tower,"1215 21st avenue south, suite 7209",Nashville,TN,372328605.0,-86.800220,36.141613,POINT (-86.80022 36.14161)
2313,0,East Public Health Center,1015 East Trinity Lane,,Nashville,TN,,-86.745286,36.204273,POINT (-86.74529 36.20427)
2314,1,Woodbine Public Health Center,224 Oriel Avenue,,Nashville,TN,,-86.743627,36.122097,POINT (-86.74363 36.12210)


In [10]:
# take another look at census tracts file, notice geometry is Polygons, compared to Points in all_providers_geo
census_tracts

Unnamed: 0,place2010,tract2010,ST,PlaceName,plctract10,PlcTrPop10,geometry
23537,4752006,47037010103,47,Nashville,4752006-47037010103,2377,"POLYGON ((-9665101.952 4349117.051, -9665110.6..."
23538,4752006,47037010104,47,Nashville,4752006-47037010104,3070,"POLYGON ((-9663717.025 4341008.540, -9663723.7..."
23539,4752006,47037010105,47,Nashville,4752006-47037010105,4268,"POLYGON ((-9665970.800 4337243.674, -9665968.2..."
23540,4752006,47037010106,47,Nashville,4752006-47037010106,2668,"POLYGON ((-9660650.952 4338992.549, -9660629.5..."
23541,4752006,47037010201,47,Nashville,4752006-47037010201,4041,"POLYGON ((-9656019.726 4341048.317, -9655844.8..."
...,...,...,...,...,...,...,...
23691,4752006,47037019500,47,Nashville,4752006-47037019500,5916,"POLYGON ((-9658131.122 4322531.407, -9658130.5..."
23692,4752006,47037019600,47,Nashville,4752006-47037019600,3764,"POLYGON ((-9651287.086 4323212.955, -9651268.6..."
24585,4752006,47037010301,47,Nashville,4752006-47037010301,0,"MULTIPOLYGON (((-9655155.660 4340929.541, -965..."
24586,4752006,47037980100,47,Nashville,4752006-47037980100,0,"POLYGON ((-9647676.660 4320095.393, -9647190.8..."


In [25]:
# Spatial Join isn't working for me. I think this is a dead end
result = gpd.sjoin(all_providers_geo, census_tracts, op = 'contains', how='left')

In [24]:
result

Unnamed: 0.1,index_left,Unnamed: 0,full_name,address,address2,city,state,zip,longitude,latitude,place2010,tract2010,ST,PlaceName,plctract10,PlcTrPop10,geometry
23537,,,,,,,,,,,4752006,47037010103,47,Nashville,4752006-47037010103,2377,"POLYGON ((-9665101.952 4349117.051, -9665110.6..."
23538,,,,,,,,,,,4752006,47037010104,47,Nashville,4752006-47037010104,3070,"POLYGON ((-9663717.025 4341008.540, -9663723.7..."
23539,,,,,,,,,,,4752006,47037010105,47,Nashville,4752006-47037010105,4268,"POLYGON ((-9665970.800 4337243.674, -9665968.2..."
23540,,,,,,,,,,,4752006,47037010106,47,Nashville,4752006-47037010106,2668,"POLYGON ((-9660650.952 4338992.549, -9660629.5..."
23541,,,,,,,,,,,4752006,47037010201,47,Nashville,4752006-47037010201,4041,"POLYGON ((-9656019.726 4341048.317, -9655844.8..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23691,,,,,,,,,,,4752006,47037019500,47,Nashville,4752006-47037019500,5916,"POLYGON ((-9658131.122 4322531.407, -9658130.5..."
23692,,,,,,,,,,,4752006,47037019600,47,Nashville,4752006-47037019600,3764,"POLYGON ((-9651287.086 4323212.955, -9651268.6..."
24585,,,,,,,,,,,4752006,47037010301,47,Nashville,4752006-47037010301,0,"MULTIPOLYGON (((-9655155.660 4340929.541, -965..."
24586,,,,,,,,,,,4752006,47037980100,47,Nashville,4752006-47037980100,0,"POLYGON ((-9647676.660 4320095.393, -9647190.8..."


In [14]:
# save new dataframes
print(type(all_providers_geo))
print(type(census_tracts))


# result.to_file('../data/clean/test.shp')
# countries_gdf.to_file("countries.shp")
# # countries_gdf.to_file("countries.geojson", driver='GeoJSON')
# result.to_file("../data/clean/test.geojson", driver='GeoJSON')


<class 'geopandas.geodataframe.GeoDataFrame'>
<class 'geopandas.geodataframe.GeoDataFrame'>
