In [1]:
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster 


%matplotlib inline
import geopandas as gpd
import numpy as np

import matplotlib.pylab as pylab
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import json


<h2> read in CSVs as DataFrames

In [2]:
clusters = pd.read_csv('..\data\clusters.csv')
clusters.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cluster Name        62 non-null     object 
 1   Facility Type       62 non-null     object 
 2   Cluster Start Date  62 non-null     object 
 3   # Cases             62 non-null     int64  
 4   Latitude            52 non-null     float64
 5   Longitude           52 non-null     object 
dtypes: float64(1), int64(1), object(4)
memory usage: 3.0+ KB


In [3]:
clusters_by_type = pd.read_csv('..\data\clusters_by_type.csv')

In [4]:
covid_only = pd.read_csv('..\data\covid_only.csv')

<h2> Get a 'geometry' column that can be mapped w/ geopandas

In [5]:
clusters['Latitude'].astype(float)
clusters = clusters.dropna().reset_index()
clusters

Unnamed: 0,index,Cluster Name,Facility Type,Cluster Start Date,# Cases,Latitude,Longitude
0,0,Vanderbilt Parties,Social Gathering,3/11/2020,49,36.125891,-86.822863
1,1,Event at Clementine Hall,Social Gathering,3/14/2020,23,36.152444,-86.8467716
2,3,The Health Center at Richland Place,LTCF,4/3/2020,47,36.12875,-86.8195333
3,4,Trevecca Center for Rehab and Healing,LTCF,4/4/2020,102,36.144562,-86.7567485
4,5,Tyson Foods,Commercial-Warehouse,4/6/2020,280,36.198993,-89.8367566
5,6,Nashville Center for Rehab and Healing,LTCF,4/8/2020,12,36.134557,-86.7831177
6,7,CDM Jail,Correctional Facility,4/13/2020,22,36.0882,-86.6861683
7,8,Bordeaux,LTCF,4/15/2020,47,36.180317,-86.8504781
8,9,Cargill,Commercial-Warehouse,4/21/2020,22,36.112551,-86.759455
9,10,Bethany Center for Rehab and Healing,LTCF,4/27/2020,133,36.049614,-86.7189497


In [6]:
#clean up lat and long numbers, convert to floats
clusters['Latitude'] = clusters['Latitude'].astype(float)
clusters.iloc[25,6] = -86.7779142
clusters.iloc[4,5] = 36.3319741
clusters.iloc[4,6] = -86.7134524
#dropping Tyson Foods since the plant isn't in Davidson county

In [7]:
clusters['geometry'] = clusters.apply(lambda x: Point((float(x.Longitude),float(x.Latitude))), axis=1)

In [8]:
#convert to Geo DF

In [9]:
#covid_only = gpd.GeoDataFrame(covid_only, geometry=covid_only['geometry'])

In [10]:
clusters = gpd.GeoDataFrame(clusters,geometry=clusters['geometry'])

<h2> read in geojson of Davidson County ZIP codes </h2>
heat map of zip code violations with points of clusters that are color coded by type and when hovered over, show how many cases

In [11]:
f = open('../data/zip_codes.geojson')

In [12]:
zipcodes = json.load(f)

In [13]:
zipcodes['features'][0]

{'type': 'Feature',
 'properties': {'zip': '37115',
  'objectid': '1',
  'po_name': 'MADISON',
  'shape_stlength': '178783.02488886821',
  'shape_starea': '596553400.57885742'},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[-86.68724897141, 36.318212121092],
     [-86.687216475338, 36.318210560887],
     [-86.687181702435, 36.318208780693],
     [-86.687146962761, 36.318206917893],
     [-86.687112187256, 36.318205000347],
     [-86.687077445444, 36.318202972151],
     [-86.687042702928, 36.318200862685],
     [-86.687034690751, 36.318200362824],
     [-86.687007959928, 36.318198697129],
     [-86.68697325063, 36.318196422067],
     [-86.686938539918, 36.318194065167],
     [-86.686903827781, 36.318191625285],
     [-86.68686914958, 36.318189102792],
     [-86.686834437433, 36.318186498074],
     [-86.6867997571, 36.318183810757],
     [-86.686765111412, 36.318181040826],
     [-86.686730430357, 36.318178188106],
     [-86.686695782299, 36.318175225883],
     [-86.68666116

In [14]:
#group by on covid_only to get a count of violations by zip code

In [15]:
covid_zip = pd.DataFrame(covid_only['ZIP'].value_counts()).reset_index()
covid_zip = covid_zip.rename(columns={'index':'ZIP','ZIP':'Violations'})

<h2> Merge zipcodes (geojson) and covid_zip

In [16]:
#get the names to be the same
#zipcodes['ZIP'] = zipcodes['zip']
#make sure data type is the same
#zipcodes['ZIP'] = zipcodes['ZIP'].astype(int)

In [17]:
#i might not actually want to do this part if i'm using choropleth:
#covid_zip = pd.merge(covid_zip,zipcodes, how='inner', on='ZIP')

In [28]:
clusters

Unnamed: 0,index,Cluster Name,Facility Type,Cluster Start Date,# Cases,Latitude,Longitude,geometry
0,0,Vanderbilt Parties,Social Gathering,3/11/2020,49,36.125891,-86.822863,POINT (-86.82286 36.12589)
1,1,Event at Clementine Hall,Social Gathering,3/14/2020,23,36.152444,-86.8467716,POINT (-86.84677 36.15244)
2,3,The Health Center at Richland Place,LTCF,4/3/2020,47,36.12875,-86.8195333,POINT (-86.81953 36.12875)
3,4,Trevecca Center for Rehab and Healing,LTCF,4/4/2020,102,36.144562,-86.7567485,POINT (-86.75675 36.14456)
4,5,Tyson Foods,Commercial-Warehouse,4/6/2020,280,36.331974,-86.7135,POINT (-86.71345 36.33197)
5,6,Nashville Center for Rehab and Healing,LTCF,4/8/2020,12,36.134557,-86.7831177,POINT (-86.78312 36.13456)
6,7,CDM Jail,Correctional Facility,4/13/2020,22,36.0882,-86.6861683,POINT (-86.68617 36.08820)
7,8,Bordeaux,LTCF,4/15/2020,47,36.180317,-86.8504781,POINT (-86.85048 36.18032)
8,9,Cargill,Commercial-Warehouse,4/21/2020,22,36.112551,-86.759455,POINT (-86.75946 36.11255)
9,10,Bethany Center for Rehab and Healing,LTCF,4/27/2020,133,36.049614,-86.7189497,POINT (-86.71895 36.04961)


<h2> Plotting the info

In [18]:
#first let's get the base

In [35]:
base = px.choropleth(covid_zip, geojson=zipcodes,locations='ZIP',
                     color='Violations',scope='usa',
                     featureidkey='properties.zip',
                     color_continuous_scale='greys')
base.update_geos(fitbounds='locations',visible=False)

clusters.plot(ax=base, column='geometry', markersize=clusters['# Cases'])


#i want to adjust figsize, figure out why there's a blank space in the middle, add on the points

AttributeError: 'Figure' object has no attribute 'set_aspect'