
- toc: false
- comments: true
- image: images/US_case_count.png
- hide: false
- search_exclude: false
- categories: geospatial
- author: Shantam Raj
- badges: true

In [2]:
import geopandas as gpd
import altair as alt
import pandas as pd
alt.renderers.set_embed_options(actions=False)

# US Census Bureau cartographic boundary shapefiles (2019, 20m)
state_shpfile = './shapes/cb_2019_us_state_20m'
county_shpfile = './shapes/cb_2019_us_county_20m'


def add_centroid_coords(frame):
    """Store each geometry's centroid as ``lon``/``lat`` columns on ``frame`` (in place)."""
    centroids = frame['geometry'].centroid
    frame['lon'] = centroids.x
    frame['lat'] = centroids.y
    return frame


# State outlines plus a label/marker position for each state
states = add_centroid_coords(gpd.read_file(state_shpfile))

# County outlines plus a marker position for each county
county = add_centroid_coords(gpd.read_file(county_shpfile))


DriverError: ./shapes/cb_2019_us_state_20m: No such file or directory

In [None]:
# NYT dataset: county-level COVID-19 CSV from the nytimes/covid-19-data GitHub repository.
# Later cells read its `fips`, `county`, `state`, `date`, `cases` and `deaths` columns.
county_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
# Fetched over the network on every run, so results follow whatever is on `master` at that moment.
cdf = pd.read_csv(county_url)

In [None]:
# Rows that carry no FIPS code, totalled per county name.
# numeric_only=True restricts the sum to the numeric columns; without it the
# text columns (date, state) are either dropped implicitly (older pandas) or
# concatenated into long strings (pandas >= 2.0).
cdf[cdf['fips'].isna()].groupby('county').sum(numeric_only=True)

In [None]:
#hide_output
# Same check as above, but split by state as well so identically named
# entries in different states are not merged together.
# numeric_only=True keeps the text `date` column out of the sum (pandas >= 2.0
# would otherwise concatenate the strings).
cdf[cdf['fips'].isna()].groupby(['county', 'state']).sum(numeric_only=True)

NYT publishes the data for New York City in a different way: it combines the results of the 5 boroughs that comprise the city into a single entry. So we will combine them too and represent the city as one entry with a custom `fips` of 1. Let's start by making this change in the raw NYT dataset itself.

In [None]:
# Tag every New York City row with the custom fips code 1, then show those rows.
is_nyc = cdf['county'] == 'New York City'
cdf.loc[is_nyc, 'fips'] = 1
cdf[is_nyc]

In [None]:
# collapse
# Most recent record per fips code. 'last' picks the final non-null value in
# each group, so order the rows by date first instead of relying on the row
# order of the downloaded CSV. The stable sort keeps the file order for ties.
# NOTE(review): assumes `date` holds ISO-8601 (YYYY-MM-DD) strings, which sort
# chronologically -- confirm. Rows whose fips is missing are left out by groupby.
latest_cases = (
    cdf.sort_values('date', kind='stable')
    .groupby('fips', as_index=False)
    .agg({'county': 'last', 'date': 'last', 'state': 'last', 'cases': 'last', 'deaths': 'last'})
)
latest_cases

In [None]:
Now we have to make the changes in our shapefile too. For that we need to **dissolve** the 5 boroughs into one single geospatial entity.

In [None]:
# New York City consists of five counties (boroughs). Their county GEOIDs:
#   36005 Bronx, 36047 Kings, 36061 New York, 36081 Queens, 36085 Richmond
nyc_borough_geoids = ['36005', '36047', '36061', '36081', '36085']
spatial_nyc = county[county['GEOID'].isin(nyc_borough_geoids)]

In [None]:
# Merge the borough polygons into one shape per STATEFP value
# (presumably all five rows share the same STATEFP -- verify), then draw the
# separate boroughs and the merged outline side by side.
combined_nyc = spatial_nyc.dissolve(by='STATEFP')

outline = dict(stroke='white', strokeWidth=3)
boroughs_map = alt.Chart(spatial_nyc).mark_geoshape(**outline).encode()
merged_map = alt.Chart(combined_nyc).mark_geoshape(**outline).encode()
boroughs_map | merged_map

In [None]:
# Dissolved New York City geometry as a regular row, relabelled with the
# custom identifier used for the city in the NYT data (fips 1).
agg_nyc_data = (
    spatial_nyc.dissolve(by='STATEFP')
    .reset_index()
    .assign(GEOID='1', fips=1)
)

# Recompute the marker position from the merged geometry.
nyc_centroid = agg_nyc_data['geometry'].centroid
agg_nyc_data['lon'] = nyc_centroid.x
agg_nyc_data['lat'] = nyc_centroid.y

In [None]:
# Display the dissolved New York City frame to check GEOID, fips, lon and lat.
agg_nyc_data

In [None]:
# hide_output
# All county shapes plus the dissolved New York City row appended at the end.
county_nyc = gpd.GeoDataFrame(pd.concat([county, agg_nyc_data], ignore_index=True))

# Integer fips key derived from the GEOID strings, used to join with the case data.
county_nyc['fips'] = county_nyc['GEOID'].astype('int')
county_nyc


In [None]:
# How many fips codes in the case data have (True) or lack (False) a shape.
has_shape = latest_cases['fips'].isin(county_nyc['fips'])
has_shape.value_counts()

In [None]:
# The New York City entry in the latest case data.
latest_cases.loc[latest_cases['county'].eq('New York City')]

In [None]:
# The matching shape row carrying the custom fips code 1.
county_nyc.loc[county_nyc['fips'].eq(1)]

In [None]:
# collapse
# Attach the latest counts to every shape; the left join keeps shapes that
# have no matching row in the case data.
latest_cases_w_fips = county_nyc.merge(latest_cases, on='fips', how='left')

# Hovering over a circle selects it; nothing is selected otherwise.
circle_selection = alt.selection_single(on='mouseover', empty='none')

# Circle area follows the case count; the hovered circle turns black.
case_size = alt.Size(
    'cases:Q',
    scale=alt.Scale(domain=[0, 7000]),
    legend=alt.Legend(title="Cases"),
)
hover_color = alt.condition(circle_selection, alt.value('black'), alt.value('red'))

# One circle per row, placed at the stored centroid coordinates.
circles = (
    alt.Chart(latest_cases_w_fips)
    .mark_point(
        fillOpacity=0.2,
        fill='red',
        strokeOpacity=1,
        color='red',
        strokeWidth=1,
    )
    .encode(
        latitude="lat:Q",
        longitude="lon:Q",
        size=case_size,
        tooltip=['county:N', 'cases:Q', 'deaths:Q'],
        color=hover_color,
    )
    .project(type='albersUsa')
    .properties(width=1000, height=700)
    .add_selection(circle_selection)
)

# Light grey state outlines used as the base layer.
state = (
    alt.Chart(states)
    .mark_geoshape(fill='#ededed', stroke='white')
    .encode()
    .project(type='albersUsa')
)

# State name labels at the state centroids; Puerto Rico is filtered out.
state_text = (
    state.mark_text()
    .transform_filter(alt.datum.NAME != 'Puerto Rico')
    .encode(longitude='lon:Q', latitude='lat:Q', text='NAME')
    .project(type='albersUsa')
)


In [None]:
# Image-mark experiment. The chart needs a data source: `source` was never
# defined in this notebook, and encoding shorthands such as '20' name data
# *fields*, not literal values. Supply a one-row frame holding the position
# and the image path, and point the encodings at its columns.
source = pd.DataFrame({'x': [20], 'y': [20], 'img': ['../dp.jpg']})

test = alt.Chart(source).mark_image(
    width=50,
    height=50
).encode(
    x='x:Q',
    y='y:Q',
    url='img:N'
)

In [None]:
# Stack the layers (outlines, case circles, state labels) and hide the view border.
us_map = state + circles + state_text
us_map.configure_view(strokeWidth=0)