# COVID-19 for New York State
---

**Purpose:**

Review daily COVID-19 testing data (new and cumulative positives and tests) by county.

**Data Source:**

https://data.ny.gov/

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in matplotlib 3.6
# and the old alias was later removed, so use whichever name this install
# provides (falling back to the original name keeps the original error if
# neither exists).
_DARKGRID_STYLES = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
plt.style.use(
    next((s for s in _DARKGRID_STYLES if s in plt.style.available), 'seaborn-darkgrid')
)

In [None]:
# CSV download endpoint for the testing dataset analysed below
URL = (
    'https://health.data.ny.gov/api/views/xdss-u53e/rows.csv'
    '?accessType=DOWNLOAD'
)

In [None]:
# echo the download address so the data source is visible in the cell output
print(URL)

## Analyze the Dataframes

In [None]:
# load the CSV straight from the URL, parsing 'Test Date' as datetimes
df = pd.read_csv(filepath_or_buffer=URL, parse_dates=['Test Date'])

# print the column dtypes and non-null counts
df.info()

In [None]:
# count the null entries in every column
df.isnull().sum()

In [None]:
# look at 10 randomly chosen rows
df.sample(n=10)

In [None]:
# the 10 rows with the largest 'New Positives' values
df.nlargest(10, 'New Positives')

In [None]:
# rows that reported no new positives
df.loc[df['New Positives'] == 0]

In [None]:
# review by exact date

# numeric_only=True keeps non-numeric columns such as 'County' out of the
# aggregation; pandas >= 2.0 no longer drops them automatically
(
    df.groupby('Test Date')
    .sum(numeric_only=True)
    .plot(subplots=True, alpha=.5, layout=(2, 2), figsize=(14, 10))
);

In [None]:
# review by monthly sum

# numeric_only=True keeps non-numeric columns such as 'County' out of the
# aggregation; pandas >= 2.0 no longer drops them automatically
(
    df.groupby(pd.Grouper(key='Test Date', freq='M'))
    .sum(numeric_only=True)
    .plot(subplots=True, lw=3, ls='dashed', layout=(2, 2), marker='o',
          markerfacecolor='k', figsize=(14, 10))
);

In [None]:
# review by weekly average

# numeric_only=True is required on pandas >= 2.0: without it mean() tries to
# average the non-numeric 'County' column and raises TypeError
(
    df.groupby(pd.Grouper(key='Test Date', freq='W'))
    .mean(numeric_only=True)
    .plot.bar(subplots=True, layout=(2, 2), figsize=(14, 10))
);

In [None]:
# total new positives per county, drawn as horizontal bars in ascending order
positives_by_county = df.groupby('County')['New Positives'].sum()
positives_by_county.sort_values().plot.barh(figsize=(12, 15));

In [None]:
# get geospatial info for counties (this rebinds URL to the second dataset)
URL = (
    'https://data.ny.gov/api/views/4xc7-bukh/rows.csv'
    '?accessType=DOWNLOAD&sorting=true'
)
print(URL)

In [None]:
# load the county reference table from the current URL
cs = pd.read_csv(filepath_or_buffer=URL)

# print the column dtypes and non-null counts
cs.info()

In [None]:
# preview the first five rows of the county table
cs.head(n=5)

In [None]:
# counties present in the test data but absent from the county table
missing_county = set(df['County']) - set(cs['County'])
missing_county

In [None]:
# rows of the test data whose county is in missing_county
df.loc[df['County'].isin(missing_county)]

In [None]:
# the reverse check: counties in the county table that never appear in the test data
missing_county = set(cs['County']) - set(df['County'])
missing_county

In [None]:
# fix county name so that both dataframes will align
# (plain-text substitution; the search text has no regex metacharacters)
cs['County'] = cs['County'].str.replace('St Lawrence', 'St. Lawrence', regex=False)

In [None]:
# keep only the rows from the most recent test date, then attach the county
# table with a left join on the columns the two frames share, so every
# test row is kept
latest_date = df['Test Date'].max()
latest_rows = df[df['Test Date'] == latest_date]
xf = latest_rows.merge(cs, how='left')
xf.head()

In [None]:
# count the null entries per column after the merge
xf.isnull().sum()

In [None]:
# the merged frame should contain a single distinct test date
test_dates = xf['Test Date']
test_dates.unique()

## Map the Dataframes

In [None]:
!pip install folium --upgrade

In [None]:
import folium

In [None]:
# find the middle of the map (median skips missing coordinates)
median_lat = xf.Latitude.median()
median_lon = xf.Longitude.median()

# create the map
m = folium.Map(location=(median_lat, median_lon), zoom_start=7, tiles='CartoDB Positron')

# The left merge leaves Latitude/Longitude as NaN for any county without a
# match in the county table, and folium rejects NaN locations, so only rows
# with both coordinates get a marker.
mappable = xf.dropna(subset=['Latitude', 'Longitude'])

# add markers to the map: red above 1,000 cumulative positives, blue otherwise
for index, row in mappable.iterrows():
    positives = row['Cumulative Number of Positives']
    tests = row['Cumulative Number of Tests Performed']
    color_match = "red" if positives > 1000 else 'blue'
    popup_string = f"County: {row['County']}, Positives: {positives:,}, Tests: {tests:,}"
    folium.Marker(location=(row['Latitude'], row['Longitude']),
                  popup=popup_string,
                  icon=folium.Icon(color=color_match, icon='info-sign'),
                  opacity=.4).add_to(m)

# show the map
m