# Open NYC COVID-19 Analysis 

This notebook is a starting point for anyone who wants to explore the most recent data on the COVID-19 outbreak in NYC.

Datasets:
- Epidemiology Data: https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases
- Population Data: https://population.un.org/wpp/Download/Standard/Population/

In [1]:
import pandas as pd
import cufflinks as cf
import plotly.graph_objs as go
import plotly.express as px
import numpy as np
from scipy.optimize import curve_fit
from scipy.integrate import odeint
from typing import List
from IPython.display import display
cf.go_offline()

## Fetch data from NYTimes

https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv

In [2]:
# Download the NYTimes county-level table and use its "date" column as the index.
nytimes_data = pd.read_csv(
    "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
).set_index("date")

# Chart the "cases" and "deaths" columns for rows whose county is "New York City".
nytimes_data.loc[
    nytimes_data["county"] == "New York City", ["cases", "deaths"]
].iplot()

# Save the complete table (every county, not only NYC) for later reuse.
nytimes_data.to_csv("output/nytime_data.csv")

## Fetch data from JHU
https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports

In [3]:
# URL template for one JHU CSSE daily report; "%s" is filled with a MM-DD-YYYY date.
daily_jhu_report_url_base = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/%s.csv"
def gather_daily_jhu_reports(date_rng):
    """Download one JHU daily report per date and stack them into a single frame.

    Each report gets a leading "Date" column holding the date it was requested
    for, and the underscore-style column names listed below are renamed to the
    slash/space-style names so that reports using either spelling share columns.

    Parameters
    ----------
    date_rng : iterable
        Date-like objects exposing ``strftime`` (e.g. ``datetime.datetime``).

    Returns
    -------
    pandas.DataFrame
        All successfully processed reports concatenated row-wise with a fresh
        0..n-1 index. An empty frame is returned when nothing was collected.

    Notes
    -----
    A date whose report cannot be fetched or processed is reported via ``print``
    and skipped. Only ``Exception`` subclasses are handled this way, so a
    ``KeyboardInterrupt`` still stops the loop.
    """
    column_aliases = {
        'Country_Region':'Country/Region',
        "Province_State": "Province/State",
        "Last_Update": "Last Update",
        "Lat": "Latitude",
        "Long_": "Longitude"
    }
    daily_reports = []
    for date in date_rng:
        date_str = date.strftime("%m-%d-%Y")
        print("Collecting data from: %s" % date_str)
        try:
            daily_report = pd.read_csv(daily_jhu_report_url_base % date_str)
            daily_report.insert(0, "Date", date)
            daily_reports.append(daily_report.rename(columns=column_aliases))
        except Exception as err:
            # Best effort: say why this date was skipped and carry on with the rest.
            print("FAILED TO FETCH DATA FOR: %s (%s: %s)" % (date_str, type(err).__name__, err))
    if not daily_reports:
        # pd.concat raises on an empty list; keep returning an empty frame instead.
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(daily_reports, axis=0, ignore_index=True)
# Collect every daily JHU report between the two dates below and cache the
# combined table as CSV, leaving the row index out of the file.
report_data = gather_daily_jhu_reports(
    pd.date_range(start="1-22-2020", end="03-27-2020").to_pydatetime()
)
report_data.to_csv("output/daily_jhu_report_data.csv", index=False)


Collecting data from: 01-22-2020
Collecting data from: 01-23-2020
Collecting data from: 01-24-2020
Collecting data from: 01-25-2020
Collecting data from: 01-26-2020
Collecting data from: 01-27-2020
Collecting data from: 01-28-2020
Collecting data from: 01-29-2020
Collecting data from: 01-30-2020
Collecting data from: 01-31-2020
Collecting data from: 02-01-2020
Collecting data from: 02-02-2020
Collecting data from: 02-03-2020
Collecting data from: 02-04-2020
Collecting data from: 02-05-2020
Collecting data from: 02-06-2020
Collecting data from: 02-07-2020
Collecting data from: 02-08-2020
Collecting data from: 02-09-2020
Collecting data from: 02-10-2020
Collecting data from: 02-11-2020
Collecting data from: 02-12-2020
Collecting data from: 02-13-2020
Collecting data from: 02-14-2020
Collecting data from: 02-15-2020
Collecting data from: 02-16-2020
Collecting data from: 02-17-2020
Collecting data from: 02-18-2020
Collecting data from: 02-19-2020
Collecting data from: 02-20-2020
Collecting

In [4]:
jhu_data = pd.read_csv("output/daily_jhu_report_data.csv")
# Unsuccessful attempt to isolate the NYC rows in the JHU reports: the result
# appears to have a large gap in dates. A row is kept when any one of the
# location labels below matches.
selector = (
    (jhu_data["Combined_Key"] == "New York City, New York, US")
    | (jhu_data["Province/State"] == "New York City, NY")
    | (jhu_data["Province/State"] == "New York County, NY")
    | (jhu_data["Admin2"] == "New York City")
)
jhu_nyc_data = jhu_data.loc[selector]
jhu_nyc_data.tail(10)

Unnamed: 0,Date,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude,FIPS,Admin2,Active,Combined_Key
3642,2020-03-06,"New York County, NY",US,2020-03-06T19:13:40,16.0,0.0,0.0,40.7128,-74.006,,,,
3853,2020-03-07,"New York County, NY",US,2020-03-07T18:23:05,11.0,0.0,0.0,40.7128,-74.006,,,,
4084,2020-03-08,"New York County, NY",US,2020-03-08T04:13:22,12.0,0.0,0.0,40.7128,-74.006,,,,
4327,2020-03-09,"New York County, NY",US,2020-03-09T17:13:16,19.0,0.0,0.0,40.7128,-74.006,,,,
7617,2020-03-22,New York,US,3/22/20 23:45,9654.0,63.0,0.0,40.767273,-73.971526,36061.0,New York City,0.0,"New York City, New York, US"
13101,2020-03-23,New York,US,2020-03-23 23:19:34,12305.0,99.0,0.0,40.767273,-73.971526,36061.0,New York City,0.0,"New York City, New York, US"
16516,2020-03-24,New York,US,2020-03-24 23:37:31,14904.0,131.0,0.0,40.767273,-73.971526,36061.0,New York City,0.0,"New York City, New York, US"
19933,2020-03-25,New York,US,2020-03-25 23:33:19,17856.0,199.0,0.0,40.767273,-73.971526,36061.0,New York City,0.0,"New York City, New York, US"
23352,2020-03-26,New York,US,2020-03-26 23:48:35,21873.0,281.0,0.0,40.767273,-73.971526,36061.0,New York City,0.0,"New York City, New York, US"
26773,2020-03-27,New York,US,2020-03-27 22:14:55,25573.0,366.0,0.0,40.767273,-73.971526,36061.0,New York City,0.0,"New York City, New York, US"


## Extract data from covid.direct
https://covid-19.direct/county/NY/New%20York%20City

In [5]:
# Population figure used as the denominator for the per-100k chart below.
nyc_pop = 8623000
raw_data = pd.read_json("covid_direct_3-25-20.json")

# Build the cleaned frame in one non-mutating chain. Calling dropna(inplace=True)
# and assigning a column on a frame derived from raw_data is the pattern pandas
# flags with SettingWithCopyWarning.
clean_nyc_data = (
    raw_data[["confirmed", "death", "fulldate", "newcase"]]
    .dropna()
    .assign(fulldate=lambda frame: pd.to_datetime(frame["fulldate"]))
    .set_index("fulldate")
)

clean_nyc_data[["confirmed", "death"]].iplot(xTitle="Date", yTitle="# of Patients", title="NYC COVID-19 Counts (covid.direct)")
# The per-100k chart uses the same covid.direct data, so its title names that
# source (it previously said "JHU").
(clean_nyc_data[["confirmed", "death"]]*100000/nyc_pop).iplot(xTitle="Date", yTitle="# of Patients per 100k People", title="NYC COVID-19 Counts per 100k (covid.direct)")
clean_nyc_data

Unnamed: 0_level_0,confirmed,death,newcase
fulldate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-02-10,0.0,0.0,0.0
2020-02-11,0.0,0.0,0.0
2020-02-12,0.0,0.0,0.0
2020-02-13,0.0,0.0,0.0
2020-02-14,0.0,0.0,0.0
2020-02-15,0.0,0.0,0.0
2020-02-16,0.0,0.0,0.0
2020-02-17,0.0,0.0,0.0
2020-02-18,0.0,0.0,0.0
2020-02-19,0.0,0.0,0.0


## Fetch data from csbs.org

https://github.com/tomquisel/covid19-data/tree/master/data/csv

In [6]:
# URL template for one daily CSV in the covid19-data repo; "%s" is filled with a YYYY-MM-DD date.
daily_csbs_report_url_base = "https://raw.githubusercontent.com/tomquisel/covid19-data/master/data/csv/%s.csv"
def gather_daily_csbs_reports(date_rng):
    """Download one daily CSBS report per date and stack them into a single frame.

    Each report gets a leading "Date" column holding the date it was requested for.

    Parameters
    ----------
    date_rng : iterable
        Date-like objects exposing ``strftime`` (e.g. ``datetime.datetime``).

    Returns
    -------
    pandas.DataFrame
        All successfully processed reports concatenated row-wise with a fresh
        0..n-1 index. An empty frame is returned when nothing was collected.

    Notes
    -----
    A date whose report cannot be fetched or processed is reported via ``print``
    and skipped. Only ``Exception`` subclasses are handled this way, so a
    ``KeyboardInterrupt`` still stops the loop.
    """
    daily_reports = []
    for date in date_rng:
        date_str = date.strftime("%Y-%m-%d")
        print("Collecting data from: %s" % date_str)
        try:
            daily_report = pd.read_csv(daily_csbs_report_url_base % date_str)
            daily_report.insert(0, "Date", date)
            daily_reports.append(daily_report)
        except Exception as err:
            # Best effort: say why this date was skipped and carry on with the rest.
            print("FAILED TO FETCH DATA FOR: %s (%s: %s)" % (date_str, type(err).__name__, err))
    if not daily_reports:
        # pd.concat raises on an empty list; keep returning an empty frame instead.
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(daily_reports, axis=0, ignore_index=True)
# Collect every daily CSBS report between the two dates below and cache the
# combined table as CSV, leaving the row index out of the file.
report_data = gather_daily_csbs_reports(
    pd.date_range(start="03-14-2020", end="03-27-2020").to_pydatetime()
)
report_data.to_csv("output/daily_csbs_report_data.csv", index=False)


Collecting data from: 2020-03-14
Collecting data from: 2020-03-15
Collecting data from: 2020-03-16
Collecting data from: 2020-03-17
Collecting data from: 2020-03-18
Collecting data from: 2020-03-19
Collecting data from: 2020-03-20
Collecting data from: 2020-03-21
Collecting data from: 2020-03-22
Collecting data from: 2020-03-23
Collecting data from: 2020-03-24
Collecting data from: 2020-03-25
Collecting data from: 2020-03-26
Collecting data from: 2020-03-27


In [7]:
csbs_data = pd.read_csv("output/daily_csbs_report_data.csv")

# Keep the rows whose County_Name is "New York", parse "Date" and index by it.
# Built as one non-mutating chain: assigning a column on the boolean-filtered
# slice is what triggers pandas' SettingWithCopyWarning.
nyc_csbs_data = (
    csbs_data.loc[csbs_data["County_Name"] == "New York"]
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .set_index("Date")
)
nyc_csbs_data[["Confirmed", "Death"]].iplot(xTitle="Date", yTitle="# of Patients", title="NYC COVID-19 Counts (CSBS)")

# 