# Description

This Jupyter Notebook contains the code to download the NYT COVID-19 dataset from GitHub and import it into a Pandas DataFrame to be manipulated using Python.

This notebook is intended to be run in [Google Colaboratory](https://colab.research.google.com/).

Please note that this notebook's default kernel is Python. If you would like to run R code in it, you must pass the R code to an R kernel using Python magic functions: the cell magic function `%%R` or the line magic function `%R`.

# Set up notebook

In [1]:
# Import packages
import pandas as pd

# Enable interactive display of tabular data in Colab
%load_ext google.colab.data_table

# Download and import data

In [2]:
# Download NYT COVID-19 dataset from GitHub
%%shell
cd /
git clone https://github.com/nytimes/covid-19-data.git

Cloning into 'covid-19-data'...
remote: Enumerating objects: 15, done.[K
remote: Counting objects: 100% (15/15), done.[K
remote: Compressing objects: 100% (14/14), done.[K
remote: Total 3784 (delta 5), reused 8 (delta 1), pack-reused 3769[K
Receiving objects: 100% (3784/3784), 36.97 MiB | 16.42 MiB/s, done.
Resolving deltas: 100% (2165/2165), done.




In [3]:
# Import and display all data
all_data = pd.read_csv('/covid-19-data/us-counties.csv', engine='c')
all_data

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0
...,...,...,...,...,...,...
337563,2020-07-15,Sweetwater,Wyoming,56037.0,157,1
337564,2020-07-15,Teton,Wyoming,56039.0,170,1
337565,2020-07-15,Uinta,Wyoming,56041.0,208,0
337566,2020-07-15,Washakie,Wyoming,56043.0,42,5


In [4]:
# Import and display excess_deaths data
excess_deaths = pd.read_csv('/covid-19-data/excess-deaths/deaths.csv', engine='c')
excess_deaths

Unnamed: 0,country,placename,frequency,start_date,end_date,year,month,week,deaths,expected_deaths,excess_deaths,baseline
0,Austria,,weekly,2020-01-06,2020-01-12,2020,1,2.0,1702,1814.0,-112.0,2015-2019 historical data
1,Austria,,weekly,2020-01-13,2020-01-19,2020,1,3.0,1797,1824.0,-27.0,2015-2019 historical data
2,Austria,,weekly,2020-01-20,2020-01-26,2020,1,4.0,1778,1832.0,-54.0,2015-2019 historical data
3,Austria,,weekly,2020-01-27,2020-02-02,2020,2,5.0,1947,1836.0,111.0,2015-2019 historical data
4,Austria,,weekly,2020-02-03,2020-02-09,2020,2,6.0,1678,1835.0,-157.0,2015-2019 historical data
...,...,...,...,...,...,...,...,...,...,...,...,...
6707,United States,,weekly,,,2015-2019 average,11,47.0,47199,,,
6708,United States,,weekly,,,2015-2019 average,12,48.0,47946,,,
6709,United States,,weekly,,,2015-2019 average,12,49.0,48758,,,
6710,United States,,weekly,,,2015-2019 average,12,50.0,49557,,,


In [7]:
# Multiindex data for better display
all_data_multiindexed = all_data.set_index(['date', 'state', 'county'])
all_data_multiindexed

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fips,cases,deaths
date,state,county,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-21,Washington,Snohomish,53061.0,1,0
2020-01-22,Washington,Snohomish,53061.0,1,0
2020-01-23,Washington,Snohomish,53061.0,1,0
2020-01-24,Illinois,Cook,17031.0,1,0
2020-01-24,Washington,Snohomish,53061.0,1,0
...,...,...,...,...,...
2020-07-15,Wyoming,Sweetwater,56037.0,157,1
2020-07-15,Wyoming,Teton,56039.0,170,1
2020-07-15,Wyoming,Uinta,56041.0,208,0
2020-07-15,Wyoming,Washakie,56043.0,42,5


In [8]:
# Multiindex data for better display
excess_deaths_multiindexed = excess_deaths.set_index(['country', 'placename'])
excess_deaths_multiindexed

Unnamed: 0_level_0,Unnamed: 1_level_0,frequency,start_date,end_date,year,month,week,deaths,expected_deaths,excess_deaths,baseline
country,placename,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Austria,,weekly,2020-01-06,2020-01-12,2020,1,2.0,1702,1814.0,-112.0,2015-2019 historical data
Austria,,weekly,2020-01-13,2020-01-19,2020,1,3.0,1797,1824.0,-27.0,2015-2019 historical data
Austria,,weekly,2020-01-20,2020-01-26,2020,1,4.0,1778,1832.0,-54.0,2015-2019 historical data
Austria,,weekly,2020-01-27,2020-02-02,2020,2,5.0,1947,1836.0,111.0,2015-2019 historical data
Austria,,weekly,2020-02-03,2020-02-09,2020,2,6.0,1678,1835.0,-157.0,2015-2019 historical data
...,...,...,...,...,...,...,...,...,...,...,...
United States,,weekly,,,2015-2019 average,11,47.0,47199,,,
United States,,weekly,,,2015-2019 average,12,48.0,47946,,,
United States,,weekly,,,2015-2019 average,12,49.0,48758,,,
United States,,weekly,,,2015-2019 average,12,50.0,49557,,,
