# NYCHealth Coronavirus (COVID-19) data
### Original datasource: https://github.com/nychealth/coronavirus-data

In [None]:
import csv
import json
import os
import re
from datetime import datetime, timezone

import numpy as np
import pandas as pd
import requests
from airflow.models import Variable

In [None]:
# papermill parameters
# Folder that receives the CSV written at the end of this notebook.
output_folder = "../output/"

In [None]:
# GitHub repository that hosts the source data, and the base URLs used to reach it.
REPO_NAME = 'coronavirus-data'
REPO_OWNER = 'nychealth'
API_ENDPOINT = 'https://api.github.com/repos'  # base of the commit-listing URL
RAW_DATA_ENDPOINT = 'https://raw.githubusercontent.com/'  # base for per-commit file downloads (note the trailing slash)

In [None]:
# GitHub credentials read from Airflow Variables; None is passed as the
# fallback value for each, so either may be None when it is not configured.
GIT_USER = Variable.get('GIT_USER', default_var=None)  # get GIT_USER variable from airflow
GIT_TOKEN = Variable.get('GIT_TOKEN', default_var=None)  # get GIT_TOKEN variable from airflow

In [None]:
# A single HTTP session is reused for every request made further down.
session = requests.session()
# GIT_USER and GIT_TOKEN both fall back to None. Attaching (None, None) would
# make requests send the literal basic-auth credentials "None:None", so
# credentials are attached only when at least one of them is configured;
# otherwise the session stays unauthenticated.
if GIT_USER is not None or GIT_TOKEN is not None:
    session.auth = (GIT_USER, GIT_TOKEN)

In [None]:
# Ask the GitHub API for the repository's commit list.
# NOTE(review): one request is made with no paging parameters, so only the
# first page of commits returned by the API is seen - confirm this is intended.
commits_url = f'{API_ENDPOINT}/{REPO_OWNER}/{REPO_NAME}/commits'
response = session.get(commits_url)
print(commits_url)
# Stop here with a clear HTTP error (bad credentials, rate limit, ...) rather
# than letting the next cell fail while indexing into an error payload.
response.raise_for_status()
response.text

In [None]:
# Pair each commit's author date with its SHA, in the order the API listed them.
commit_shas = [
    (entry['commit']['author']['date'], entry['sha'])
    for entry in json.loads(response.text)
]

In [None]:
# Download tests-by-zcta.csv as it existed at each listed commit and collect
# its rows, tagging every row with that commit's author date.
#
# The base URL is stripped of any trailing slash before joining so the request
# URL never contains an empty path segment ('.com//owner/...').
raw_base = RAW_DATA_ENDPOINT.rstrip('/')

df = []  # plain list of row dicts at this point
for (date, commit) in commit_shas:
    response = session.get(f'{raw_base}/{REPO_OWNER}/{REPO_NAME}/{commit}/tests-by-zcta.csv')
    if response.status_code != 200:
        # The file could not be fetched at this commit: skip it.
        continue

    for row in csv.DictReader(response.text.split('\n')):
        row['Date'] = date  # add commit_date as field: Date
        df.append(row)


In [None]:
# Turn the collected list of row dicts into a DataFrame (rebinding the same name).
df = pd.DataFrame(df)

In [None]:
# Parse the commit timestamps (e.g. 2020-04-01T12:34:56Z) into datetimes.
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%dT%H:%M:%SZ")
# Turn the textual missing-value markers 'NA' and 'nan' into real NaN values.
df = df.replace('NA', np.nan).replace('nan', np.nan)

In [None]:
# Cast the count and percentage columns, which csv.DictReader produced as
# strings, to numeric dtypes.
column_dtypes = {
    'Positive': 'int32',
    'Total': 'int32',
    'zcta_cum.perc_pos': 'float32',
}
df = df.astype(column_dtypes)

In [None]:
# Stamp every row with the time of this run as a naive UTC datetime.
# datetime.utcnow() is deprecated since Python 3.12; an aware "now" with the
# tzinfo dropped yields the same naive UTC value.
df["Last_Updated_Date"] = datetime.now(timezone.utc).replace(tzinfo=None)
# Boolean flag (not a date): True on rows whose Date equals the latest Date in the data.
df['Last_Reported_Date'] = df['Date'] == df['Date'].max()

In [None]:
# Preview the first rows as a quick visual check.
df.head()

In [None]:
# Write the result as NYC_HEALTH_TESTS.csv inside output_folder (the DataFrame
# index is written as well, since to_csv is called with its defaults).
# os.path.join is used instead of string concatenation so that an
# output_folder supplied without a trailing separator still gives a valid path.
df.to_csv(os.path.join(output_folder, "NYC_HEALTH_TESTS.csv"))