# CDC testing data

* [CDC testing time series](https://healthdata.gov/dataset/covid-19-diagnostic-laboratory-testing-pcr-testing-time-series)

DDL:
    
```sql
CREATE TABLE CDC_TESTING (
    ISO3166_1 varchar(2),
    ISO3166_2 varchar(2),
    DATE timestamp_ntz,
    POSITIVE integer,
    NEGATIVE integer,
    INCONCLUSIVE integer
)
```

In [None]:
import pandas as pd
import numpy as np
import pycountry
import json
from datetime import datetime
from functools import reduce
import requests
from io import StringIO
import re
from requests import HTTPError
from bs4 import BeautifulSoup

In [None]:
# Number of day offsets (0 .. LOOKBACK-1) that the commented-out fallback loop
# at the end of this notebook would try with get_url(); unused while that loop
# stays disabled.
LOOKBACK: int = 4

In [None]:
# papermill parameters

# Directory prefix for the generated CSV. It is joined to the file name by plain
# string concatenation, so the trailing slash is required.
output_folder = "../output/"

In [None]:
# Build the download URL for the CSV dated `day_offset` days before today (default: yesterday)

def get_url(day_offset: int = 1) -> str:
    """Return the healthdata.gov CSV URL for the file dated ``day_offset`` days ago.

    The date embedded in the file name is computed with real date arithmetic,
    so offsets that cross a month or year boundary produce a valid calendar
    date instead of a zero or negative day number.

    Args:
        day_offset: How many days before today (local time) the file is dated.
            Defaults to 1, i.e. yesterday.

    Returns:
        The URL string, with the date rendered as ``YYYYMMDD``.
    """
    # Local import: the notebook's import cell only brings in `datetime`.
    from datetime import timedelta

    # Read the clock once so year, month and day always come from the same instant.
    target = datetime.now() - timedelta(days=day_offset)
    return (
        "https://healthdata.gov/sites/default/files/"
        f"covid-19_diagnostic_lab_testing_{target:%Y%m%d}_2203.csv"
    )

In [None]:
# Codes of all pycountry subdivisions whose code starts with "US-", with that
# prefix removed.
_us_codes = (entry.code for entry in pycountry.subdivisions if entry.code.startswith("US-"))
states = [code.replace("US-", "") for code in _us_codes]

In [None]:
# Dataset landing page; the CSV download link is scraped from its HTML.
path = "https://healthdata.gov/dataset/covid-19-diagnostic-laboratory-testing-pcr-testing-time-series"

# A timeout keeps an unattended run from hanging forever on a stalled connection.
response = requests.get(path, timeout=60)
# Raise requests.HTTPError on 4xx/5xx responses. `assert` must not be used for
# runtime validation because it is stripped when Python runs with -O.
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which optional
# parsers (lxml, html5lib) happen to be installed.
soup = BeautifulSoup(response.content, "html.parser")
link = soup.find("a", {"class": "data-link"})
if link is None or not link.get("href"):
    raise RuntimeError(f"No <a class='data-link'> download link found on {path}")
url = link["href"]

In [None]:
# Load the long-format feed (one row per state/date/outcome) and reshape it to
# one row per state/date with one count column per outcome.
df = pd.read_csv(url)
df = df.pivot(index=["state", "date"], columns="overall_outcome", values="new_results_reported")
# Derive the column names from the outcome labels themselves. pivot() sorts the
# labels alphabetically, so assigning names by position (INCONCLUSIVE, POSITIVE,
# NEGATIVE) would mislabel the counts whenever the sorted order differs, as it
# does for labels such as Inconclusive/Negative/Positive.
df.columns = [str(outcome).upper() for outcome in df.columns]
df = df.reset_index().rename(columns={"state": "ISO3166_2", "date": "DATE"})
df["ISO3166_1"] = "US"
# Fix the column order to match the CDC_TESTING table; an outcome missing from
# the feed becomes an empty column instead of breaking the export.
df = df.reindex(columns=["ISO3166_1",
                         "ISO3166_2",
                         "DATE",
                         "POSITIVE",
                         "NEGATIVE",
                         "INCONCLUSIVE"])
df.to_csv(output_folder + "CDC_TESTING.csv", index=False)


In [None]:
# for i in range(LOOKBACK):
#     try:
#         print(get_url(i))
#         df = pd.read_csv(get_url(i))
#         print(f"Read CSV with {i} days' offset.")
#         df = df[["state", "overall_outcome", "date", "new_results_reported"]].pivot(index=["state", "date"], values=["new_results_reported"], columns=["overall_outcome"])
#         df = pd.DataFrame(df.to_records())
#         df.columns = ["ISO3166_2", "DATE", "INCONCLUSIVE", "POSITIVE", "NEGATIVE"]
#         df["ISO3166_1"] = "US"
#         print(df.head())
#         df.to_csv(output_folder + "CDC_TESTING.csv", index=False, columns=["ISO3166_1",
#                                                                            "ISO3166_2",
#                                                                            "DATE",
#                                                                            "POSITIVE",
#                                                                            "NEGATIVE",
#                                                                            "INCONCLUSIVE"])
#         break
#     except Exception:
#         continue