# Collect data

## Data sources

- [Johns Hopkins University - Time Series](https://github.com/CSSEGISandData/COVID-19)
- [Johns Hopkins University - Vaccination](https://github.com/govex/COVID-19/)

## Load libraries

In [1]:
import requests
import covid_analysis.utils.paths as path

## Utility functions

In [2]:
def download_csv(url: str, out_file: path.Path, *, timeout: float = 30.0) -> None:
    """Download the resource at ``url`` and write its raw bytes to ``out_file``.

    Args:
        url: Address of the file to fetch.
        out_file: Destination path; an existing file is overwritten.
        timeout: Seconds to wait for the server to connect/send data before
            giving up. This is not a cap on the total download time.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    # Without a timeout, requests may block forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)

    # Fail loudly instead of saving an HTML/JSON error page as if it were the CSV.
    response.raise_for_status()

    # Binary mode: the payload is stored exactly as received, no re-encoding.
    with open(out_file, "wb") as file_content:
        file_content.write(response.content)


## Define default output directory

In [3]:
# Directory returned by path.data_raw_dir(); every download below is stored here.
output_dir = path.data_raw_dir()

# Create it together with any missing parents; an existing directory is fine.
output_dir.mkdir(exist_ok=True, parents=True)

## Download Johns Hopkins University time series

In [4]:
# Common URL prefix; each file name below is appended to it verbatim.
hopkins_base_url = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
)

# Time-series files to fetch.
hopkins_filenames = (
    "time_series_covid19_confirmed_global.csv",
    "time_series_covid19_deaths_global.csv",
)

# Maps each local destination (built by path.data_raw_dir) to its remote URL.
hopkins_time_series_urls = {
    path.data_raw_dir(csv_name): hopkins_base_url + csv_name
    for csv_name in hopkins_filenames
}

In [5]:
# Fetch every time-series file. A plain loop is used because download_csv is
# called only for its side effect (writing the file); collecting its None
# return values in a list would serve no purpose.
for out_path, url in hopkins_time_series_urls.items():
    download_csv(url, out_path)

## Download Johns Hopkins University countries metadata

In [6]:
# Local destination (inside output_dir) and remote source of the lookup table.
countries_meta_filename = output_dir / "UID_ISO_FIPS_LookUp_Table.csv"
countries_meta_url = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv"
)

In [7]:
# Store the countries lookup table at countries_meta_filename.
download_csv(url=countries_meta_url, out_file=countries_meta_filename)

## Download Johns Hopkins University vaccination time series

In [8]:
# Local destination (inside output_dir) and remote source of the vaccination data.
vaccination_filename = output_dir / "time_series_covid19_vaccine_global.csv"
vaccination_url = (
    "https://raw.githubusercontent.com/govex/COVID-19/master/"
    "data_tables/vaccine_data/global_data/"
    "time_series_covid19_vaccine_global.csv"
)

In [9]:
# Store the vaccination time series at vaccination_filename.
download_csv(url=vaccination_url, out_file=vaccination_filename)