# European Centre for Disease Prevention and Control Dataset

In [None]:
import pandas as pd
import datetime
import pycountry
import re
import os
import numpy as np

In [None]:
# papermill parameters
# Directory prefix used for the CSV file written by the final cell.
output_folder = "../output/"

### Fetch data

In [None]:
# Download the ECDC national case/death CSV straight into a DataFrame.
ecdc_csv_url = "https://opendata.ecdc.europa.eu/covid19/nationalcasedeath/csv"
df = pd.read_csv(ecdc_csv_url)

### Parse date

In [None]:
# Append "-1" (weekday 1 = Monday under %w) to every year_week value and parse
# the result, so dateRep becomes the Monday of the given week.
# NOTE(review): %W is the Monday-based week-of-year counter, not the ISO-8601
# week number (%V) -- confirm which numbering the year_week column uses.
week_start_text = df["year_week"] + "-1"
df["dateRep"] = pd.to_datetime(week_start_text, format="%Y-%W-%w")

# Go from one row per (country, week, indicator) to one row per
# (country, week), with one column per distinct indicator value holding its
# weekly_count.
row_keys = [
    "country",
    "country_code",
    "continent",
    "population",
    "dateRep",
    "year_week",
]
wide = df.pivot(index=row_keys, columns="indicator", values="weekly_count")
df = wide.reset_index()

In [None]:
# Order rows chronologically within each country; the per-group diff computed
# further down works on consecutive rows, so this ordering matters.
sort_keys = ["country", "dateRep"]
df = df.sort_values(by=sort_keys)

### Add week-over-week difference

In [None]:
# For each indicator, store the change versus the previous row of the same
# (country, continent) group. The first row of a group has no predecessor, so
# its NaN diff is replaced by 0 before casting to int.
for source_col, delta_col in (
    ('cases', 'CASES_SINCE_PREV_WEEK'),
    ('deaths', 'DEATHS_SINCE_PREV_WEEK'),
):
    per_country = df.groupby(['country', 'continent'])[source_col]
    df[delta_col] = per_country.diff().fillna(0).astype(int)

### Drop columns

In [None]:
# year_week has already been converted into dateRep, so remove it.
df = df.drop("year_week", axis="columns")

### Resolve Country/Region name

In [None]:
# Overwrite the feed's country names with the names pycountry reports for each
# country code.
#
# Writes go through df.loc[mask, "country"]. The chained form
# df["country"].loc[mask] = ... assigns into an intermediate Series, which
# triggers SettingWithCopyWarning and is not guaranteed to reach df (it is a
# no-op when pandas Copy-on-Write is enabled).
#
# NOTE(review): the lookup is done by alpha_2 -- confirm that country_code
# really holds two-letter ISO 3166-1 codes.
country_codes = df["country_code"].unique()
for code in country_codes:
    rows_for_code = df["country_code"] == code
    try:
        # Only the lookup itself is expected to raise LookupError.
        pyc = pycountry.countries.get(alpha_2=code)
    except LookupError:
        # The code could not be resolved: blank the name for those rows.
        df.loc[rows_for_code, "country"] = None
        continue
    if pyc:
        df.loc[rows_for_code, "country"] = pyc.name
    # A falsy result (no match, no exception) leaves the original name as is.

### Set Last Update Date and Last Reported Flag

In [None]:
# Stamp every row with the current UTC time of this run.
# datetime.utcnow() is deprecated (Python 3.12+), so take an aware UTC "now"
# and strip the tzinfo: the stored value stays a naive UTC timestamp, exactly
# as before, and the CSV representation does not change.
run_timestamp = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
df["LAST_UPDATE_DATE"] = run_timestamp

# True only for rows whose dateRep equals the latest dateRep in the frame.
df["LAST_REPORTED_FLAG"] = df["dateRep"] == df["dateRep"].max()

### Rename Cols

In [None]:
# Map the working column names onto the names used in the exported CSV.
output_names = {
    "country": "COUNTRY_REGION",
    "continent": "continentExp",
    "country_code": "ISO3166_1",
    "population": "POPULATION",
    "dateRep": "DATE",
    "cases": "cases_weekly",
    "deaths": "deaths_weekly",
}
df = df.rename(columns=output_names)

### Save dataframe

In [None]:
# Columns written to the CSV, in output order.
export_columns = [
    "COUNTRY_REGION",
    "continentExp",
    "ISO3166_1",
    "cases_weekly",
    "deaths_weekly",
    "CASES_SINCE_PREV_WEEK",
    "DEATHS_SINCE_PREV_WEEK",
    "POPULATION",
    "DATE",
    "LAST_UPDATE_DATE",
    "LAST_REPORTED_FLAG",
]

# Build the path with os.path.join instead of string concatenation so that an
# output_folder supplied without a trailing separator still yields a file
# inside that folder; with a trailing separator the path is the same as before.
output_path = os.path.join(output_folder, "ECDC_GLOBAL_WEEKLY.csv")
df.to_csv(output_path, index=False, columns=export_columns)