# Download case data

This notebook downloads COVID-19 case data from various sources and stores them in a generic triple store.

In [1]:
import pandas as pd
import io
from statsmodels.tsa.seasonal import seasonal_decompose
import numpy as np
import sqlalchemy
from sqlalchemy import create_engine
import os
try:
    from project_lib import Project
    CLOUDPAK = True
except ModuleNotFoundError:
    class Project():
        def __init__(self):
            pass
        def get_name(self):
            return "Download case data"
    CLOUDPAK = False
import datetime

In [2]:
if CLOUDPAK:
    project = Project.access()
    autos_credentials = project.get_connection(name="db2 Warehouse ealuser")
    sql_url = "db2+ibm_db://{username}:{password}@{host}:{port}/{database};Security=ssl;".format(**autos_credentials)
elif "SQL_CONNECT" not in list(os.environ.keys()):
    sql_url = "sqlite:///database.sqlite" # in case you want to run local
    sql_url = "postgresql://cookiecutter:cookiecutter@localhost:5432/cookiec"
else:
    sql_url = os.environ["SQL_CONNECT"]
conn = create_engine(sql_url)

## Compute Waves from detecting changepoints

This is a simple change point detector that determines if (infection) numbers were increasing for equal or more `THRESHOLD_UP` (14) days, or decreasing for more than `THRESHOLD_DOWN` (28) days, it then takes the first datetime of that period as a starting point.

In [3]:
def create_log_entry(conn,notebook_name="data-ingestion/Download case data",tablename="",num_records_before=0,num_records_after=0,most_recent_datapoint=datetime.datetime(1980,1,1),error_code=0):
    p = Project()
    dfLogfile = pd.DataFrame({"JOB_NAME":["{}-{}".format(p.get_name(),notebook_name)],
                             "NOTEBOOK_NAME":[notebook_name],
                             "TABLE_NAME":[tablename],
                             "DATETIME":[datetime.datetime.now()],
                             "NUM_RECORDS":[num_records_after],
                             "MOST_RECENT_DATETIME":[most_recent_datapoint],
                             "RESULT":error_code,
                             "MESSAGE":["Number of Records before {} and after {}".format(num_records_before,num_records_after)]})

    dfLogfile.to_sql("cp4d_pipeline_status", conn, if_exists='append',dtype={"JOB_NAME":sqlalchemy.types.String(150),
                                                                             "NOTEBOOK_NAME":sqlalchemy.types.String(100),
                                                                             "TABLE_NAME":sqlalchemy.types.String(100),
                                                                             "DATETIME":sqlalchemy.types.DateTime,
                                                                             "MOST_RECENT_DATETIME":sqlalchemy.types.DateTime,
                                                                             "MESSAGE":sqlalchemy.types.String(200)},index=False)

In [4]:
def compute_waves(sCountry,identifier="",country=""):
    # computing waves and "periods of calmness" using a very manual Schmitt-Trigger style detection of gradients up and down
    all_verdicts = []
    
    THRESHOLD = 1
    THRESHOLD_UP = 14
    THRESHOLD_DOWN = 28
    
    data = sCountry.rolling(center=True,window=7).mean().dropna()
        
    datum = data.values[0]
    increasing = 0
    decreasing = 0
    wave_no = 0
    for i,v in data.items():
        if v > datum:
            if increasing == 0:
                start_date = i
            increasing += 1
            if increasing > 3:
                decreasing = 0
        elif v < datum:
            decreasing += 1
            if decreasing > 3:
                increasing = 0

        if increasing == THRESHOLD_UP:
            wave_no += 1
            if len(all_verdicts)>0 and all_verdicts[-1]["kind"] == "begin":
                pass
            else:
                all_verdicts.append({"name":country,"datetime_date":i,"kind":"begin","wave_no":wave_no,"identifier":identifier})
        if decreasing == THRESHOLD_DOWN:
            if len(all_verdicts)>0 and all_verdicts[-1]["kind"] == "end":
                all_verdicts.pop()
                all_verdicts.append({"name":country,"datetime_date":i,"kind":"end","wave_no":wave_no,"identifier":identifier})
            else:
                all_verdicts.append({"name":country,"datetime_date":i,"kind":"end","wave_no":wave_no,"identifier":identifier})
        datum = v

    if len(all_verdicts) > 0:
        dfWaves = pd.DataFrame(all_verdicts)
        dfWaves = dfWaves.sort_values(["name","datetime_date"])
        return dfWaves
    else:
        return pd.DataFrame({"name":[],"datetime_date":[],"kind":[],"wave_no":[]})

## Johns Hopkins data

This is a global dataset with certain deficiencies in that case data are not updated retrospectively.

In [5]:
dfMapping = pd.read_sql("johns_hopkins_country_mapping",conn)

dfJH = pd.read_csv("https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv?raw=true",
               encoding="utf-8")

dfJHcountries = dfJH[dfJH["Province/State"].isnull()]
dfJHcountries = dfJHcountries.merge(dfMapping,left_on="Country/Region",right_on="name")
try:
    dfJHcountries = dfJHcountries[dfJHcountries["ISO_3_code_i"]>0]
    del dfJHcountries["ISO_3_code_i"]
except:
    dfJHcountries = dfJHcountries[dfJHcountries["iso_3_code_i"]>0]
    del dfJHcountries["iso_3_code_i"]
del dfJHcountries["Lat"]
del dfJHcountries["Long"]
del dfJHcountries["Province/State"]
del dfJHcountries["Country/Region"]
del dfJHcountries["adm0_a3"]

dfJHcountries.index = dfJHcountries.name
dfJHcountries.index.name = None
del dfJHcountries["name"]
dfJHcountries = dfJHcountries.transpose()

In [6]:
dfJHcountries.index = pd.to_datetime(dfJHcountries.index)
dfJHcountries = dfJHcountries.diff(1).dropna()

dfJHcountries_trend = dfJHcountries.copy()
for c in dfJHcountries_trend.columns:
    try:
        dfJHcountries_trend[c] = seasonal_decompose(dfJHcountries_trend[c],period=7).trend
    except TypeError:
        dfJHcountries_trend[c] = seasonal_decompose(dfJHcountries_trend[c],freq=7).trend

dfnew_cases = dfJHcountries.stack().reset_index().rename(columns={"level_0":"datetime_date","level_1":"name",0:"new_cases"})
dftrend = dfJHcountries_trend.stack().reset_index().rename(columns={"level_0":"datetime_date","level_1":"name",0:"trend"})
df = pd.merge(dfnew_cases,dftrend,how="left",left_on=["datetime_date","name"],right_on=["datetime_date","name"])
df["new_cases"] = df["new_cases"].astype(int)
df["data_source"] = "Johns Hopkins global"

df = df.merge(dfMapping,on="name").rename(columns={"adm0_a3":"identifier"})
#del df["ISO_3_code_i"]

try:
    r = conn.execute("SELECT COUNT(*) FROM cookiecutter_case_data WHERE data_source='Johns Hopkins global'")
    num_records_before = int(r.fetchone()[0])
    conn.execute("DELETE FROM cookiecutter_case_data WHERE data_source='Johns Hopkins global'")
    error_code = 0
except:
    num_records_before = 0
    error_code = 1
df.to_sql("cookiecutter_case_data",conn,index=False,dtype={"datetime_date":sqlalchemy.types.DateTime,
                                                          "name":sqlalchemy.types.VARCHAR(100),
                                                           "identifier":sqlalchemy.types.VARCHAR(10),
                                                           "data_source":sqlalchemy.types.VARCHAR(30)},
         if_exists="append")

create_log_entry(conn,notebook_name="data-ingestion/Download case data",
                 tablename="cookiecutter_case_data/Johns Hopkins global",
                 num_records_before=num_records_before,num_records_after=len(df),
                 most_recent_datapoint=df["datetime_date"].max(),error_code=error_code)


allwaves = []
for c in dfJHcountries.columns:
    allwaves.append(compute_waves(dfJHcountries[c],country=c,identifier=dfMapping[dfMapping.name == c].adm0_a3.unique()[0]))
dfWaves = pd.DataFrame().append(allwaves,sort=True)
dfWaves["wave_no"] = dfWaves["wave_no"].astype(int)
dfWaves["data_source"] = "Johns Hopkins global"
try:
    r = conn.execute("SELECT COUNT(*) FROM cookiecutter_computed_waves_chgpoint WHERE data_source='Johns Hopkins global'")
    num_records_before = int(r.fetchone()[0])
    conn.execute("DELETE FROM cookiecutter_computed_waves_chgpoint WHERE data_source='Johns Hopkins global'")
    error_code = 0
except:
    num_records_before = 0
    error_code = 1
dfWaves.to_sql("cookiecutter_computed_waves_chgpoint",conn,index=False,dtype={"name":sqlalchemy.types.VARCHAR(100),
                                                                             "datetime_date":sqlalchemy.types.DateTime,
                                                                             "kind":sqlalchemy.types.VARCHAR(10),
                                                                             "identifier":sqlalchemy.types.VARCHAR(10),
                                                                             "data_source":sqlalchemy.types.VARCHAR(30)},
         if_exists="append")

create_log_entry(conn,notebook_name="data-ingestion/Download case data",
                 tablename="cookiecutter_computed_waves_chgpoint/Johns Hopkins global",
                 num_records_before=num_records_before,num_records_after=len(dfWaves),
                 most_recent_datapoint=df["datetime_date"].max(),error_code=error_code)

## Johns Hopkins US Data

The data are down to Counties but we aggregate them to States.

In [7]:
dfJHUS = pd.read_csv("https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv?raw=true",
               encoding="utf-8")
dfJHUS = dfJHUS[dfJHUS.FIPS.notnull()]

state_to_postal = dict(zip(['Alabama','Alaska','Arizona','Arkansas','California','Colorado','Connecticut','Delaware','Florida','Georgia',
          'Hawaii','Idaho','Illinois','Indiana','Iowa','Kansas','Kentucky','Louisiana','Maine','Maryland','Massachusetts',
          'Michigan','Minnesota','Mississippi','Missouri','Montana','Nebraska','Nevada','New Hampshire','New Jersey',
          'New Mexico','New York','North Carolina','North Dakota','Ohio','Oklahoma','Oregon','Pennsylvania','Rhode Island',
          'South Carolina','South Dakota','Tennessee','Texas','Utah','Vermont','Virginia','Washington','West Virginia',
          'Wisconsin','Wyoming','American Samoa','Guam','Northern Mariana Islands','Puerto Rico','Virgin Islands'],
         ['AL','AK','AZ','AR','CA','CO','CT','DE','FL','GA','HI','ID','IL','IN','IA','KS','KY','LA','ME','MD',
          'MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','OH','OK','OR','PA','RI','SC',
          'SD','TN','TX','UT','VT','VA','WA','WV','WI','WY','AS','GU','MP','PR','VI']))

dfJHUS["name"] = ""
for i,row in dfJHUS.iterrows():
    try:
        dfJHUS.at[i,"name"] = "US-"+state_to_postal[row["Province_State"]]+" "+row["Province_State"]
    except:
        continue

dfJHUS = dfJHUS[dfJHUS.name > ""].groupby("name").sum()
del dfJHUS["UID"]
del dfJHUS["code3"]
del dfJHUS["FIPS"]
del dfJHUS["Lat"]
del dfJHUS["Long_"]
dfJHUS.index.name = None
dfJHUS = dfJHUS.transpose()
dfJHUS.index = pd.to_datetime(dfJHUS.index)
dfJHUS = dfJHUS.diff(1).dropna()

dfnew_cases = dfJHUS.stack().reset_index().rename(columns={"level_0":"datetime_date","level_1":"name",0:"new_cases"})

dfJHUS_trend = dfJHUS.copy()
for c in dfJHUS_trend.columns:
    try:
        dfJHUS_trend[c] = seasonal_decompose(dfJHUS_trend[c],period=7).trend
    except TypeError:
        dfJHUS_trend[c] = seasonal_decompose(dfJHUS_trend[c],freq=7).trend

dftrend = dfJHUS_trend.stack().reset_index().rename(columns={"level_0":"datetime_date","level_1":"name",0:"trend"})

df = pd.merge(dfnew_cases,dftrend,how="left",left_on=["datetime_date","name"],right_on=["datetime_date","name"])
df["new_cases"] = df["new_cases"].astype(int)
df["data_source"] = "Johns Hopkins US States"

dfCodeMapping = df.name.str.split(expand=True).rename(columns={0:"identifier"})
del dfCodeMapping[1]
del dfCodeMapping[2]
del dfCodeMapping[3]
df = df.join(dfCodeMapping)

try:
    r = conn.execute("SELECT COUNT(*) FROM cookiecutter_case_data WHERE data_source='Johns Hopkins US States'")
    num_records_before = int(r.fetchone()[0])
    conn.execute("DELETE FROM cookiecutter_case_data WHERE data_source='Johns Hopkins US States'")
    error_code = 0
except:
    num_records_before = 0
    error_code = 1
    
df.to_sql("cookiecutter_case_data",conn,index=False,dtype={"datetime_date":sqlalchemy.types.DateTime,
                                                          "name":sqlalchemy.types.VARCHAR(100),
                                                          "identifier":sqlalchemy.types.VARCHAR(10),
                                                           "data_source":sqlalchemy.types.VARCHAR(30)},
         if_exists="append")
create_log_entry(conn,notebook_name="data-ingestion/Download case data",
                 tablename="cookiecutter_case_data/Johns Hopkins US States",
                 num_records_before=num_records_before,num_records_after=len(df),
                 most_recent_datapoint=df["datetime_date"].max(),error_code=error_code)

allwaves = []
for c in dfJHUS.columns:
    allwaves.append(compute_waves(dfJHUS[c],country=c,identifier=c.split(" ")[0]))
dfWaves = pd.DataFrame().append(allwaves,sort=True)
dfWaves["wave_no"] = dfWaves["wave_no"].astype(int)
dfWaves["data_source"] = "Johns Hopkins US States"
try:
    r = conn.execute("SELECT COUNT(*) FROM cookiecutter_computed_waves_chgpoint WHERE data_source='Johns Hopkins US States'")
    num_records_before = int(r.fetchone()[0])
    conn.execute("DELETE FROM cookiecutter_computed_waves_chgpoint WHERE data_source='Johns Hopkins US States'")
    error_code = 0
except:
    num_records_before = 0
    error_code = 1
dfWaves.to_sql("cookiecutter_computed_waves_chgpoint",conn,index=False,dtype={"name":sqlalchemy.types.VARCHAR(100),
                                                                             "datetime_date":sqlalchemy.types.DateTime,
                                                                             "kind":sqlalchemy.types.VARCHAR(10),
                                                                             "identifier":sqlalchemy.types.VARCHAR(10),
                                                                             "data_source":sqlalchemy.types.VARCHAR(30)},
         if_exists="append")
create_log_entry(conn,notebook_name="data-ingestion/Download case data",
                 tablename="cookiecutter_computed_waves_chgpoint/Johns Hopkins US States",
                 num_records_before=num_records_before,num_records_after=len(dfWaves),
                 most_recent_datapoint=df["datetime_date"].max(),error_code=error_code)

In [8]:
conn = engine.connect()
dfOxCGRT = pd.read_csv("https://github.com/OxCGRT/covid-policy-tracker/raw/master/data/OxCGRT_latest.csv",low_memory=False)
dfOxCGRT["datetime_date"] = pd.to_datetime(dfOxCGRT.Date,format="%Y%m%d")
dfOxCGRT.columns = [c.lower() for c in dfOxCGRT.columns]
dfOxCGRT

dfOxCGRT.to_sql("oxford_stringency_index", conn,if_exists='replace',dtype={'datetime_date': sqlalchemy.types.Date,
                                                                              'countrycode':sqlalchemy.types.String(3),
                                                                              'countryname':sqlalchemy.types.String(150),
                                                                              'regioncode':sqlalchemy.types.String(20),
                                                                              'regionname':sqlalchemy.types.String(150),
                                                                              'entry_id_for_country':sqlalchemy.types.String(20)},index=False)
conn.close()

NameError: name 'engine' is not defined

## European Centre of Disease Control (ECDC) data

In [None]:
dfECDC = pd.read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/data.csv",encoding="utf-8")

ddf = dfECDC[["countriesAndTerritories","countryterritoryCode"]].drop_duplicates()
ecdc_countries_to_adm0_a3 = dict(zip(ddf.countriesAndTerritories.values,ddf.countryterritoryCode.values))

dfECDC.index = pd.to_datetime(dfECDC.dateRep,format="%d/%m/%Y")
dfECDC.index.name = None
dfECDC = dfECDC[["cases","countriesAndTerritories"]]
dfECDC = pd.pivot_table(dfECDC,columns=["countriesAndTerritories"],values="cases",index=dfECDC.index).fillna(0)

dfECDC_trend = dfECDC.copy()

for c in dfECDC_trend.columns:
    try:
        dfECDC_trend[c] = seasonal_decompose(dfECDC_trend[c],period=7).trend
    except TypeError:
        dfECDC_trend[c] = seasonal_decompose(dfECDC_trend[c],freq=7).trend

dfnew_cases = dfECDC.stack().reset_index().rename(columns={"level_0":"datetime_date","level_1":"name",0:"new_cases","countriesAndTerritories":"name"})
dftrend = dfECDC_trend.stack().reset_index().rename(columns={"level_0":"datetime_date","level_1":"name",0:"trend","countriesAndTerritories":"name"})
df = pd.merge(dfnew_cases,dftrend,how="left",left_on=["datetime_date","name"],right_on=["datetime_date","name"])
df["new_cases"] = df["new_cases"].astype(int)
df["data_source"] = "ECDC global"

try:
    r = conn.execute("SELECT COUNT(*) FROM cookiecutter_case_data WHERE data_source='ECDC global'")
    num_records_before = int(r.fetchone()[0])
    conn.execute("DELETE FROM cookiecutter_case_data WHERE data_source='ECDC global'")
    error_code = 0
except:
    num_records_before = 0
    error_code = 1
    
df = df.merge(ddf,left_on="name",right_on="countriesAndTerritories").rename(columns={"countryterritoryCode":"identifier"})
del df["countriesAndTerritories"]
df.to_sql("cookiecutter_case_data",conn,index=False,dtype={"datetime_date":sqlalchemy.types.DateTime,
                                                          "name":sqlalchemy.types.VARCHAR(100),
                                                           "identifier":sqlalchemy.types.VARCHAR(10),
                                                           "data_source":sqlalchemy.types.VARCHAR(30)},
         if_exists="append")
create_log_entry(conn,notebook_name="data-ingestion/Download case data",
                 tablename="cookiecutter_case_data/ECDC global",
                 num_records_before=num_records_before,num_records_after=len(df),
                 most_recent_datapoint=df["datetime_date"].max(),error_code=error_code)

allwaves = []
for c in dfECDC.columns:
    allwaves.append(compute_waves(dfECDC[c],country=c,identifier=ecdc_countries_to_adm0_a3[c]))
dfWaves = pd.DataFrame().append(allwaves,sort=True)
dfWaves["wave_no"] = dfWaves["wave_no"].astype(int)
dfWaves["data_source"] = "ECDC global"

try:
    r = conn.execute("SELECT COUNT(*) FROM cookiecutter_computed_waves_chgpoint WHERE data_source='ECDC global'")
    num_records_before = int(r.fetchone()[0])
    conn.execute("DELETE FROM cookiecutter_computed_waves_chgpoint WHERE data_source='ECDC global'")
    error_code = 0
except:
    num_records_before = 0
    error_code = 1
    
dfWaves.to_sql("cookiecutter_computed_waves_chgpoint",conn,index=False,dtype={"name":sqlalchemy.types.VARCHAR(100),
                                                                             "datetime_date":sqlalchemy.types.DateTime,
                                                                             "kind":sqlalchemy.types.VARCHAR(10),
                                                                             "identifier":sqlalchemy.types.VARCHAR(10),
                                                                             "data_source":sqlalchemy.types.VARCHAR(30)},
         if_exists="append")
create_log_entry(conn,notebook_name="data-ingestion/Download case data",
                 tablename="cookiecutter_computed_waves_chgpoint/ECDC global",
                 num_records_before=num_records_before,num_records_after=len(dfWaves),
                 most_recent_datapoint=df["datetime_date"].max(),error_code=error_code)

## Germany Robert-Koch Institut (RKI) Data

In [None]:
bundesland_code = {'Baden-Württemberg': 'DE-BW', 'Bayern': 'DE-BY', 'Berlin': 'DE-BE', 'Brandenburg': 'DE-BB', 'Bremen': 'DE-HB',
                   'Hamburg': 'DE-HH', 'Hessen': 'DE-HE', 'Mecklenburg-Vorpommern': 'DE-MV', 'Niedersachsen': 'DE-NI',
                   'Nordrhein-Westfalen': 'DE-NW', 'Rheinland-Pfalz': 'DE-RP', 'Saarland': 'DE-SL', 'Sachsen': 'DE-SN',
                   'Sachsen-Anhalt': 'DE-ST', 'Schleswig-Holstein': 'DE-SH', 'Thüringen': 'DE-TH'}

dfRKI = pd.read_csv("https://www.arcgis.com/sharing/rest/content/items/f10774f1c63e40168479a1feb6c7ca74/data",encoding="utf-8")
dfRKI.index = pd.to_datetime(dfRKI.Refdatum)
dfRKI["datetime_date"] = dfRKI.index

dfRKI = dfRKI[["Bundesland","datetime_date","AnzahlFall"]].groupby(["Bundesland","datetime_date"]).sum().reset_index().fillna(0)
dfRKI = pd.pivot_table(dfRKI,columns=["Bundesland"],index=["datetime_date"],values="AnzahlFall").fillna(0)

dfRKI_trend = dfRKI.copy()

for c in dfRKI_trend.columns:
    try:
        dfRKI_trend[c] = seasonal_decompose(dfRKI_trend[c],period=7).trend
    except TypeError:
        dfRKI_trend[c] = seasonal_decompose(dfRKI_trend[c],freq=7).trend

dfnew_cases = dfRKI.stack().reset_index().rename(columns={"level_0":"datetime_date","level_1":"name",0:"new_cases","countriesAndTerritories":"name","Bundesland":"name"})
dftrend = dfRKI_trend.stack().reset_index().rename(columns={"level_0":"datetime_date","level_1":"name",0:"trend","countriesAndTerritories":"name","Bundesland":"name"})

df = pd.merge(dfnew_cases,dftrend,how="left",left_on=["datetime_date","name"],right_on=["datetime_date","name"])
df["new_cases"] = df["new_cases"].astype(int)
df["data_source"] = "RKI D"
dfBundeslandMapping = pd.DataFrame(zip(bundesland_code.keys(),bundesland_code.values())).rename(columns={0:"name",1:"identifier"})
df = df.merge(dfBundeslandMapping,on="name")

try:
    r = conn.execute("SELECT COUNT(*) FROM cookiecutter_case_data WHERE data_source='RKI D'")
    num_records_before = int(r.fetchone()[0])
    conn.execute("DELETE FROM cookiecutter_case_data WHERE data_source='RKI D'")
    error_code = 0
except:
    num_records_before = 0
    error_code = 1
df.to_sql("cookiecutter_case_data",conn,index=False,dtype={"datetime_date":sqlalchemy.types.DateTime,
                                                           "name":sqlalchemy.types.VARCHAR(100),
                                                           "identifier":sqlalchemy.types.VARCHAR(10),
                                                           "data_source":sqlalchemy.types.VARCHAR(30)},
          if_exists="append")
create_log_entry(conn,notebook_name="data-ingestion/Download case data",
                 tablename="cookiecutter_case_data/RKI D",
                 num_records_before=num_records_before,num_records_after=len(df),
                 most_recent_datapoint=df["datetime_date"].max(),error_code=error_code)


allwaves = []
for c in dfRKI.columns:
    allwaves.append(compute_waves(dfRKI[c],country=c,identifier=bundesland_code[c]))
dfWaves = pd.DataFrame().append(allwaves,sort=True)
dfWaves["wave_no"] = dfWaves["wave_no"].astype(int)
dfWaves["data_source"] = "RKI D"

try:
    r = conn.execute("SELECT COUNT(*) FROM cookiecutter_computed_waves_chgpoint WHERE data_source='RKI D'")
    num_records_before = int(r.fetchone()[0])
    conn.execute("DELETE FROM cookiecutter_computed_waves_chgpoint WHERE data_source='RKI D'")
    error_code = 0
except:
    num_records_before = 0
    error_code = 1
dfWaves.to_sql("cookiecutter_computed_waves_chgpoint",conn,index=False,dtype={"name":sqlalchemy.types.VARCHAR(100),
                                                                             "datetime_date":sqlalchemy.types.DateTime,
                                                                             "kind":sqlalchemy.types.VARCHAR(10),
                                                                             "identifier":sqlalchemy.types.VARCHAR(10),
                                                                             "data_source":sqlalchemy.types.VARCHAR(30)},
         if_exists="append")
create_log_entry(conn,notebook_name="data-ingestion/Download case data",
                 tablename="cookiecutter_computed_waves_chgpoint/RKI D",
                 num_records_before=num_records_before,num_records_after=len(dfWaves),
                 most_recent_datapoint=df["datetime_date"].max(),error_code=error_code)


## One More Thing: OxCGRT data

In [None]:
conn = engine.connect()
dfOxCGRT = pd.read_csv("https://github.com/OxCGRT/covid-policy-tracker/raw/master/data/OxCGRT_latest.csv",low_memory=False)
dfOxCGRT["datetime_date"] = pd.to_datetime(dfOxCGRT.Date,format="%Y%m%d")
dfOxCGRT.columns = [c.lower() for c in dfOxCGRT.columns]
dfOxCGRT

dfOxCGRT.to_sql("oxford_stringency_index", conn,if_exists='replace',dtype={'datetime_date': sqlalchemy.types.Date,
                                                                              'countrycode':sqlalchemy.types.String(3),
                                                                              'countryname':sqlalchemy.types.String(150),
                                                                              'regioncode':sqlalchemy.types.String(20),
                                                                              'regionname':sqlalchemy.types.String(150),
                                                                              'entry_id_for_country':sqlalchemy.types.String(20)},index=False)
conn.close()