This notebook shows the code used to extract and transform the data stored in the [Epigraphhub platform](https://epigraphhub.org/) and to save the results in the `data_article` folder.

In [7]:
import pandas as pd
from epigraphhub.data.foph import get_cluster_data

The code below fetches the data used to train the models:

In [4]:

# Canton lists passed to `get_cluster_data`. 'BE' and 'ZH' use the same list,
# so it is written once and copied for each key.
cluster_of_ge = ['FR', 'GE', 'JU', 'NE', 'TI', 'VD', 'VS']
cluster_of_be_zh = [
    'AG', 'AI', 'AR', 'BE', 'BL', 'BS', 'GL', 'GR', 'LU', 'NW', 'OW',
    'SG', 'SH', 'SO', 'SZ', 'TG', 'UR', 'ZG', 'ZH',
]

# Maps each target canton to the list of cantons requested for it.
d_cantons = {
    'GE': cluster_of_ge,
    'BE': list(cluster_of_be_zh),
    'ZH': list(cluster_of_be_zh),
}

# For every target canton, fetch the test/case/hospitalisation tables for its
# canton list and write the result to `data_article/data_<canton>.csv`.
for canton, cluster in d_cantons.items():
    df = get_cluster_data(
        'switzerland',
        ['foph_test_d', 'foph_cases_d', 'foph_hosp_d'],
        cluster,
        vaccine = True,
        smooth = True,
    )
    df.to_csv(f'data_article/data_{canton}.csv')

The code below fetches the data on cases, hospitalizations and tests:

In [5]:
from sqlalchemy import create_engine
from epigraphhub.settings import env

# Create the SQLAlchemy engine used by the cells below, using the default
# credential from the epigraphhub settings.
# NOTE(review): username/password are interpolated into the URL unescaped; a
# password containing URL-reserved characters (e.g. '@' or '/') would need
# escaping -- confirm this cannot happen with the configured credentials.
with env.db.credentials[env.db.default_credential] as credential:
    engine = create_engine(
        "postgresql://{user}:{password}@{host}:{port}/{dbname}".format(
            user=credential.username,
            password=credential.password,
            host=credential.host,
            port=credential.port,
            dbname=credential.dbname,
        )
    )


In [8]:

# Read `switzerland.foph_cases_d`, keep the `datum`, `georegion` and `entries`
# columns, index the rows by `datum` parsed as datetimes, and keep everything
# up to and including 2022-06-30.
cases = (
    pd.read_sql_table('foph_cases_d', engine, schema = 'switzerland')
    .loc[:, ['datum', 'georegion', 'entries']]
    .assign(datum = lambda frame: pd.to_datetime(frame['datum']))
    .set_index('datum')
    .loc[:'2022-06-30']
)

# NOTE(review): the content written is CSV although the file name ends in
# `.py`; the name is kept unchanged because other code may read this path.
cases.to_csv('data_article/cases_swiss.py')

In [11]:
# Read `switzerland.foph_hosp_d`, keep the `datum`, `georegion` and `entries`
# columns, index the rows by `datum` parsed as datetimes, and keep everything
# up to and including 2022-06-30.
hosp = pd.read_sql_table('foph_hosp_d', engine, schema = 'switzerland')
hosp = hosp[['datum', 'georegion', 'entries']]
hosp = hosp.set_index('datum')
hosp.index = pd.to_datetime(hosp.index)
# Slice `hosp` itself so the saved file holds the hospitalisation table and
# this cell does not depend on any other data cell having been run.
hosp = hosp.loc[:'2022-06-30']

# NOTE(review): the content written is CSV although the file name ends in
# `.py`; the name is kept unchanged because other code may read this path.
hosp.to_csv('data_article/hosp_swiss.py')

In [12]:
# Read `switzerland.foph_test_d`, keep the date, region and the three entry
# columns, index the rows by `datum` parsed as datetimes, drop rows without a
# `georegion`, and keep everything up to and including 2022-06-30.
tests = (
    pd.read_sql_table('foph_test_d', engine, schema = 'switzerland')
    .loc[:, ['datum', 'georegion', 'entries', 'entries_pos', 'entries_neg']]
    .assign(datum = lambda frame: pd.to_datetime(frame['datum']))
    .set_index('datum')
    .dropna(subset = ['georegion'])
    .loc[:'2022-06-30']
)

# NOTE(review): the content written is CSV although the file name ends in
# `.py`; the name is kept unchanged because other code may read this path.
tests.to_csv('data_article/tests_swiss.py')