In [1]:
import os
import datetime
from datetime import date, timedelta, timezone
import time
import pandas as pd
import numpy as np
pd.options.display.max_rows = None
import matplotlib.pyplot as plt

# Two-letter abbreviations of the 26 Swiss cantons, in the order they are processed.
cantonKeys = [
    'AG', 'AI', 'AR', 'BE', 'BL', 'BS', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE',
    'NW', 'OW', 'SG', 'SH', 'SO', 'SZ', 'TG', 'TI', 'UR', 'VD', 'VS', 'ZG', 'ZH',
]
#cantonKeys = ['AG']

# Canton abbreviation -> region name used in the "sub_region_1" column of the
# Google mobility report.
googleMobDict = {
    'AG': "Aargau",
    'AI': "Appenzell Innerrhoden",
    'AR': "Appenzell Ausserrhoden",
    'BE': "Canton of Bern",
    'BL': "Basel-Landschaft",
    'BS': "Basel City",
    'FR': "Fribourg",
    'GE': "Geneva",
    'GL': "Glarus",
    'GR': "Grisons",
    'JU': "Jura",
    'LU': "Lucerne",
    'NE': "Neuchâtel",
    'NW': "Nidwalden",
    'OW': "Obwalden",
    'SG': "St. Gallen",
    'SH': "Schaffhausen",
    'SO': "Solothurn",
    'SZ': "Schwyz",
    'TG': "Thurgau",
    'TI': "Ticino",
    'UR': "Uri",
    'VD': "Vaud",
    'VS': "Valais",
    'ZG': "Canton of Zug",
    'ZH': "Zurich",
}


In [2]:

def getDays(year, offset):
    """Yield, in order, every date of `year` that falls on one weekday.

    Parameters
    ----------
    year : int
        Calendar year to enumerate.
    offset : int
        Target weekday in `date.weekday()` numbering, taken modulo 7
        (0 or 7 = Monday, 1 = Tuesday, ..., 6 = Sunday).

    Yields
    ------
    datetime.date
        Each matching day, one week apart, while it still lies in `year`.
    """
    d = date(year, 1, 1)
    # Move forward by 0..6 days to the first matching weekday. Reducing modulo 7
    # keeps January 1st itself when it already matches (previously skipped for
    # offset=7) and never steps back into the previous year (which previously
    # made the generator yield nothing for small offsets).
    d += timedelta(days=(offset - d.weekday()) % 7)
    while d.year == year:
        yield d
        d += timedelta(days=7)

# Every day produced by getDays(year, 7) for 2020 and then 2021, in chronological order.
listOfMondays = []
for year in (2020, 2021):
    listOfMondays.extend(getDays(year, 7))
    
def addZero(x):
    """Left-pad a one-character string with "0"; return any other string unchanged."""
    return "0" + x if len(x) == 1 else x

# "YYYYWW" key (ISO year followed by the zero-padded ISO week) for each entry
# of listOfMondays, in the same order.
yearWeek = [
    str(isoYear) + addZero(str(isoWeek))
    for isoYear, isoWeek, _ in (monday.isocalendar() for monday in listOfMondays)
]

# Lookup from the "YYYYWW" key to the date it was derived from.
mondaysByWeekNr = {weekKey: monday for weekKey, monday in zip(yearWeek, listOfMondays)}


# ---------------------------------------------------------------------------
# Build one merged daily table per canton (2020-01-01 .. 2021-12-31) from the
# FOPH, Google mobility, KOF, measures, weather, holiday, Intervista and ECDC
# inputs, and write it to data/merged/<canton>.csv.
#
# Everything that does not depend on the canton (helpers, lookup tables, the
# neighbour-incidence sources, the output directory) is prepared once, before
# the loop.
# ---------------------------------------------------------------------------

_FOPH_DIR = "data/FOPH/data/"

# Parsed copies of the CSV files that are the same for every canton.
# Re-created on every run of this cell, so edited files are picked up again.
_sharedCsvCache = {}


def _readSharedCsv(path, **readArgs):
    """Read a canton-independent CSV, parsing it only once per run.

    Returns a copy of the cached frame, so callers may filter or modify the
    result without affecting what later cantons receive.
    """
    key = (path, tuple(sorted(readArgs.items())))
    if key not in _sharedCsvCache:
        _sharedCsvCache[key] = pd.read_csv(path, **readArgs)
    return _sharedCsvCache[key].copy()


def _withDateIndex(frame, dateCol):
    """Return `frame` indexed by `dateCol`, with the labels parsed to Timestamps.

    The per-canton frame `df` has a DatetimeIndex; parsing the labels here makes
    each join match on real dates rather than on implicit casting of string
    labels during the join.
    """
    frame = frame.set_index(dateCol)
    frame.index = pd.to_datetime(frame.index)
    return frame


def _weeklyFophBreakdown(category, cantonId, fileSuffix, splitCol):
    """Load one weekly FOPH breakdown for a canton in wide form.

    Reads COVID19<category>_geoRegion_<fileSuffix>_w.csv, keeps the rows of
    `cantonId`, and pivots so that each (value column, `splitCol` value) pair
    becomes one column named "<category> <value column> <splitCol value>".
    The index holds the date that mondaysByWeekNr assigns to each 'datum' week.

    Raises KeyError if a 'datum' value of this canton is not in mondaysByWeekNr.
    """
    weekly = _readSharedCsv(_FOPH_DIR+"COVID19"+category+"_geoRegion_"+fileSuffix+"_w.csv")
    weekly = weekly.loc[weekly['geoRegion']==cantonId]
    weekly = weekly[['datum', splitCol, 'entries', 'prct', 'inz_entries', 'inzsumTotal']].copy()
    weekly['datum'] = pd.to_datetime(weekly['datum'].map(lambda week: mondaysByWeekNr[str(week)]))
    weekly = weekly.pivot(index="datum", columns=splitCol)
    weekly.columns = [category+" "+' '.join(col) for col in weekly.columns.values]
    return weekly


def _dailyFophCounts(fileName, cantonId, prefix):
    """Load entries / inz_entries / inzsumTotal of one daily FOPH file for a canton.

    The result is indexed by the parsed 'datum' column and each column name is
    prefixed with `prefix`.
    """
    daily = _readSharedCsv(_FOPH_DIR+fileName)
    daily = daily.loc[daily["geoRegion"]==cantonId]
    daily = _withDateIndex(daily, 'datum')[['entries','inz_entries','inzsumTotal']]
    daily.columns = [prefix+col for col in daily.columns]
    return daily


# Age classes whose weekly 'prct' columns are dropped again after pivoting.
_AGE_CLASSES = ['0 - 9', '10 - 19', '20 - 29', '30 - 39', '40 - 49',
                '50 - 59', '60 - 69', '70 - 79', '80+']

# Cantons that take the 'D-CH' resp. 'F-CH' Intervista column; every other
# canton (i.e. 'TI') takes 'I-CH'.
D_CH = ['AG','AI','AR', 'BE','BL', 'BS','LU','GR','NW', 'OW', 'SG', 'SH', 'SO', 'SZ', 'TG','GL','UR','ZG','ZH']
F_CH = ['FR', 'GE', 'JU', 'VD', 'VS', 'NE']

# Regions whose incidence is averaged per canton. Two-letter entries are looked
# up in the FOPH case file, all other entries in the ECDC file.
neigbors = {
  'AG': ['BL','SO','BE','LU','ZH','Baden-Wurttemberg','ZG'],
  'AI': ['AR','SG'],
  'AR': ['AI','SG'],
  'BE': ['AG','SO','JU','NE','FR','VD','VS','UR','NW','OW','LU'],
  'BL': ['AG','BS','SO','Baden-Wurttemberg','Grand Est'],
  'BS': ['BL','Baden-Wurttemberg','Grand Est'],
  'FR': ['BE','VD','NE'],
  'GE': ['VD','Auvergne Rhone Alpes'],
  'GL': ['SG','SZ','UR','GR'],
  'GR': ['SG','GL','UR','TI','Vorarlberg','Lombardia','Liechtenstein'],
  'JU': ['BL','SO','BE','NE','Grand Est','Bourgogne Franche Comte'],
  'LU': ['AG','BE','NW', 'OW','ZG','SZ'],
  'NE': ['JU','BE','VD','FR','Bourgogne Franche Comte'],
  'NW': ['OW','BE','LU','SZ','UR'],
  'OW': ['NW','LU','BE','UR'],
  'SG': ['AI','AR','TG','ZH','SZ','GL','GR','Vorarlberg','Liechtenstein'],
  'SH': ['TG','ZH','Baden-Wurttemberg'],
  'SO': ['BE','JU','BL','AG','Grand Est'],
  'SZ': ['ZG','ZH','SG','GL','LU','NW','UR'],
  'TG': ['SH','ZH','SG','Baden-Wurttemberg'],
  'TI': ['UR','GR','Piemonte','Lombardia'],
  'UR': ['TI','VS','GR','BE','NW','OW','SZ','GL'],
  'VD': ['NE','GE','FR','VS','BE','Auvergne Rhone Alpes','Bourgogne Franche Comte'],
  'VS': ['VD','BE','UR','Piemonte','Auvergne Rhone Alpes'],
  'ZG': ['ZH','AG','LU','SZ'],
  'ZH': ['SH','AG','ZG','SZ','SG','TG','Baden-Wurttemberg']
}

# Incidence sources for the neighbour statistics (identical for every canton).
# FOPH: 14-day case sum scaled to 100k inhabitants, one row per region and day.
foph = _withDateIndex(_readSharedCsv(_FOPH_DIR+"COVID19Cases_geoRegion.csv"), 'datum')
foph['rate_14_day_per_100k'] = (100000*foph['sum14d']) / foph['pop']
foph = foph[['geoRegion','rate_14_day_per_100k']]

# ECDC: 'year_week' looks like "2020-W14"; characters 0-3 and 6-7 form the
# "YYYYWW" key of mondaysByWeekNr (KeyError if the week is not listed there).
ecdc = _readSharedCsv("data/ECDC/ECDCsubnationalcaseweekly.csv")
ecdc['year_week'] = pd.to_datetime(ecdc['year_week'].map(lambda week: mondaysByWeekNr[week[0:4]+week[6:8]]))
ecdc = ecdc.set_index("year_week")
ecdc = ecdc[['region_name','rate_14_day_per_100k']]

os.makedirs('data/merged', exist_ok=True)

for cantonId in cantonKeys:
    # One row per calendar day; every source below is left-joined onto it.
    df = pd.DataFrame(index=pd.date_range(start=datetime.datetime(2020, 1, 1),
                                          end=datetime.datetime(2021, 12, 31), name="Time"))

    # weekly age classified FOPH data
    for category in ['Cases','Death','Hosp']:
        age = _weeklyFophBreakdown(category, cantonId, 'AKL10', 'altersklasse_covid19')
        age = age.drop([category+' '+metric+' Unbekannt'
                        for metric in ['entries','prct','inz_entries','inzsumTotal']], axis=1)
        age = age.drop([category+' prct '+ageClass for ageClass in _AGE_CLASSES], axis=1)
        df = df.join(age)

    # weekly gender classified FOPH data
    for category in ['Cases','Death','Hosp']:
        gender = _weeklyFophBreakdown(category, cantonId, 'sex', 'sex')
        gender = gender.drop([category+' entries unknown', category+' inz_entries unknown',
                              category+' inzsumTotal unknown', category+' prct unknown',
                              category+' prct female', category+' prct male'], axis=1)
        df = df.join(gender)

    # FOPH vaccination totals (doses administered, fully vaccinated persons)
    for category in ['VaccDosesAdministered','FullyVaccPersons']:
        vacc = _readSharedCsv(_FOPH_DIR+"COVID19"+category+".csv")
        vacc = vacc.loc[vacc['geoRegion']==cantonId]
        vacc = _withDateIndex(vacc[['date','sumTotal','per100PersonsTotal']], 'date')
        vacc.columns = [category+" "+col for col in vacc.columns]
        df = df.join(vacc)

    # daily hospital capacity FOPH data
    capacity = _readSharedCsv(_FOPH_DIR+"COVID19HospCapacity_geoRegion.csv")
    capacity = capacity.loc[capacity['geoRegion']==cantonId]
    capacity = capacity[['date',
                         'ICU_AllPatients','ICU_Covid19Patients','ICU_Capacity','Total_AllPatients',
                         'Total_Covid19Patients','Total_Capacity','ICU_NonCovid19Patients','ICU_FreeCapacity',
                         'Total_NonCovid19Patients','Total_FreeCapacity','type_variant']]
    # De-duplicate while 'date' is still a column: drop_duplicates() ignores the
    # index, so doing it after set_index would also discard distinct days that
    # merely report identical values.
    capacity = capacity.drop_duplicates()
    capacity = capacity.loc[capacity['type_variant']=='fp7d']
    capacity = capacity.drop(['type_variant'], axis=1)
    capacity = _withDateIndex(capacity, 'date').sort_index()
    df = df.join(capacity)

    # percentage of virus variants
    variants = _readSharedCsv(_FOPH_DIR+"COVID19Variants.csv")
    variants = variants.loc[(variants['geoRegion']==cantonId)
                            & (variants['variant_type']=='voc_digitally_reported')
                            & (variants['data_quality']=='sufficient')]
    variants = _withDateIndex(variants, 'date').sort_index()
    variants = variants[['lower_ci_day','upper_ci_day','anteil_pos']]
    df = df.join(variants)

    # daily basis data

    # attach daily positive cases
    caseDf = _dailyFophCounts("COVID19Cases_geoRegion.csv", cantonId, "case_")
    df = df.join(caseDf)

    # attach daily hospital cases
    hospDf = _dailyFophCounts("COVID19Hosp_geoRegion.csv", cantonId, "hosp_")
    df = df.join(hospDf)

    # attach daily death cases
    deathDf = _dailyFophCounts("COVID19Death_geoRegion.csv", cantonId, "death_")
    df = df.join(deathDf)

    # attach daily test
    testDf = _dailyFophCounts("COVID19Test_geoRegion_all.csv", cantonId, "test_")
    df = df.join(testDf)

    # attach daily R-values
    rvalueDf = _readSharedCsv(_FOPH_DIR+"COVID19Re_geoRegion.csv")
    rvalueDf = rvalueDf.loc[rvalueDf["geoRegion"]==cantonId]
    interestedCols = ['median_R_mean','median_R_highHPD','median_R_lowHPD']
    rvalueDf = _withDateIndex(rvalueDf, 'date')[interestedCols]
    df = df.join(rvalueDf)

    # attach google mobility data
    # NOTE(review): only the 2020 report file is read while df spans 2020-2021;
    # confirm whether a 2021 report should be added.
    mobDf = _readSharedCsv("data/GoogleMobility/2020_CH_Region_Mobility_Report.csv")
    mobDf = mobDf.loc[mobDf["sub_region_1"]==googleMobDict[cantonId]]
    interestedCols = ['retail_and_recreation_percent_change_from_baseline',
                      'grocery_and_pharmacy_percent_change_from_baseline',
                      'parks_percent_change_from_baseline',
                      'transit_stations_percent_change_from_baseline',
                      'workplaces_percent_change_from_baseline',
                      'residential_percent_change_from_baseline'
                     ]
    mobDf = _withDateIndex(mobDf, 'date')[interestedCols]
    df = df.join(mobDf)

    # attach KOF strigency index
    kofCol = "ch.kof.stringency."+cantonId.lower()+".stringency_plus"
    kofDf = _withDateIndex(_readSharedCsv("data/KOF/KOFStrigencyIndex.csv"), 'date')
    kofDf = kofDf[[kofCol]].rename(columns={kofCol:'kofStrigency'})
    df = df.join(kofDf)

    # attach all measures
    measuresDf = _withDateIndex(pd.read_csv("data/measures/"+cantonId+".csv"), 'Time')
    df = df.join(measuresDf)

    # compute historic weather: reduce the timestamped observations to one row per day
    histWeathDf = pd.read_csv("static_data/historicweather/"+cantonId+".csv")
    histWeathDf = histWeathDf.set_index('dt')
    histWeathDf = histWeathDf[['main.temp_min','main.temp_max','clouds.all', 'rain.1h', 'rain.3h','snow.1h']]
    histWeathDf = histWeathDf.fillna(0)
    # precipitation adds rain.1h and snow.1h only; rain.3h is selected above but not used
    histWeathDf['precipitation'] = histWeathDf[['rain.1h','snow.1h']].sum(axis=1)
    histWeathDf = histWeathDf[['main.temp_min','main.temp_max','clouds.all','precipitation']]

    # the last label decides how far the daily range extends
    endDate = datetime.datetime.strptime(histWeathDf.index[-1], '%Y-%m-%d %H:%M:%S')
    endDate = endDate.replace(hour=0, minute=0, second=0)
    startDate = datetime.datetime(2020, 3, 19)

    # Group on the "YYYY-MM-DD" prefix of each label instead of filtering the
    # whole frame once per day.
    dailyWeather = histWeathDf.groupby(histWeathDf.index.str[:10]).agg(
        {'main.temp_min': 'min', 'main.temp_max': 'max', 'clouds.all': 'mean', 'precipitation': 'sum'})
    dailyWeather.index = pd.to_datetime(dailyWeather.index)
    dailyWeather = dailyWeather.reindex(pd.date_range(start=startDate, end=endDate))
    # a day without any observation keeps NaN temperatures/clouds but a precipitation sum of 0
    dailyWeather['precipitation'] = dailyWeather['precipitation'].fillna(0)
    histWeathDf = dailyWeather[['main.temp_min','main.temp_max','clouds.all','precipitation']].rename(
        columns={'main.temp_min': 'temp_min', 'main.temp_max': 'temp_max', 'clouds.all': 'clouds'})

    # compute statistic weather for missing values (per month/day, placed in the year 2020)
    statWeathDf = pd.read_csv("static_data/historicweather/statistical_"+cantonId+".csv")
    statWeathDf['date'] = statWeathDf.apply(lambda row: datetime.datetime(2020,int(row["month"]),int(row["day"])), axis=1)
    statWeathDf = statWeathDf.set_index('date')
    statWeathDf = statWeathDf[['temp.average_min','temp.average_max','clouds.mean','precipitation.mean']]
    statWeathDf.columns = ['temp_min', 'temp_max', 'clouds', 'precipitation']

    # Use the statistical rows only for days the historic series does not cover.
    # Stacking both unfiltered gives duplicate dates, and joining a frame with
    # duplicate labels would duplicate those days in df (and in the CSV).
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    uncoveredDays = ~statWeathDf.index.isin(histWeathDf.index)
    weather = pd.concat([statWeathDf.loc[uncoveredDays], histWeathDf])

    df = df.join(weather)

    # attach holidays & vacations
    holy = _withDateIndex(_readSharedCsv("data/HolidayVacation/HolidayVacation.csv"), "date")[[cantonId]]
    holy = holy.rename(columns={cantonId:'isHolyday'})
    df = df.join(holy)

    # attach intervista mobility data (median distance of the canton's language region)
    averageAndMedian = _readSharedCsv("data/IntervistaMobility/Mittelwerte_und_Median_pro_Tag.csv", encoding="mac_roman")
    averageAndMedian = averageAndMedian.loc[(averageAndMedian["Beschreibung"] == "Distanz") & (averageAndMedian["Typ"] == "Median")]
    averageAndMedian = _withDateIndex(averageAndMedian, "Datum")
    # Only the three language-region columns are used below, so nothing here
    # depends on the header names that contain non-ASCII characters.
    averageAndMedian = averageAndMedian[['D-CH', 'F-CH', 'I-CH']]
    if cantonId in D_CH:
        regionCol = 'D-CH'
    elif cantonId in F_CH:
        regionCol = 'F-CH'
    else:
        # cantonId = TI
        regionCol = 'I-CH'
    df = df.join(averageAndMedian[[regionCol]].rename(columns={regionCol:'intervistaMob'}))

    # attach neighbor incidents (WEEKLY): temp has one row per entry of
    # listOfMondays, so after the left joins only those days carry a value
    temp = pd.DataFrame(index=pd.to_datetime(listOfMondays))
    temp.index.name = 'time'
    for n in neigbors[cantonId]:
        if len(n) != 2:
            incidence = ecdc.loc[ecdc['region_name']==n][['rate_14_day_per_100k']]
        else:
            incidence = foph.loc[foph['geoRegion']==n][['rate_14_day_per_100k']]
        temp = temp.join(incidence.rename(columns={'rate_14_day_per_100k':('incidence_'+n)}))
    # compute both statistics from the neighbour columns only
    neighborIncidence = temp.copy()
    temp['meanNeighborIncidence'] = neighborIncidence.mean(axis=1)
    temp['maxNeighborIncidence'] = neighborIncidence.max(axis=1)
    df = df.join(temp[['meanNeighborIncidence','maxNeighborIncidence']])

    df.to_csv('data/merged/'+cantonId+'.csv')
    

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
