In [26]:
import os, urllib.request, json
import pandas as pd
import shutil
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile
from icalendar import Calendar
import pandas as pd
import datetime
from datetime import date, timedelta
import os

In [None]:
# This code section downloads all needed data into the local ``data`` folder.
# A timestamped backup of that folder is created further down in this cell.

# Ask the FOPH API for the URL of the newest available CSV bundle.
with urllib.request.urlopen("https://www.covid19.admin.ch/api/data/context") as url:
    data = json.loads(url.read().decode())
zipurl = data["sources"]["zip"]["csv"]

# Zip archives to download, each paired with the folder it is extracted into.
zipSources = [
    # FOPH data (this data is also used for the virus variants)
    (zipurl, 'data/FOPH'),
    # Google Mobility data
    ('https://www.gstatic.com/covid19/mobility/Region_Mobility_Report_CSVs.zip', 'data/GoogleMobility'),
    # Intervista Mobility data
    ('https://www.intervista.ch/media/2020/03/Download_Mobilit%C3%A4ts-Monitoring_Covid-19.zip', 'data/IntervistaMobility'),
]
for zipurl, targetDir in zipSources:
    # The archive is read completely into memory and unpacked from there.
    with urlopen(zipurl) as zipresp, ZipFile(BytesIO(zipresp.read())) as zfile:
        zfile.extractall(targetDir)

# CSV files to download: (source URL, target folder, target file name).
csvSources = [
    # KOF stringency index data
    ('https://datenservice.kof.ethz.ch/api/v1/public/sets/stringency_plus_web?mime=csv&df=Y-m-d', 'data/KOF', 'KOFStrigencyIndex.csv'),
    # Oxford COVID-19 Government Response Tracker
    ('https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv', 'data/Oxford', 'OxfordStrigencyIndex.csv'),
]
for csvUrl, targetDir, csvName in csvSources:
    df = pd.read_csv(csvUrl)
    # exist_ok=True replaces the separate "does it exist?" check.
    os.makedirs(targetDir, exist_ok=True)
    df.to_csv(targetDir + '/' + csvName)

# TODO: weather (OpenWeather API key requested, but it will only provide past data going back at most one year)

# ==================== THE MANUAL WORK ==================

# Canton abbreviations and, in the same order, the canton names that are used
# to find the matching calendar files in the static_data folders.
cantonKeys = ['AG','AI','AR', 'BE', 'BL', 'BS', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE', 'NW', 'OW', 'SG', 'SH', 'SO', 'SZ', 'TG', 'TI', 'UR', 'VD', 'VS', 'ZG','ZH']
cantonNames = ['aargau','appenzell-innerrhoden','appenzell-ausserrhoden','bern','basel-land','basel-stadt','freiburg','genf','glarus','graubuenden','jura','luzern','neuenburg','nidwalden','obwalden','sankt-gallen','schaffhausen','solothurn','schwyz','thurgau','tessin','uri','waadt','wallis','zug','zuerich']
# Map each canton abbreviation to its canton name.
d = dict(zip(cantonKeys, cantonNames))

# One row per day of 2020 and 2021 and one column per canton, initialised to 0.
# ISO dates are used because '1/1/2020' and '31/12/2021' can only both be
# parsed by reading one month-first and the other day-first.
holydayVacationTable = pd.DataFrame(
    0,
    index=pd.date_range(start='2020-01-01', end='2021-12-31'),
    columns=cantonKeys,
)


def fillCantonHolidays(cantonKey, filename):
    """Mark the single-day holidays of one canton in ``holydayVacationTable``.

    Every VEVENT of the iCalendar file is read and the day given by its
    DTSTART is set to 1 in the canton's column.

    Args:
        cantonKey: Column of ``holydayVacationTable`` to write to.
        filename: Path of the iCalendar file to read.

    Raises:
        ValueError: If a DTSTART value is not in plain ``YYYYMMDD`` form.
    """
    # The context manager closes the file even when parsing fails.
    with open(filename, 'rb') as file:
        cal = Calendar.from_ical(file.read())
    for e in cal.walk('vevent'):
        start = e['DTSTART'].to_ical().decode('utf-8')
        parsedDate = datetime.datetime.strptime(start, '%Y%m%d')
        # Days outside the table are skipped: assigning an unknown label
        # through .loc would append a new row instead of being ignored.
        if parsedDate in holydayVacationTable.index:
            # One .loc call writes into the table itself; the chained form
            # table[col][row] = 1 may end up writing to a temporary copy.
            holydayVacationTable.loc[parsedDate, cantonKey] = 1
    

def fillCantonVacation(cantonKey, filename):
    """Mark the school vacations of one canton in ``holydayVacationTable``.

    Every VEVENT of the iCalendar file is read and all days from its DTSTART
    to its DTEND (both included) are set to 1 in the canton's column.

    Args:
        cantonKey: Column of ``holydayVacationTable`` to write to.
        filename: Path of the iCalendar file to read.

    Raises:
        ValueError: If a DTSTART/DTEND value is not in plain ``YYYYMMDD`` form.
    """
    # The context manager closes the file even when parsing fails.
    with open(filename, 'rb') as file:
        cal = Calendar.from_ical(file.read())
    for e in cal.walk('vevent'):
        startDate = e['DTSTART'].to_ical().decode('utf-8')
        endDate = e['DTEND'].to_ical().decode('utf-8')
        parsedStartDate = datetime.datetime.strptime(startDate, '%Y%m%d')
        parsedEndDate = datetime.datetime.strptime(endDate, '%Y%m%d')
        # NOTE(review): the iCalendar standard defines DTEND of all-day events
        # as exclusive, while the end day is treated as a vacation day here -
        # confirm against the calendar files.
        # A label slice on the sorted date index includes both end points and
        # is limited to the days that exist in the table, so vacations that
        # start before the first or end after the last table day need no
        # explicit clamping. A single .loc call also avoids the chained
        # table[col][rows] = 1 assignment.
        holydayVacationTable.loc[parsedStartDate:parsedEndDate, cantonKey] = 1
        
        
# Calendar folders per fill function: the canton holidays are single days,
# the school vacations have a start and an end date.
calendarSources = (
    (fillCantonHolidays, ('static_data/holidays/2020/', 'static_data/holidays/2021/')),
    (fillCantonVacation, ('static_data/vacations/2020/', 'static_data/vacations/2021/')),
)

for c in cantonKeys:
    for fillCalendar, folders in calendarSources:
        for p in folders:
            # Use the first directory entry whose name contains the canton name.
            matches = [entry for entry in os.listdir(p) if d[c] in entry]
            filename = matches[0]
            path = p + filename
            fillCalendar(c, path)
    
# offset = 6 gets all Sundays, offset = 5 all Saturdays
def getDays(year, offset):
    """Yield every date of ``year`` that falls on the given weekday.

    Args:
        year: Calendar year to scan.
        offset: Weekday number as returned by ``date.weekday()``
            (0 = Monday ... 6 = Sunday).

    Yields:
        ``datetime.date`` objects in ascending order, seven days apart.
    """
    d = date(year, 1, 1)
    # The modulo keeps the shift between 0 and 6 days forward. Without it a
    # year whose January 1st lies on a later weekday than ``offset`` would
    # start in the previous year and the loop below would yield nothing.
    d += timedelta(days=(offset - d.weekday()) % 7)
    while d.year == year:
        yield d
        d += timedelta(days=7)

# Collect every Saturday (weekday 5) and Sunday (weekday 6) of 2020 and 2021.
listOfSaturdaysSundays = [
    day
    for year in [2020, 2021]
    for weekday in [5, 6]
    for day in getDays(year, weekday)
]

# Set the weekend rows to 1 for all cantons. The plain date objects are
# converted to Timestamps and applied through a boolean mask: looking up
# datetime.date labels one by one on the date index is not guaranteed to match
# the existing rows and can append new rows instead.
weekendMask = holydayVacationTable.index.isin(pd.to_datetime(listOfSaturdaysSundays))
holydayVacationTable.loc[weekendMask, :] = 1

# exist_ok=True replaces the separate "does it exist?" check.
os.makedirs('data/HolidayVacation', exist_ok=True)
holydayVacationTable.to_csv('data/HolidayVacation/HolidayVacation.csv')
# ========================================================


# TODO: measures.csv cantonal/federal

# International data for bordering countries: (source URL, target folder, target file name).
internationalCsvSources = [
    # ECDC: only weekly because of Germany, also the earliest data point is from week 13 2020
    ('https://opendata.ecdc.europa.eu/covid19/subnationalcaseweekly/csv', 'data/ECDC', 'ECDCsubnationalcaseweekly.csv'),
    # OWID: only country level data
    ('https://covid.ourworldindata.org/data/owid-covid-data.csv', 'data/OWID', 'OWIDcoviddata.csv'),
]
for csvUrl, targetDir, csvName in internationalCsvSources:
    df = pd.read_csv(csvUrl)
    # exist_ok=True replaces the separate "does it exist?" check.
    os.makedirs(targetDir, exist_ok=True)
    df.to_csv(targetDir + '/' + csvName)

# Create a backup of the data we just loaded; the folder name carries the
# current local time so that every run gets its own copy.
os.makedirs('backups', exist_ok=True)
now = datetime.datetime.now()
backupname = now.strftime("backup-%Y-%m-%d-%H-%M-%S")
shutil.copytree('data', 'backups/'+backupname)

In [48]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

#======================== construct the federal measures ================
# The first read only supplies the column names from the sheet's header row;
# the second read skips the first six lines of the sheet and applies those
# names to the remaining rows.
columnNames = pd.read_excel('static_data/measures/measures.xlsx', sheet_name = 'Federal').columns
measures = pd.read_excel('static_data/measures/measures.xlsx', sheet_name = 'Federal', skiprows=6, names=columnNames)

measures = measures.set_index('Time')

# Add an all-NaN row for every day of 2020/2021 that has no entry yet and sort
# by date. One reindex replaces adding the missing days one row at a time.
allDays = pd.date_range(start=datetime.datetime(2020, 1, 1), end=datetime.datetime(2021, 12, 31))
measures = measures.reindex(measures.index.union(allDays)).rename_axis('Time')

# Propagate the update changes to all other days: every empty cell takes the
# most recent level above it in the same column; cells before the first entry
# of a column get level 0.
measures = measures.ffill().fillna(0)
# Disabled debugging aid: the plotting loop below is wrapped in a string
# literal so that it is not executed. It would draw one line chart per column
# of ``measures``.
'''
for m in measures.columns:
    measures.plot(kind='line', y=m, figsize=(15,15))
    #measures.reset_index().plot.scatter(x = 'Time', y = m)
    plt.show()
'''
#======================== construct the cantonal measures ================
# TODO: also think of the exception for some cantons in December
# copy the federal measures and use max function (with some exceptions)

"\nfor m in measures.columns:\n    measures.plot(kind='line', y=m, figsize=(15,15))\n    #measures.reset_index().plot.scatter(x = 'Time', y = m)\n    plt.show()\n"