In [51]:
import os, urllib.request, json
import pandas as pd
import shutil
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile
from icalendar import Calendar
import datetime
from datetime import date, timedelta, timezone
import os
import matplotlib.pyplot as plt
import numpy as np
import math

# Two-letter abbreviations of the 26 Swiss cantons. The list order matters: it is
# zipped position by position with the file-name and weather-city lists further
# down, and its entries are used as column names of the holiday/vacation table.
cantonKeys = ['AG','AI','AR', 'BE', 'BL', 'BS', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE', 'NW', 'OW', 'SG', 'SH', 'SO', 'SZ', 'TG', 'TI', 'UR', 'VD', 'VS', 'ZG','ZH']


In [3]:

# create dictionary


# this code section loads all needed remote data to the local 'data' folder
# (the backup of that folder is created at the end of this cell)


def downloadAndExtractZip(sourceUrl, targetDir):
    """Download the ZIP archive at sourceUrl and unpack it into targetDir.

    The whole archive is buffered in memory before it is extracted.
    """
    with urlopen(sourceUrl) as zipresp:
        with ZipFile(BytesIO(zipresp.read())) as zfile:
            zfile.extractall(targetDir)


def downloadCsv(sourceUrl, targetDir, filename):
    """Load the CSV at sourceUrl with pandas and store it as targetDir/filename.

    targetDir is created when it is missing. Returns the loaded DataFrame.
    """
    frame = pd.read_csv(sourceUrl)
    # exist_ok avoids the separate "does it exist" check and its race window
    os.makedirs(targetDir, exist_ok=True)
    frame.to_csv(targetDir + '/' + filename)
    return frame


# get newest available string to download FOPH data
with urllib.request.urlopen("https://www.covid19.admin.ch/api/data/context") as url:
    data = json.loads(url.read().decode())
zipurl = data["sources"]["zip"]["csv"]

# download the FOPH data (use this data also for the virus variants)
downloadAndExtractZip(zipurl, 'data/FOPH')

# download the Google Mobility data
zipurl = 'https://www.gstatic.com/covid19/mobility/Region_Mobility_Report_CSVs.zip'
downloadAndExtractZip(zipurl, 'data/GoogleMobility')

# download the Intervista Mobility data
zipurl = 'https://www.intervista.ch/media/2020/03/Download_Mobilit%C3%A4ts-Monitoring_Covid-19.zip'
downloadAndExtractZip(zipurl, 'data/IntervistaMobility')

# KOF stringency index data (the output file name keeps its historical spelling)
df = downloadCsv('https://datenservice.kof.ethz.ch/api/v1/public/sets/stringency_plus_web?mime=csv&df=Y-m-d',
                 'data/KOF', 'KOFStrigencyIndex.csv')

# Oxford COVID-19 Government Response Tracker
df = downloadCsv('https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv',
                 'data/Oxford', 'OxfordStrigencyIndex.csv')

# TODO: weather (OpenWeather API key requested, but it only provides historical data going back at most one year)

# ==================== THE MANUAL WORK ==================
# One row per calendar day of 2020-2021 and one column per canton, initialised
# with 0; the fill steps below set 1 for holidays, school vacations and weekends.
# ISO dates keep the range independent of day-first/month-first guessing.
holydayVacationTable = pd.DataFrame(
    0,
    index=pd.date_range(start='2020-01-01', end='2021-12-31'),
    columns=cantonKeys,
)


def fillCantonHolidays(cantonKey, filename):
    """Mark the single-day holidays of one canton in holydayVacationTable.

    Parameters
    ----------
    cantonKey : str
        Column of holydayVacationTable that receives the value 1.
    filename : str
        Path of an iCalendar file; the DTSTART of every VEVENT is read and
        parsed as a plain date in YYYYMMDD form.

    Raises
    ------
    ValueError
        If a DTSTART value does not match the YYYYMMDD format (from strptime).
    """
    # the context manager makes sure the file handle is closed again
    with open(filename, 'rb') as file:
        cal = Calendar.from_ical(file.read())
    for e in cal.walk('vevent'):
        start = e['DTSTART'].to_ical().decode('utf-8')
        parsedDate = datetime.datetime.strptime(start, '%Y%m%d')
        # A single .loc call writes into the table itself; chained indexing
        # (table[col][date] = 1) may write into a temporary copy instead.
        # Dates outside the table are skipped so that no new row gets appended.
        if parsedDate in holydayVacationTable.index:
            holydayVacationTable.loc[parsedDate, cantonKey] = 1
    

def fillCantonVacation(cantonKey, filename):
    """Mark the school-vacation periods of one canton in holydayVacationTable.

    Parameters
    ----------
    cantonKey : str
        Column of holydayVacationTable that receives the value 1.
    filename : str
        Path of an iCalendar file; DTSTART and DTEND of every VEVENT are read
        and parsed as plain dates in YYYYMMDD form.

    Raises
    ------
    ValueError
        If a DTSTART/DTEND value does not match the YYYYMMDD format (from strptime).
    """
    # the context manager makes sure the file handle is closed again
    with open(filename, 'rb') as file:
        cal = Calendar.from_ical(file.read())
    for e in cal.walk('vevent'):
        startDate = e['DTSTART'].to_ical().decode('utf-8')
        endDate = e['DTEND'].to_ical().decode('utf-8')
        parsedStartDate = datetime.datetime.strptime(startDate, '%Y%m%d')
        parsedEndDate = datetime.datetime.strptime(endDate, '%Y%m%d')
        # NOTE(review): the DTEND day itself is marked as vacation. RFC 5545 defines
        # DTEND of all-day events as exclusive - confirm how the .ics files use it.
        # The label slice includes both bounds and is limited to the days that
        # exist in the table, so periods reaching beyond either end of the
        # table are cut off instead of raising or appending rows.
        holydayVacationTable.loc[parsedStartDate:parsedEndDate, cantonKey] = 1

        
# special mapping for holiday and vacation file names
vacHolyDictionary = dict(zip(cantonKeys, ['aargau','appenzell-innerrhoden','appenzell-ausserrhoden','bern','basel-land','basel-stadt','freiburg','genf','glarus','graubuenden','jura','luzern','neuenburg','nidwalden','obwalden','sankt-gallen','schaffhausen','solothurn','schwyz','thurgau','tessin','uri','waadt','wallis','zug','zuerich']))

# (folder, fill function) pairs, processed in this order for every canton:
# holidays are single days, school vacations have a start and an end date
calendarSources = [
    ('static_data/holidays/2020/', fillCantonHolidays),
    ('static_data/holidays/2021/', fillCantonHolidays),
    ('static_data/vacations/2020/', fillCantonVacation),
    ('static_data/vacations/2021/', fillCantonVacation),
]

for c in cantonKeys:
    for p, fillFunction in calendarSources:
        # sorted() makes the choice deterministic: os.listdir returns the
        # entries in arbitrary order
        matches = sorted(match for match in os.listdir(p) if vacHolyDictionary[c] in match)
        if not matches:
            raise FileNotFoundError(
                "no calendar file containing '" + vacHolyDictionary[c] + "' found in " + p
            )
        filename = matches[0]
        path = p + filename
        fillFunction(c, path)
    
# offset = 6 gets all sundays, offset 5 all saturdays
def getDays(year, offset):
    """Yield every date of `year` that falls on the weekday `offset`.

    Parameters
    ----------
    year : int
        Calendar year to walk through.
    offset : int
        Weekday in date.weekday() numbering (0 = Monday ... 6 = Sunday).

    Yields
    ------
    datetime.date
        The matching days in ascending order, 7 days apart.
    """
    d = date(year, 1, 1)
    # The modulo always moves forward to the first matching weekday. Without it
    # a weekday earlier in the week than January 1st would step back into the
    # previous year and the loop below would yield nothing at all.
    d += timedelta(days=(offset - d.weekday()) % 7)
    while d.year == year:
        yield d
        d += timedelta(days=7)

# all Saturdays (weekday 5) and Sundays (weekday 6) of 2020 and 2021
listOfSaturdaysSundays = [
    weekendDay
    for year in [2020, 2021]
    for weekday in [5, 6]
    for weekendDay in getDays(year, weekday)
]

# The plain date objects are converted to Timestamps and matched against the
# index in one step. This does not depend on how .loc treats datetime.date
# labels, and days that are not part of the table are ignored instead of being
# appended as new rows. Weekends count as free days for every canton.
weekendMask = holydayVacationTable.index.isin(pd.to_datetime(listOfSaturdaysSundays))
holydayVacationTable.loc[weekendMask, :] = 1

os.makedirs('data/HolidayVacation', exist_ok=True)
holydayVacationTable.to_csv('data/HolidayVacation/HolidayVacation.csv')
# ========================================================


# International data for bordering countries, one (url, folder, csv file) entry each:
#  - ECDC: only weekly because germany, also earliest data point is from week 13 2020
#  - OWID: only country level data
internationalSources = [
    ('https://opendata.ecdc.europa.eu/covid19/subnationalcaseweekly/csv',
     'data/ECDC', 'data/ECDC/ECDCsubnationalcaseweekly.csv'),
    ('https://covid.ourworldindata.org/data/owid-covid-data.csv',
     'data/OWID', 'data/OWID/OWIDcoviddata.csv'),
]
for sourceUrl, targetDir, targetFile in internationalSources:
    # load first, then make sure the folder exists, then store the local copy
    df = pd.read_csv(sourceUrl)
    if not os.path.exists(targetDir):
        os.makedirs(targetDir)
    df.to_csv(targetFile)

#======================== construct the federal measures ================
# The first read only supplies the column names of the 'Federal' sheet; the
# second read skips 6 rows and applies those names to the rows that follow.
columnNames = pd.read_excel('static_data/measures/measures.xlsx', sheet_name = 'Federal').columns
measures = pd.read_excel('static_data/measures/measures.xlsx', sheet_name = 'Federal', skiprows=6, names=columnNames)

measures = measures.set_index('Time')

# make sure every day of 2020-2021 has a row; days that are not listed in the
# sheet are added with NaN in every column (one reindex instead of appending
# row by row), the index name is kept for the CSV header
allDays = pd.date_range(start=datetime.datetime(2020, 1, 1), end=datetime.datetime(2021, 12, 31))
timeAxisName = measures.index.name
measures = measures.reindex(measures.index.union(allDays)).rename_axis(timeAxisName)
measures = measures.sort_index()

# propagate the update changes to all other days: every measure keeps its last
# listed level until the next listed change, days before the first listed
# level get level 0
measures = measures.ffill().fillna(0)

'''
# plotting federal measures
for m in measures.columns:
    measures.plot(kind='line', y=m, figsize=(15,15))
    plt.show()
'''
os.makedirs('data/measures', exist_ok=True)
measures.to_csv('data/measures/federal.csv')

#======================== construct the cantonal measures ================
# Build one measures table per canton: start from a copy of the federal table
# `measures`, merge the canton's own sheet into it and write the result to
# data/measures/<canton key>.csv.
for c in cantonKeys: 
    # copy the federal measures and use max function (with some exceptions)
    cantMeasuresComplete = measures.copy()
    # the sheet of the workbook is selected by the canton key
    cantMeasures = pd.read_excel('static_data/measures/measures.xlsx', sheet_name = c)

    cantMeasures = cantMeasures.set_index('Time')

    # add a row of NaN for every day of 2020-2021 that the sheet does not list
    for day in pd.date_range(start=datetime.datetime(2020, 1, 1), end=datetime.datetime(2021, 12, 31)):
        if not day in cantMeasures.index:
            cantMeasures.loc[day] = [float('NaN')] * len(cantMeasures.columns)

    cantMeasures = cantMeasures.sort_index()


    # Walk through every measure day by day and carry the last listed level forward:
    #  - a listed value other than -1 becomes the current level,
    #  - a listed -1 is stored as 0 and clears the current level
    #    (presumably -1 marks the end of a cantonal measure - confirm against measures.xlsx),
    #  - an empty cell takes the current level, or 0 when there is none.
    for m in cantMeasures.columns:
        dailyMeasureLevel = float('NaN')
        for day in cantMeasures.index:
            if not math.isnan(cantMeasures.loc[day,m]):
                if cantMeasures.loc[day,m] != -1:
                    dailyMeasureLevel = cantMeasures.loc[day,m]
                else:
                    cantMeasures.loc[day,m] = 0
                    dailyMeasureLevel = float('NaN')
            elif math.isnan(cantMeasures.loc[day,m]) and not math.isnan(dailyMeasureLevel):
                cantMeasures.loc[day,m] = dailyMeasureLevel
            else:
                cantMeasures.loc[day,m] = 0

    # Merge the cantonal levels into the copy of the federal table.
    # Exception windows (start inclusive, end exclusive):
    #  - 22.12.2020 until 9.1.2021 for restaurants, cultural/entertainment/recreational
    #    and sport/wellness facilities,
    #  - 23.12.2020 until 3.1.2021 for gatherings/private events.
    # Inside a window a non-zero cantonal level replaces the federal one;
    # everywhere else the higher of the two levels is used.
    for m in cantMeasures.columns:
        for day in cantMeasures.index:
            if (day < datetime.datetime(2021, 1, 9) and day >= datetime.datetime(2020, 12, 22)  and (m == 'Restaurants' or m=='Cultural, entertainment and recreational facilities' or m=='Sport/Wellness facilities')) or (day >= datetime.datetime(2020, 12, 23) and day < datetime.datetime(2021, 1, 3) and m=='Gatherings/private events'):
                # cantonal exception possible
                if cantMeasures.loc[day,m] != 0:
                    cantMeasuresComplete.loc[day,m] = cantMeasures.loc[day,m]
            else:
                cantMeasuresComplete.loc[day,m] = max(cantMeasuresComplete.loc[day,m], cantMeasures.loc[day,m])
    
    # store the merged table of this canton
    if not os.path.exists('data/measures'):
        os.makedirs('data/measures')
    cantMeasuresComplete.to_csv('data/measures/'+c+'.csv')

'''            
# plotting selected canton against federal measures         
c = []
for m in cantMeasuresComplete.columns:
    c.append("cant"+m)            
cantMeasuresComplete.columns = c
result = pd.concat([cantMeasuresComplete, measures], axis=1, join="inner")

for m in measures.columns:
    result.plot(kind='line', y=[m,"cant"+m], figsize=(10,10))
    #cantMeasures.reset_index().plot.scatter(x = 'Time', y = m, figsize=(15,15))
    plt.show()
'''

# snapshot the freshly loaded 'data' folder as backups/backup-<timestamp>
backupRoot = 'backups'
if not os.path.exists(backupRoot):
    os.makedirs(backupRoot)
now = datetime.datetime.now()
backupname = now.strftime("backup-%Y-%m-%d-%H-%M-%S")
# kept as the last expression of the cell: copytree returns the destination
# path, which the notebook shows as the cell output
shutil.copytree('data', backupRoot + '/' + backupname)

'backups/backup-2021-03-18-16-20-00'

In [92]:
import requests

# special mapping for weather API: canton key -> OpenWeatherMap city id
#['AG','AI','AR', 'BE', 'BL', 'BS', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE', 'NW', 'OW', 'SG', 'SH', 'SO', 'SZ', 'TG', 'TI', 'UR', 'VD', 'VS', 'ZG','ZH']
#["Aarau","Appenzell","Herisau","Bern","Liestal","Basel","Fribourg","Geneve","2660594","Chur","Delemont","Luzern","Neuchatel","Stans","Sarnen","Sankt Gallen", "Schaffhausen", "Olten","Schwyz","Frauenfeld","Bellinzona","2661780","Lausanne","Sion","Zug","Zurich"]
weatherDictionary = dict(zip(cantonKeys,[2661881,2661740,2660365,2661552,2659891,2661604,2660718,2660646,2660594,2661169,2661035,2659811,2659496,2658504,2658786,2658822,2658761,2658564,2658665,2660727,2661567,2661780,2659994,2658576,2657908,2657896]))

# The API key is a credential and must not be stored in the notebook, so it is
# read from the environment of the kernel.
apiKey = os.environ.get("OPENWEATHER_API_KEY")
if not apiKey:
    raise RuntimeError("Set the OPENWEATHER_API_KEY environment variable to your OpenWeatherMap API key")

# columns that every stored weather file starts with, in this order
weatherColumns = ['dt', 'weather', 'main.temp', 'main.feels_like', 'main.pressure',
                  'main.humidity', 'main.temp_min', 'main.temp_max', 'wind.speed',
                  'wind.deg', 'clouds.all', 'rain.1h']

# hourly history is requested from this day on, up to now
startDate = datetime.datetime(2020, 3, 19)
endDate = datetime.datetime.today()

for cantonId in weatherDictionary.keys():
    cityId = str(weatherDictionary[cantonId])

    # can only get one week for one call: every request covers one week
    # (the last one ends at endDate), the weekly results are collected in a
    # list and combined once at the end
    weeklyFrames = []
    for week in pd.date_range(start=startDate, end=endDate, freq='W-THU'):
        weekEnd = min(week + pd.Timedelta(days=7), pd.Timestamp(endDate))
        unixTimeUTCstart = int(week.replace(tzinfo=timezone.utc).timestamp())
        unixTimeUTCend = int(weekEnd.replace(tzinfo=timezone.utc).timestamp())
        # https, because the API key is part of the URL
        apiCall = "https://history.openweathermap.org/data/2.5/history/city?id="+cityId+"&type=hour&start="+str(unixTimeUTCstart)+"&end="+str(unixTimeUTCend)+"&appid="+apiKey
        with urllib.request.urlopen(apiCall) as url:
            data = json.loads(url.read().decode())
        weeklyFrames.append(pd.json_normalize(data["list"]))

    # create the data frame of this canton
    if weeklyFrames:
        weather = pd.concat(weeklyFrames, ignore_index=True)
    else:
        weather = pd.DataFrame(columns=weatherColumns)
    # expected columns first (created empty when the API never sent them),
    # followed by any additional columns of the responses
    extraColumns = [col for col in weather.columns if col not in weatherColumns]
    weather = weather.reindex(columns=weatherColumns + extraColumns)

    # remove some duplicates (first entry overlaps); the result has to be
    # assigned, drop_duplicates does not change the frame in place
    weather = weather.drop_duplicates(subset=['dt']).reset_index(drop=True)
    # transform unix time to datetime (UTC)
    weather["dt"] = weather["dt"].apply(lambda x: datetime.datetime.fromtimestamp(x, timezone.utc).strftime('%Y-%m-%d %H:%M:%S'))
    os.makedirs('static_data/historicweather', exist_ok=True)
    weather.to_csv("static_data/historicweather/"+ cantonId +".csv")

#pd.options.display.max_rows = 999


In [117]:
#=============== this script has only to run once and never again (and it did) ==================
# Loads the aggregated (statistical) daily weather of every canton's city for
# each calendar day from January 1st to March 18th.
for cantonId in weatherDictionary.keys():
    cityId = str(weatherDictionary[cantonId])
    # the per-day results are collected in a list and combined once;
    # DataFrame.append is not available any more in pandas 2
    dailyFrames = []
    for everyDay in pd.date_range(start=datetime.datetime(2020, 1, 1), end=datetime.datetime(2020, 3, 18)):
        monthNumber = str(everyDay.month)
        dayNumber = str(everyDay.day)
        apiCall ="https://history.openweathermap.org/data/2.5/aggregated/day?id="+cityId+"&month="+monthNumber+"&day="+dayNumber+"&appid="+apiKey
        with urllib.request.urlopen(apiCall) as url:
            data = json.loads(url.read().decode())
        dailyFrames.append(pd.json_normalize(data["result"]))
    # every frame keeps its own row labels, as before
    statisticalData = pd.concat(dailyFrames)
    os.makedirs('static_data/historicweather', exist_ok=True)
    statisticalData.to_csv("static_data/historicweather/statistical_"+ cantonId +".csv")




In [120]:
# gets the 16 day weather forecast of every canton's city and stores it as
# data/weatherforecast/<canton key>.csv
for cantonId, cityNumber in weatherDictionary.items():
    cityId = str(cityNumber)
    forecastData = pd.DataFrame()
    apiCall = "https://api.openweathermap.org/data/2.5/forecast/daily?id=" + cityId + "&cnt=16&appid=" + apiKey
    with urllib.request.urlopen(apiCall) as response:
        data = json.loads(response.read().decode())
        # one row per entry of the "list" field of the response
        forecastData = pd.json_normalize(data["list"])

    forecastDir = 'data/weatherforecast'
    if not os.path.exists(forecastDir):
        os.makedirs(forecastDir)
    forecastData.to_csv("data/weatherforecast/" + cantonId + ".csv")
