# Import Statements

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import urllib.request, json # we will need urllib to communicate with the api and then json to read the data we get

---

# Holiday Data

## NRW (Duisburg)

### Get official NRW holidays (Feiertage) from API

In [2]:
# Download the 2019 public-holiday (Feiertage) listing and parse the JSON body.
with urllib.request.urlopen("https://feiertage-api.de/api/?jahr=2019") as response:
    feiertage_payload = response.read().decode()
nrw_holidays_json = json.loads(feiertage_payload)

In [3]:
# Take the 'datum' of every entry listed under the 'NW' key and flag each date as True.
nrw_holiday_dates = [
    pd.to_datetime(entry['datum'])
    for entry in nrw_holidays_json['NW'].values()
]
nrw_holidays = pd.Series(data=True, index=pd.DatetimeIndex(nrw_holiday_dates))

### Get official NRW vacations (Ferien) from API

In [4]:
# Download the 2019 school-vacation (Ferien) listing for NW and parse the JSON body.
with urllib.request.urlopen("https://ferien-api.de/api/v1/holidays/NW/2019") as response:
    ferien_payload = response.read().decode()
nrw_vacations_json = json.loads(ferien_payload)

In [5]:
# Load the parsed vacation payload into a DataFrame for column-wise processing.
nrw_vacations = pd.DataFrame(data=nrw_vacations_json)

In [6]:
# Parse both period boundaries into pandas datetimes.
for boundary in ('start', 'end'):
    nrw_vacations[boundary] = pd.to_datetime(nrw_vacations[boundary])

In [7]:
# Keep only the period boundaries; every other column is discarded.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas deprecated in 1.3 and no longer accepts from 2.0 on. Rebinding the
# name instead of using `inplace=True` keeps the cell safe to re-run.
nrw_vacations = nrw_vacations.drop(columns=nrw_vacations.columns.difference(['start', 'end']))

In [8]:
# Expand every vacation period into its individual days (date_range includes
# both endpoints) and join all periods into a single DatetimeIndex.
nrw_vacation_periods = [
    pd.date_range(first_day, last_day)
    for first_day, last_day in zip(nrw_vacations['start'], nrw_vacations['end'])
]
nrw_vacation_index = pd.DatetimeIndex([]).append(nrw_vacation_periods)

In [9]:
# Flag every vacation day as True (the name is rebound from the DataFrame to this Series).
nrw_vacations = pd.Series(True, index=nrw_vacation_index)

In [10]:
# Stack the public-holiday flags and the vacation flags into one 'is_holiday' column.
# Series.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# supported equivalent and yields the same rows in the same order.
nrw_holidays_full = pd.DataFrame({'is_holiday': pd.concat([nrw_holidays, nrw_vacations])})

We remove duplicate index entries, because a public holiday can fall within a vacation period and would otherwise appear twice.

In [11]:
# De-duplicate by date: move the index into an 'index' column, keep the last
# row for each date, then restore that column as the index.
nrw_holidays_flat = nrw_holidays_full.reset_index()
nrw_holidays_unique = nrw_holidays_flat.drop_duplicates(subset='index', keep='last')
nrw_holidays_full = nrw_holidays_unique.set_index('index')

In [12]:
# Expand the per-day flags to an hourly grid.
#
# The previous `asfreq('h', fill_value=True)` reindexed onto every hour between
# the first and last timestamp and filled ALL missing hours with True, so
# ordinary working days were stored as holidays as well. Instead, build the
# hourly grid explicitly and flag an hour only when its calendar day is one of
# the flagged days.
# NOTE(review): assumes each timestamp in the index denotes a calendar day
# (time-of-day is dropped via normalize) - confirm against the API payloads.
nrw_holiday_days = (
    nrw_holidays_full.index[nrw_holidays_full['is_holiday'].to_numpy(dtype=bool)]
    .normalize()
    .unique()
)
# Extend the grid to 23:00 of the last flagged day so that day is covered in
# full rather than only by its first hour.
nrw_hourly_index = pd.date_range(
    start=nrw_holiday_days.min(),
    end=nrw_holiday_days.max() + pd.Timedelta(hours=23),
    freq='h',
    name=nrw_holidays_full.index.name,  # keep the existing index name
)
nrw_holidays_full = pd.DataFrame(
    {'is_holiday': nrw_hourly_index.normalize().isin(nrw_holiday_days)},
    index=nrw_hourly_index,
)

In [13]:
# Persist the NRW holiday frame as a pickle for later use.
nrw_pickle_path = '../0_data/nrw_holidays.pkl'
nrw_holidays_full.to_pickle(nrw_pickle_path)

## Hessen (Marburg)

### Get official Hessen holidays (Feiertage) from API

In [14]:
# Download the 2019 public-holiday (Feiertage) listing and parse the JSON body.
with urllib.request.urlopen("https://feiertage-api.de/api/?jahr=2019") as response:
    feiertage_payload = response.read().decode()
he_holidays_json = json.loads(feiertage_payload)

In [15]:
# Take the 'datum' of every entry listed under the 'HE' key and flag each date as True.
he_holiday_dates = [
    pd.to_datetime(entry['datum'])
    for entry in he_holidays_json['HE'].values()
]
he_holidays = pd.Series(data=True, index=pd.DatetimeIndex(he_holiday_dates))

In [16]:
# Display the Hessen public-holiday flags as a quick sanity check.
he_holidays

2019-01-01    True
2019-04-19    True
2019-04-22    True
2019-05-01    True
2019-05-30    True
2019-06-10    True
2019-06-20    True
2019-10-03    True
2019-12-25    True
2019-12-26    True
dtype: bool

### Get official Hessen vacations (Ferien) from API

In [17]:
# Download the 2019 school-vacation (Ferien) listing for HE and parse the JSON body.
with urllib.request.urlopen("https://ferien-api.de/api/v1/holidays/HE/2019") as response:
    ferien_payload = response.read().decode()
he_vacations_json = json.loads(ferien_payload)

In [18]:
# Load the parsed vacation payload into a DataFrame for column-wise processing.
he_vacations = pd.DataFrame(data=he_vacations_json)

In [19]:
# Parse both period boundaries into pandas datetimes.
for boundary in ('start', 'end'):
    he_vacations[boundary] = pd.to_datetime(he_vacations[boundary])

In [20]:
# Keep only the period boundaries; every other column is discarded.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas deprecated in 1.3 and no longer accepts from 2.0 on. Rebinding the
# name instead of using `inplace=True` keeps the cell safe to re-run.
he_vacations = he_vacations.drop(columns=he_vacations.columns.difference(['start', 'end']))

In [21]:
# Expand every vacation period into its individual days (date_range includes
# both endpoints) and join all periods into a single DatetimeIndex.
he_vacation_periods = [
    pd.date_range(first_day, last_day)
    for first_day, last_day in zip(he_vacations['start'], he_vacations['end'])
]
he_vacation_index = pd.DatetimeIndex([]).append(he_vacation_periods)

In [22]:
# Flag every vacation day as True (the name is rebound from the DataFrame to this Series).
he_vacations = pd.Series(True, index=he_vacation_index)

In [23]:
# Stack the public-holiday flags and the vacation flags into one 'is_holiday' column.
# Series.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# supported equivalent and yields the same rows in the same order.
he_holidays_full = pd.DataFrame({'is_holiday': pd.concat([he_holidays, he_vacations])})

In [24]:
# De-duplicate by date (a public holiday can also fall inside a vacation period):
# move the index into an 'index' column, keep the last row for each date, then
# restore that column as the index.
he_holidays_flat = he_holidays_full.reset_index()
he_holidays_unique = he_holidays_flat.drop_duplicates(subset='index', keep='last')
he_holidays_full = he_holidays_unique.set_index('index')

In [25]:
# Expand the per-day flags to an hourly grid.
#
# The previous `asfreq('h', fill_value=True)` reindexed onto every hour between
# the first and last timestamp and filled ALL missing hours with True, so
# ordinary working days were stored as holidays as well. Instead, build the
# hourly grid explicitly and flag an hour only when its calendar day is one of
# the flagged days.
# NOTE(review): assumes each timestamp in the index denotes a calendar day
# (time-of-day is dropped via normalize) - confirm against the API payloads.
he_holiday_days = (
    he_holidays_full.index[he_holidays_full['is_holiday'].to_numpy(dtype=bool)]
    .normalize()
    .unique()
)
# Extend the grid to 23:00 of the last flagged day so that day is covered in
# full rather than only by its first hour.
he_hourly_index = pd.date_range(
    start=he_holiday_days.min(),
    end=he_holiday_days.max() + pd.Timedelta(hours=23),
    freq='h',
    name=he_holidays_full.index.name,  # keep the existing index name
)
he_holidays_full = pd.DataFrame(
    {'is_holiday': he_hourly_index.normalize().isin(he_holiday_days)},
    index=he_hourly_index,
)

In [26]:
# Persist the Hessen holiday frame as a pickle for later use.
he_pickle_path = '../0_data/he_holidays.pkl'
he_holidays_full.to_pickle(he_pickle_path)