In [1]:
# suppress warnings
# NOTE: no `category` is passed to simplefilter, so this ignores every
# warning category for the whole session, not only FutureWarning.
import warnings
warnings.simplefilter(action='ignore')

# imports
from collections import defaultdict
from sqlalchemy import create_engine, inspect
import geopandas as gpd
from meteostat import Stations, Daily, Point
import numpy as np
from src.helper import get_nuts_data
from src.helper import get_daily_weather_data, get_daily_weather_data_loc
import pandas as pd
from datetime import datetime
from tqdm import tqdm

In [2]:
# SQLite file that receives the NUTS tables and all weather tables
path = "prod/weather.db"
# the engine URL is the sqlite scheme followed by the relative file path;
# echo=False keeps the emitted SQL statements out of the cell output
engine = create_engine(f"sqlite:///{path}", echo=False)

In [3]:
# Check which tables already exist:
# build an inspector for the database behind `engine` and ask it for the
# names of the tables that are currently present.
# NOTE(review): `tables` is not read by any of the cells visible here.
insp = inspect(engine)
tables = insp.get_table_names()

## Reading NUTS data
First we read the NUTS information, which includes the area code and the centroid of each NUTS area. This is important because we want to match each area with the SOEP dataset later.

In [4]:
# Store the NUTS data in the database, one table per level (nuts1 .. nuts3).
# An existing table of the same name is replaced.
for lvl in (1, 2, 3):
    table_name = f'nuts{lvl}'
    print(f"Reading NUTS{lvl} data")
    data = get_nuts_data(lvl)
    data.to_sql(table_name, con=engine, if_exists='replace')

Reading NUTS1 data
Reading NUTS2 data
Reading NUTS3 data


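Because the `meteostat` library does not include `nuts` indexing, we need to convert the area codes to different codes. The dictionaries below do this: `rename` maps the German state names to two-letter state codes, and `soep_nameing` maps those codes to the numeric ids stored in the `sloc` column.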

In [5]:
# Maps the name of a German federal state (the NUTS_NAME value of a NUTS1
# row) to the two-letter state code that is passed, together with "DE",
# as the location of the region-based weather request.
rename = {
    "Baden-Württemberg":"BW",
    "Bayern":"BY",
    "Berlin":"BE",
    "Brandenburg":"BB",
    "Bremen":"HB",
    "Hamburg":"HH",
    "Hessen":"HE",
    "Mecklenburg-Vorpommern":"MV",
    "Niedersachsen":"NI",
    "Nordrhein-Westfalen":"NW",
    "Rheinland-Pfalz":"RP",
    "Saarland":"SL",
    "Sachsen":"SN",
    "Sachsen-Anhalt":"ST",
    "Schleswig-Holstein":"SH",
    "Thüringen":"TH"
}

# Maps the two-letter state code to the integer id written to the `sloc`
# column of the weather tables. Unknown codes yield NaN.
# The default factory of a defaultdict is called WITHOUT arguments, so it has
# to be a zero-argument callable; a one-argument lambda would raise a
# TypeError on the first missing key instead of returning NaN.
soep_nameing = defaultdict(
    lambda: np.nan, {
        "BW":8, "BY":9, "BE":11, "BB":12,
        "HB":4, "HH":2, "HE":6, "MV":13,
        "NI":3, "NW":5, "RP":7, "SL":10,
        "SN":14, "ST":15, "SH":1, "TH":16
    }
)

## Downloading the Weather Data

In [6]:
# time window used for every historical weather request below
start = datetime(1984, 1, 1)
end = datetime(2021, 1, 1)

In [7]:
# we set cache dir to local folder
# (a relative path, so it resolves against the current working directory)
# NOTE(review): the attribute is assigned on `Stations` only - confirm that
# the daily-data requests made by the helper functions use the same cache
# location.
Stations.cache_dir = './.meteocache'

### NUTS 3

For reading the `nuts3` data we use a radius of 50 km around the area centroid. The reason is that some `nuts3` areas are so small that they do not have any stations inside them, so searching within a radius should yield more reliable results. The assumption is, of course, that the climate does not vary too much within a 50 km radius.

In [None]:
# Download the daily weather for every NUTS3 area and store it in its own
# table. The search is coordinate based: the centroid of the area plus a
# radius (50 km, see the text above).
nuts3 = pd.read_sql_table('nuts3', con=engine)
radius = 50000
for _, row in tqdm(nuts3.iterrows(), total=len(nuts3)):
    # table name: <level><code>_weather
    lvl = row['NUTS_LEVEL']
    index = "".join((str(lvl), row['NUTS_CODE'], '_weather'))
    # the table stores the centroid coordinates multiplied by 100
    lat = row['lat_times100'] / 100
    lon = row['lon_times100'] / 100
    daily = get_daily_weather_data(lat, lon, radius, start, end)
    # tag every record with the name of the area it belongs to
    daily["nuts_name"] = row['NUTS_NAME']
    # placeholder for the soep id
    daily['sloc'] = np.nan # TODO: find which id is used here
    # nothing is written for areas without any records
    if daily.shape[0] == 0:
        continue
    daily.to_sql(index, con=engine, if_exists='replace')

### NUTS 1 

In [8]:
# Download the daily weather for every NUTS1 area (federal state) and store
# it in its own table. The request is region based: country code plus the
# two-letter state code.
nuts1 = pd.read_sql_table('nuts1', con=engine)
for _, row in tqdm(nuts1.iterrows(), total=len(nuts1)):
    # table name: <level><code>_weather
    lvl = row['NUTS_LEVEL']
    index = "".join((str(lvl), row['NUTS_CODE'], '_weather'))
    nuts_name = row['NUTS_NAME']
    # translate the state name once; the code is needed for the request
    # and for the soep id lookup
    state_code = rename[nuts_name]
    daily = get_daily_weather_data_loc(loc=("DE", state_code), start=start, end=end)
    # tag every record with the state name and the soep id
    daily["nuts_name"] = nuts_name
    daily['sloc'] = soep_nameing[state_code]
    # nothing is written for states without any records
    if daily.shape[0] == 0:
        continue
    daily.to_sql(index, con=engine, if_exists='replace')

100%|██████████████████████████████████████████| 16/16 [33:20<00:00, 125.05s/it]


### NUTS 0

In [7]:
# Country-wide series: the location tuple holds only the country code.
lvl = 0
index = f"{lvl}DE_weather"
daily = get_daily_weather_data_loc(loc=("DE",), start=start, end=end)
# tag the records with the country code; no soep id is assigned at this level
daily["nuts_name"] = "DE"
daily['sloc'] = np.nan
# write data to database, but only if records were returned
if daily.shape[0] != 0:
    daily.to_sql(index, con=engine, if_exists='replace')