In [16]:
# disable future warning
import warnings
warnings.simplefilter('ignore')

# other imports
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from meteostat import Stations, Daily
import geopandas as gp
import pandas as pd
from shapely.geometry import Point
from datetime import date, datetime
from tqdm import tqdm

In [17]:
# setup cache
Stations.cache_dir = "./prod/.meteostat/cache"

query = Stations()
query.region("DE")
stations = query.fetch()
stations.reset_index(inplace=True)

In [18]:
path = "prod/weather.db"
con = create_engine("sqlite:///"+path, echo=False)

In [19]:
# remove unnecessary station data
cols = [
    'id',
    'latitude',
    'longitude',
    'elevation',
    'daily_start',
    'daily_end'
]
stations = stations[cols]
stations.rename({'id':'station_id'}, axis=1, inplace=True)

In [20]:
# read shape files
shape = [
    gp.read_file(f"./data/nuts5000/5000_NUTS{i}.shp").to_crs(epsg=4326)
    for i in range(1, 4)
]

# spacial join shape files. This is possible since nuts is hirarchical
nuts:gp.GeoDataFrame
nuts = shape[2].sjoin(shape[1], how="left", lsuffix='3', rsuffix='2', predicate="within")\
               .sjoin(shape[0], how="left", rsuffix='1', predicate="within")
nuts.rename({"NUTS_CODE":"NUTS_CODE_1",	"NUTS_NAME":"NUTS_NAME_1"}, inplace=True, axis=1)
nuts.drop("NUTS_LEVEL_2 NUTS_LEVEL_3 NUTS_LEVEL index_2 index_1".split(), inplace=True, axis=1)
nuts.sort_index(inplace=True)
nuts.head()

Unnamed: 0,NUTS_CODE_3,NUTS_NAME_3,geometry,NUTS_CODE_2,NUTS_NAME_2,NUTS_CODE_1,NUTS_NAME_1
0,DE111,"Stuttgart, Stadtkreis","POLYGON ((9.13452 48.85668, 9.14122 48.86183, ...",DE11,Stuttgart,DE1,Baden-Württemberg
1,DE112,Böblingen,"POLYGON ((8.96647 48.82980, 8.99216 48.83356, ...",DE11,Stuttgart,DE1,Baden-Württemberg
2,DE113,Esslingen,"POLYGON ((9.40973 48.53721, 9.39153 48.53014, ...",DE11,Stuttgart,DE1,Baden-Württemberg
3,DE114,Göppingen,"POLYGON ((9.91934 48.63977, 9.94730 48.63369, ...",DE11,Stuttgart,DE1,Baden-Württemberg
4,DE115,Ludwigsburg,"MULTIPOLYGON (((9.30157 48.95210, 9.31516 48.9...",DE11,Stuttgart,DE1,Baden-Württemberg


In [21]:
# create GeoDataFrame from stations
x, y = stations["longitude"], stations["latitude"]
stations["geometry"] = gp.GeoSeries(map(Point, zip(x, y)))
stations = gp.GeoDataFrame(stations)
# spactial join with nuts data
stations = stations.sjoin(nuts, how="inner", predicate="within")
stations.drop("index_right", axis=1, inplace=True)
stations.sort_index(inplace=True)
stations.drop("geometry", axis=1, inplace=True)
stations.head()

Unnamed: 0,station_id,latitude,longitude,elevation,daily_start,daily_end,NUTS_CODE_3,NUTS_NAME_3,NUTS_CODE_2,NUTS_NAME_2,NUTS_CODE_1,NUTS_NAME_1
1230,10015,54.1833,7.9,4.0,1952-05-01,2022-12-18,DEF09,Pinneberg,DEF0,Schleswig-Holstein,DEF,Schleswig-Holstein
1231,10018,54.9167,8.35,16.0,2009-02-24,2022-04-25,DEF07,Nordfriesland,DEF0,Schleswig-Holstein,DEF,Schleswig-Holstein
1232,10020,55.0167,8.4167,26.0,1931-01-01,2022-12-18,DEF07,Nordfriesland,DEF0,Schleswig-Holstein,DEF,Schleswig-Holstein
1233,10022,54.8,8.95,7.0,1973-01-01,2022-12-18,DEF07,Nordfriesland,DEF0,Schleswig-Holstein,DEF,Schleswig-Holstein
1234,10026,54.5167,9.15,28.0,1891-01-01,1974-06-30,DEF07,Nordfriesland,DEF0,Schleswig-Holstein,DEF,Schleswig-Holstein


In [22]:
# save to database
stations.to_sql('stationinfo', con, if_exists="replace")

1116