In [1]:
import numpy as np
import pandas as pd

import zipfile
import io
import os
import glob
import time
import re

import folium

# Bergstraße Groundwater data

Hessen provides a fairly reasonable interface for retrieving groundwater data:
https://lgd.hessen.de/mapapps/resources/apps/lgd/index.html?lang=en

The area was manually selected and then, all groundwater station data and measurement series were downloaded.

## 1. Basic cleaning of station data

In [26]:
station_path = './data/groundwater/bergstraße/stationdata_raw/gw_stations_bergstraße_raw.xlsx'
stations = pd.read_excel(station_path)#, sep=';')
stations

Unnamed: 0,Mst.-ID,Name,Kurzname,Betreiber,Ostwert,Nordwert,Rechtswert,Hochwert,TK-Blatt,Messstellenart,...,Sohltiefe,Filter-Oberkante,Filter-Unterkante,Rohrinnendurchmesser (mm),Grundwasserkörper,Hydrogeol. Teilraum,Stilllegungsdatum,Stilllegungsgrund,GewAnlagen-ID,Bohrarchiv HLNUG
0,12890,BIBLIS (alt),544047.0,Regierungspräsidium Darmstadt,458821.000,5505747.000,3458880.0,5507510.0,6216 - Gernsheim,BR,...,6.25,4.25,6.25,100,2395_3101,03101 - Rheingrabenscholle,"Aug 18, 2010",Ersatz durch Neubau,,225
1,12891,GROSS-ROHRHEIM,544017.0,Regierungspräsidium Darmstadt,462960.000,5505877.000,3463020.0,5507640.0,6216 - Gernsheim,BR,...,3.75,2.75,3.75,35,2395_3101,03101 - Rheingrabenscholle,"Aug 1, 1973",Sonstiges,,11
2,12892,NORDHEIM,544013.0,Regierungspräsidium Darmstadt,455982.000,5505917.000,3456040.0,5507680.0,6216 - Gernsheim,BR,...,7.00,6.00,7.00,35,2395_3101,03101 - Rheingrabenscholle,,,,7
3,12893,GROSS-ROHRHEIM,544016.0,Regierungspräsidium Darmstadt,461080.000,5505917.000,3461140.0,5507680.0,6216 - Gernsheim,BR,...,4.90,3.70,4.80,35,2395_3101,03101 - Rheingrabenscholle,"Jun 30, 1982",Sonstiges,,10
4,12894,NORDHEIM (alt),544014.0,Regierungspräsidium Darmstadt,457072.000,5505927.000,3457130.0,5507690.0,6216 - Gernsheim,BR,...,5.20,4.20,5.20,35,2395_3101,03101 - Rheingrabenscholle,"May 1, 2007",Ersatz durch Neubau,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,17129,VIERNHEIM,544273.0,,467596.000,5490910.000,3467658.0,5492667.0,6417 - Mannheim Nordost,BR,...,15.00,3.00,15.00,125,2394_3101,03101 - Rheingrabenscholle,,,,0
248,17130,LAMPERTHEIM,544274.0,Regierungspräsidium Darmstadt,465688.000,5492192.000,3465750.0,5493950.0,6417 - Mannheim Nordost,BR,...,12.80,8.00,12.00,125,2393_3101,03101 - Rheingrabenscholle,,,,0
249,17131,VIERNHEIM,544275.0,,469267.000,5491023.000,3469330.0,5492780.0,6417 - Mannheim Nordost,BR,...,11.00,7.00,11.00,125,2394_3101,03101 - Rheingrabenscholle,,,,0
250,17740,LAMPERTHEIM,,Regierungspräsidium Darmstadt,463546.000,5493941.000,3463607.0,5495700.0,6416 - Mannheim Nordwest,BR,...,15.00,9.00,15.00,125,2393_3101,03101 - Rheingrabenscholle,,,,0


The data needs some basic cleaning before further use.

In [27]:
# drop unnecessary columns
cols_to_keep = ['Mst.-ID', 'Ostwert', 'Nordwert', 
                'Geländehöhe', 'Messpunkthöhe']
stations = stations[cols_to_keep]

# assign new column names
new_col_names = {'Mst.-ID': 'station_id', 'Ostwert': 'x', 
                 'Nordwert': 'y', 'Geländehöhe': 'ground_elev', 
                 'Messpunkthöhe': 'measure_elev'}
stations = stations.rename(columns=new_col_names)
stations = stations.drop_duplicates()


In [28]:
stations.info()

<class 'pandas.core.frame.DataFrame'>
Index: 243 entries, 0 to 251
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   station_id    243 non-null    int64  
 1   x             243 non-null    float64
 2   y             243 non-null    float64
 3   ground_elev   243 non-null    float64
 4   measure_elev  243 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 11.4 KB


In [29]:
stations.head()

Unnamed: 0,station_id,x,y,ground_elev,measure_elev
0,12890,458821.0,5505747.0,87.44,87.95
1,12891,462960.0,5505877.0,89.96,90.01
2,12892,455982.0,5505917.0,89.1,89.4
3,12893,461080.0,5505917.0,88.17,88.03
4,12894,457072.0,5505927.0,88.13,88.49


## 2. Read groundwater measurement data to df

In [30]:
# Directory containing the folders
folder_path = './data/groundwater/bergstraße/stationdata_raw/'

# File name pattern to search for
file_pattern = 'Wasserstaende*.csv' 

df_list = []

# Check if the item in the directory is a folder
if os.path.isdir(folder_path):
    # Use glob to search for files matching the pattern inside the folder
    files = glob.glob(os.path.join(folder_path, file_pattern))
    
    # Process the found files
    for file in files:
        #print("Found file '{}' in folder '{}'".format(os.path.basename(file), folder))
        df = pd.read_csv(file, sep=';')
        df_list.append(df)


In [31]:
df_list[0].head()

Unnamed: 0,Mst.-ID,Kurzname,Name,Datum,Abstich (m unter MP),Wasserspiegel (m ü. NN),Wasserspiegel (m unter GOK),Wassertemperatur (°C),Unnamed: 8
0,13595,544240,LAMPERTHEIM,"Jan 1, 1979",9.48,89.76,8.45,0.0,
1,13595,544240,LAMPERTHEIM,"Jan 8, 1979",9.49,89.75,8.46,0.0,
2,13595,544240,LAMPERTHEIM,"Jan 15, 1979",9.89,89.35,8.86,0.0,
3,13595,544240,LAMPERTHEIM,"Jan 29, 1979",9.98,89.26,8.95,0.0,
4,13595,544240,LAMPERTHEIM,"Feb 5, 1979",10.0,89.24,8.97,0.0,


In [32]:
df_list[0].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25938 entries, 0 to 25937
Data columns (total 9 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Mst.-ID                      25938 non-null  int64  
 1   Kurzname                     25938 non-null  int64  
 2   Name                         25938 non-null  object 
 3   Datum                        25938 non-null  object 
 4   Abstich (m unter MP)         25938 non-null  float64
 5   Wasserspiegel (m ü. NN)      25938 non-null  float64
 6   Wasserspiegel (m unter GOK)  25938 non-null  float64
 7   Wassertemperatur (°C)        25938 non-null  float64
 8   Unnamed: 8                   0 non-null      float64
dtypes: float64(5), int64(2), object(2)
memory usage: 1.8+ MB


The data needs some basic cleaning before further use.

# continue here!!!

In [8]:
# clean column names
df = pd.concat(df_list)
#df.columns = df.columns.str.replace(' ', '')

# assign new column names
new_col_names = {'Messstellennummer': 'station_id', 
                 'Messstellenbezeichnung': 'name', 
                 'Datum': 'date', 
                 'Messpunkthöhe [NN+m]': 'elevation', 
                 'Abstich (m unter MPH)': 'water_depth', 
                 'Wasserstand (NN+m)': 'water_level'}
df = df.rename(columns=new_col_names)

# change date column to datetime type
df['date'] = pd.to_datetime(df['date'], format='%d.%m.%Y')

In [9]:
df.name.unique()

array(['1053 Böbingen,', '1044 A Lustadt,Holzmühle', '1319 Dudenhofen,',
       '1307, Germersheim', '1059 Neustadt an der Weinstraße, Geinsheim',
       '1199 Flemlingen,', '1058 Neustadt an der Weinstraße, Speyerdorf',
       '1316 II, Römerberg, Mechtersheim', '1450, Essingen',
       '1303 II Lustadt,', '1132 Speyer, '], dtype=object)

In [10]:
df.describe()

Unnamed: 0,station_id,date,elevation,water_depth,water_level
count,21820.0,21820,21820.0,21820.0,21820.0
mean,2377712000.0,1991-07-07 19:03:05.444546304,112.831093,3.965391,108.865701
min,2377139000.0,1953-11-02 00:00:00,95.82,0.2,90.28
25%,2377150000.0,1979-06-21 00:00:00,98.82,2.13,96.83
50%,2377194000.0,1991-02-25 00:00:00,113.27,3.13,107.78
75%,2378175000.0,2004-12-13 00:00:00,116.88,5.49,112.47
max,2379143000.0,2024-02-28 00:00:00,201.32,15.9,185.76
std,605369.6,,15.928979,2.685128,14.969659


In [11]:
df.head()

Unnamed: 0,station_id,name,date,elevation,water_depth,water_level
0,2378135400,"1053 Böbingen,",1954-11-01,113.27,1.5,111.77
1,2378135400,"1053 Böbingen,",1954-11-08,113.27,1.47,111.8
2,2378135400,"1053 Böbingen,",1954-11-15,113.27,1.47,111.8
3,2378135400,"1053 Böbingen,",1954-11-22,113.27,1.45,111.82
4,2378135400,"1053 Böbingen,",1954-11-29,113.27,1.42,111.85


In [12]:
df.to_csv('./data/groundwater/pfalz/gw.csv', index=False)