# NRW Groundwater Data - OpenHygrisC

OpenHygrisC Data: https://www.opengeodata.nrw.de/produkte/umwelt_klima/wasser/grundwasser/hygrisc/

Download the groundwater (GW) station data as a ZIP archive of CSV files: https://www.opengeodata.nrw.de/produkte/umwelt_klima/wasser/grundwasser/hygrisc/OpenHygrisC_gw-messstellen-messwerte_EPSG25832_CSV.zip


In [1]:
import pandas as pd
import os

In [2]:
# Directory of the unpacked OpenHygrisC CSV archive and the station table inside it.
datapath = r"../data/original/OpenHygrisC_gw-messstellen-messwerte_EPSG25832_CSV/"
gw_station_fname = r"opendata.gw_messstelle.csv"
# os.path.join only inserts a separator when one is needed, so the trailing "/"
# of datapath no longer yields a doubled "//" in the combined path.
gw_station_pfname = os.path.join(datapath, gw_station_fname)
print(gw_station_pfname)

../data/original/OpenHygrisC_gw-messstellen-messwerte_EPSG25832_CSV//opendata.gw_messstelle.csv


In [3]:
# Show which files are present in the data directory.
os.listdir(datapath)

['Hinweise zu den bereitgestellten Grundwasserdaten.pdf',
 'katalog_gemeinde.csv',
 'katalog_stoff.csv',
 'opendata.gw_chemischer_messwert.csv',
 'opendata.gw_messstelle.csv']

In [4]:
# Load the station table (semicolon-separated), indexed by station id.
# The coordinate columns are handled with .str accessors further down, so they
# are read explicitly as strings instead of relying on type inference, which
# can produce mixed int/str values within one column.
# `df` and `gw_station_df` are two names for the same DataFrame object.
df = gw_station_df = pd.read_csv(
    gw_station_pfname,
    sep=";",
    index_col=["messstelle_id"],
    dtype={"e32": str, "n32": str},
)

In [6]:
# Classify every station by the content of its easting string ("e32").
# Missing: the value is null or shorter than six characters.
idx_coords_missing = df["e32"].isna() | df["e32"].str.len().lt(6)
# Numeric: the string consists of numeric characters only (null counts as False).
idx_is_numeric = df["e32"].str.isnumeric().eq(True)
# Everything else is present but not purely numeric; the names used here
# treat these rows as coordinates with 100 m precision.
idx_100m_prec = ~(idx_is_numeric | idx_coords_missing)

In [7]:
# Number of rows flagged by the mask. Summing the booleans returns 0 for an
# all-False mask, whereas value_counts()[True] raises KeyError in that case.
N_100m_prec = int(idx_100m_prec.sum())

In [8]:
# Number of rows flagged by the mask. Summing the booleans returns 0 for an
# all-False mask, whereas value_counts()[True] raises KeyError in that case.
N_coords_missing = int(idx_coords_missing.sum())

In [9]:
# Number of rows flagged by the mask. Summing the booleans returns 0 for an
# all-False mask, whereas value_counts()[True] raises KeyError in that case.
N_is_numeric = int(idx_is_numeric.sum())

In [10]:
# Sanity check: the three category counts must add up to the number of rows.
assert (
    N_is_numeric + N_100m_prec + N_coords_missing == len(df.index)
), "Error: Some values not caught"

In [11]:
# Store a precision code per station in "genau": 1 for numeric coordinates,
# 100 for the 100 m class, -999 as the marker for missing coordinates.
# The missing mask is applied last, so it wins wherever masks overlap.
for mask, precision_code in (
    (idx_is_numeric, 1),
    (idx_100m_prec, 100),
    (idx_coords_missing, -999),
):
    gw_station_df.loc[mask, "genau"] = precision_code

In [12]:
# ["n32"] (string) -> number -> ["n32num"]
# Note: the row masks were derived from the "e32" column, not from "n32".
# Numeric rows are converted as they are.
gw_station_df.loc[idx_is_numeric, "n32num"] = (
    gw_station_df.loc[idx_is_numeric, "n32"].astype(float)
)
# 100 m rows: the last two characters are replaced by "50" before converting.
gw_station_df.loc[idx_100m_prec, "n32num"] = (
    gw_station_df.loc[idx_100m_prec, "n32"].str.slice(stop=-2) + "50"
).astype(float)
# Rows without usable coordinates receive the -999 marker.
gw_station_df.loc[idx_coords_missing, "n32num"] = -999

In [13]:
# ["e32"] (string) -> number -> ["e32num"]
# Numeric rows are converted as they are.
gw_station_df.loc[idx_is_numeric, "e32num"] = (
    gw_station_df.loc[idx_is_numeric, "e32"].astype(float)
)
# 100 m rows: the last two characters are replaced by "50" before converting.
gw_station_df.loc[idx_100m_prec, "e32num"] = (
    gw_station_df.loc[idx_100m_prec, "e32"].str.slice(stop=-2) + "50"
).astype(float)
# Rows without usable coordinates receive the -999 marker.
gw_station_df.loc[idx_coords_missing, "e32num"] = -999

In [14]:
# Inspect the rows classified as having no usable coordinates.
gw_station_df[idx_coords_missing][["n32", "n32num", "genau"]]

Unnamed: 0_level_0,n32,n32num,genau
messstelle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
118820000,xx,-999.0,-999.0
47202002,xx,-999.0,-999.0
118840009,xx,-999.0,-999.0
118260005,xx,-999.0,-999.0
68013504,xx,-999.0,-999.0
68012007,xx,-999.0,-999.0
47247101,xx,-999.0,-999.0
68013401,xx,-999.0,-999.0
118880007,xx,-999.0,-999.0
47299009,xx,-999.0,-999.0


In [16]:
# Print the column overview (non-null counts and dtypes) of the station table.
gw_station_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71120 entries, 32505929 to 46201920
Data columns (total 41 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   sl_nr                         71120 non-null  int64  
 1   name                          71120 non-null  object 
 2   e32                           71119 non-null  object 
 3   n32                           71119 non-null  object 
 4   gw_stockwerk                  54186 non-null  float64
 5   grundstueck                   71120 non-null  object 
 6   gemeinde_id                   71090 non-null  object 
 7   gwhorizont_id                 28424 non-null  object 
 8   gwhorizont                    28424 non-null  object 
 9   gwleiter_id                   2690 non-null   object 
 10  gwleiter                      2690 non-null   object 
 11  einrichtungsgrund             71120 non-null  object 
 12  gwk_lage_auf_id               70486 non-null  obje

In [18]:
# Correct an installation conflict occurring when OSGeo4W is installed:
# PROJ_LIB then points at the OSGeo4W PROJ data directory instead of the one
# belonging to the active conda environment.
# NOTE(review): presumably this has to run before geopandas is imported - confirm.
import os

# Upper-case names are used because environment variables are case-sensitive
# outside Windows; .get() avoids a KeyError when a variable is not set.
print(os.environ.get("PROJ_LIB"))
conda_prefix = os.environ.get("CONDA_PREFIX")
# The "Library\share\proj" layout is the Windows conda layout, so the override
# is applied only on Windows and only inside a conda environment.
if os.name == "nt" and conda_prefix:
    os.environ["PROJ_LIB"] = os.path.join(conda_prefix, "Library", "share", "proj")
proj_lib = os.environ.get("PROJ_LIB")
print(proj_lib)

C:\OSGeo4W64\share\proj
C:\Users\rb\Anaconda3\envs\geo\Library\share\proj


In [19]:
import geopandas as gpd
from shapely.geometry import Point

In [20]:
# Select stations with a positive precision code, i.e. exclude the -999 marker rows.
idx = gw_station_df["genau"].gt(0)

In [21]:
# Keep only the stations selected by `idx`.
# .copy() makes df2 an explicitly independent frame, so later modifications
# do not trigger pandas' SettingWithCopyWarning.
df2 = gw_station_df[idx].copy()

In [22]:
# Preview the first rows of the filtered station table.
df2.head()

Unnamed: 0_level_0,sl_nr,name,e32,n32,gw_stockwerk,grundstueck,gemeinde_id,gwhorizont_id,gwhorizont,gwleiter_id,...,filterlaenge_cm,sumpfrohrlaenge_cm,ausbaudurchmesser_mm,historischer_ruhe_wsp,einbaulaenge_cm,oberkante_filter_cm,unterkante_filter_cm,genau,n32num,e32num
messstelle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32505929,67530,UWB-Ddorf 01285,343064,5678019,1.0,,05111000,,,,...,,,,,,,,1.0,5678019.0,343064.0
10446746,51044,60GP012303,292077,5645349,,privat,NL000882,5,Zwischenmittel,,...,200.0,,,,16893.0,-3333.0,-3533.0,1.0,5645349.0,292077.0
87005323,51070,58BP024606,287141,5684635,,privat,NL001640,6D,Neurather Sand,,...,500.0,300.0,,,32667.0,-29083.0,-29583.0,1.0,5684635.0,287141.0
80000290,68442,Krinsend 0079 neu,309490,5678060,1.0,,05166024,,,,...,,,,,1517.0,4832.0,4832.0,1.0,5678060.0,309490.0
86583852,68518,WG102GM93-3,316741,5680237,,privat,05166032,,,,...,200.0,,80.0,,5400.0,1413.0,1213.0,1.0,5680237.0,316741.0


In [23]:
# Build one point per station from the numeric easting/northing columns and
# attach them as the geometry column, declaring the CRS as EPSG:25832.
station_points = gpd.points_from_xy(df2["e32num"], df2["n32num"])
gdf = gpd.GeoDataFrame(df2, geometry=station_points, crs="EPSG:25832")

In [24]:
# Preview the first rows of the GeoDataFrame, including the new geometry column.
gdf.head()

Unnamed: 0_level_0,sl_nr,name,e32,n32,gw_stockwerk,grundstueck,gemeinde_id,gwhorizont_id,gwhorizont,gwleiter_id,...,sumpfrohrlaenge_cm,ausbaudurchmesser_mm,historischer_ruhe_wsp,einbaulaenge_cm,oberkante_filter_cm,unterkante_filter_cm,genau,n32num,e32num,geometry
messstelle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32505929,67530,UWB-Ddorf 01285,343064,5678019,1.0,,05111000,,,,...,,,,,,,1.0,5678019.0,343064.0,POINT (343064.000 5678019.000)
10446746,51044,60GP012303,292077,5645349,,privat,NL000882,5,Zwischenmittel,,...,,,,16893.0,-3333.0,-3533.0,1.0,5645349.0,292077.0,POINT (292077.000 5645349.000)
87005323,51070,58BP024606,287141,5684635,,privat,NL001640,6D,Neurather Sand,,...,300.0,,,32667.0,-29083.0,-29583.0,1.0,5684635.0,287141.0,POINT (287141.000 5684635.000)
80000290,68442,Krinsend 0079 neu,309490,5678060,1.0,,05166024,,,,...,,,,1517.0,4832.0,4832.0,1.0,5678060.0,309490.0,POINT (309490.000 5678060.000)
86583852,68518,WG102GM93-3,316741,5680237,,privat,05166032,,,,...,,80.0,,5400.0,1413.0,1213.0,1.0,5680237.0,316741.0,POINT (316741.000 5680237.000)


In [None]:
# Write the station points to a GeoPackage file (relative path) as layer "GW Stations".
gdf.to_file(
    "GW_Stations.gpkg",
    driver="GPKG",
    layer="GW Stations",
)