# BAFU dataset extraction

Author: Thiago Nascimento (thiago.nascimento@eawag.ch)

This notebook is used to retrieve and concatenate the NAWA dataset. The samples come at different time resolutions: they are not necessarily daily or hourly, but were collected in different campaigns.

The output is one file per catchment (similar to CAMELS_CH), with 44 columns:

column_name_mapping = {
    "DOC": "doc(mg/l)",
    "Nitrat-Stickstoff": "nitrate_nitrogen(mgN/l)",
    "Elektrische Leitfähigkeit": "electrical_conductivity(µS/cm)",
    "Nitrit-Stickstoff": "nitrite_nitrogen(mgN/l)",
    "ortho-Phosphat-Phosphor (filtriert)": "ortho_phosphate_phosphorus_filtered(mg/l)",
    "Ammonium-Stickstoff": "ammonium_nitrogen(mgN/l)",
    "pH-Wert": "ph_value",
    "Wassertemperatur": "water_temperature(°C)",
    "Abfluss Tagesmaximum": "daily_maximum_discharge(m3/s)",
    "Gesamtstickstoff (unfiltriert)": "total_nitrogen_unfiltered(mg/l)",
    "Abfluss Tagesmittel": "daily_mean_discharge(m3/s)",
    "Sauerstoff": "oxygen(mg/l)",
    "Chlorid": "chloride(mg/l)",
    "Gesamtphosphor (unfiltriert)": "total_phosphorus_unfiltered(mg/l)",
    "Abfluss Tagesminimum": "daily_minimum_discharge(m3/s)",
    "Nitrit": "nitrite(mg/l)",
    "Ammonium": "ammonium(mg/l)",
    "Nitrat": "nitrate(mg/l)",
    "ortho-Phosphat": "ortho_phosphate(mg/l)",
    "Abfluss": "discharge(m3/s)",
    "Sauerstoff-Sättigung": "oxygen_saturation(%)",
    "Trübung": "turbidity(NTU)",
    "Bromid": "bromide(mg/l)",
    "Fluorid": "fluoride(mg/l)",
    "Sulfat": "sulfate(mg/l)",
    "MTBE": "mtbe(µg/l)",
    "ETBE": "etbe(µg/l)",
    "Tetrahydrofuran": "tetrahydrofuran(µg/l)",
    "1,4-Dioxan": "1_4_dioxane(µg/l)",
    "Hexachlorethan": "hexachloroethane(µg/l)",
    "1,1,1-Trichlorethan": "1_1_1_trichloroethane(µg/l)",
    "Tetrachlormethan": "carbon_tetrachloride(µg/l)",
    "Hexachlorbutadien": "hexachlorobutadiene(µg/l)",
    "Trichlorethen": "trichloroethene(µg/l)",
    "Trichlorfluormethan": "trichlorofluoromethane(µg/l)",
    "Tribrommethan": "tribromomethane(µg/l)",
    "1,3,5-Trichlorbenzol": "1_3_5_trichlorobenzene(µg/l)",
    "1,2,4-Trichlorbenzol": "1_2_4_trichlorobenzene(µg/l)",
    "1,2,3-Trichlorbenzol": "1_2_3_trichlorobenzene(µg/l)",
    "1,1-Dichlorethen": "1_1_dichloroethene(µg/l)",
    "Trichlormethan": "chloroform(µg/l)",
    "trans-1,3-Dichlorpropen": "trans_1_3_dichloropropene(µg/l)",
    "1,1,2,2-Tetrachlorethan": "1_1_2_2_tetrachloroethane(µg/l)",
    "Dichlormethan": "dichloromethane(µg/l)",
    "1,2-Dichlorethan": "1_2_dichloroethane(µg/l)",
    "1,2-Dichlorpropan": "1_2_dichloropropane(µg/l)",
    "Bromdichlormethan": "bromodichloromethane(µg/l)",
    "cis-1,3-Dichlorpropen": "cis_1_3_dichloropropene(µg/l)",
    "1,1,2-Trichlorethan": "1_1_2_trichloroethane(µg/l)",
    "Dibromchlormethan": "dibromochloromethane(µg/l)",
    "1,3-Dichlorbenzol": "1_3_dichlorobenzene(µg/l)",
    "1,4-Dichlorbenzol": "1_4_dichlorobenzene(µg/l)",
    "1,2-Dichlorbenzol": "1_2_dichlorobenzene(µg/l)",
    "Tetrachlorethen": "tetrachloroethene(µg/l)",
    "trans-1,2-Dichlorethen": "trans_1_2_dichloroethene(µg/l)",
    "1,1-Dichlorethan": "1_1_dichloroethane(µg/l)",
    "Arsen (gelöst)": "arsenic_dissolved(µg/l)",
    "Cr- gelöst": "cr_dissolved(µg/l)",
    "Pb-gel.": "pb_dissolved(µg/l)",
    "Cadmium (gelöst)": "cadmium_dissolved(µg/l)",
    "Hg-gel.": "hg_dissolved(µg/l)",
    "TOC": "toc(mg/l)",
    "BTEX (total)": "btex_total(µg/l)",
    "Halogenkohlenwasserstoffe": "halogenated_hydrocarbons(µg/l)",
    "TAME": "tame(µg/l)",
    "1,2,3-Trimethylbenzol": "1_2_3_trimethylbenzene(µg/l)",
    "1,2,4-Trimethylbenzol": "1_2_4_trimethylbenzene(µg/l)",
    "1,3,5-Trimethylbenzol": "1_3_5_trimethylbenzene(µg/l)",
    "o-Xylol": "o_xylene(µg/l)",
    "m/p-Xylol": "m_p_xylene(µg/l)",
    "Ethylbenzol": "ethylbenzene(µg/l)",
    "Toluol": "toluene(µg/l)",
    "Benzol": "benzene(µg/l)",
    "Chlorbenzol": "chlorobenzene(µg/l)",
    "Diglyme": "diglyme(µg/l)",
    "Koffein": "caffeine(µg/l)",
    "Surfynol": "surfynol(µg/l)",
    "1,3-Dimethylaprobarbital": "1_3_dimethylaprobarbital(µg/l)",
    "Crotetamid": "crotetamide(µg/l)",
    "Crotamiton": "crotamiton(µg/l)",
    "Cropropamid": "cropropamide(µg/l)",
    "Sr- gelöst": "sr_dissolved(µg/l)",
    "Ba- gelöst": "ba_dissolved(µg/l)",
    "B- gelöst": "b_dissolved(µg/l)",
    "Kupfer (gelöst)": "copper_dissolved(µg/l)",
    "Zn-gel.": "zn_dissolved(µg/l)",
    "Nickel (gelöst)": "nickel_dissolved(µg/l)",
    "cis-1,2-Dichlorethen": "cis_1_2_dichloroethene(µg/l)",
    "Lufttemperatur": "air_temperature(°C)",
    "TFA": "tfa(µg/l)",
    "Wasserstand": "water_level(mu.M.)",
    "BSB5": "bod5(mg/l)",
    "Schwebstoffe": "suspended_solids(mg/l)",
    "Gesamtphosphor (filtriert)": "total_phosphorus_filtered(mg/l)",
    "Al- gelöst": "al_dissolved(µg/l)",
    "Ag gel.": "ag_dissolved(µg/l)",
    "Co- gelöst": "co_dissolved(µg/l)",
    "Zinn (gelöst)": "tin_dissolved(µg/l)",
    "Eisen (gelöst)": "iron_dissolved(µg/l)",
    "Mangan (gelöst)": "manganese_dissolved(µg/l)",
    "Molybdän gelöst": "molybdenum_dissolved(µg/l)",
    "Se- gelöst": "se_dissolved(µg/l)",
    "Titan (gelöst)": "titanium_dissolved(µg/l)",
    "Uran gel.": "uranium_dissolved(µg/l)",
    "Calcium": "calcium(mg/l)",
    "Magnesium": "magnesium(mg/l)",
    "Kalium": "potassium(mg/l)",
    "Natrium": "sodium(mg/l)",
    "SiO2": "sio2(mg/l)",
    "Karbonathärte": "carbonate_hardness(mmol/l)"
}


## Requirements
**Python:**

* Python>=3.6
* Jupyter
* geopandas=0.10.2
* numpy
* os
* pandas=2.1.3
* scipy=1.9.0
* tqdm

Check the Github repository for an environment.yml (for conda environments) or requirements.txt (pip) file.

**Files:**

* 


**Directory:**

* Clone the GitHub directory locally
* Place any third-party data in its respective directory.
* ONLY update the "PATH" variable in the section "Configurations", with the relative path to the EStreams directory. 


## References
* 
## Observations
* 

# Import modules

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import tqdm as tqdm
import os
import glob
import warnings
import re

# Configurations

In [2]:
# Only editable variables:
# Relative path to your local directory; it is handed to os.chdir() in the
# following code cell, so all relative data/result paths are resolved from it.
PATH = ".."
# Suppress all warnings
# (note that this also silences pandas warnings raised by the cells below)
warnings.filterwarnings("ignore")

* #### The users should NOT change anything in the code below here. 

In [3]:
# Non-editable variables:
# Relative output folder; it is used as the prefix of the CSV path written by
# the export loop at the end of the notebook.
PATH_OUTPUT = "results/interval_samples/"

# Set the directory:
# (relative paths such as "data/..." and PATH_OUTPUT are resolved against it)
os.chdir(PATH)

# Import data
* FULL dataset

In [14]:
# Full dataset of interval (time-series)
# Semicolon-separated export in long format: one row per measured value, with
# the parameter name in "Parameter" and the value in "Messwert".
# NOTE(review): the "„" in the file name looks like a mis-encoded "ä"
# ("Nährstoffe"); the literal has to match the file name on disk, so confirm
# before renaming anything.
dataset_nawa = pd.read_csv(r"data/NAWA/NAWA_2011-2022_N„hrstoffe_20231205.CSV", sep=";")
dataset_nawa

Unnamed: 0,Messstelle ID,Messstelle Name,Probenahme Ort,Probenahme Art,NAQUA Probenahme Datum,NAQUA Probenahme Uhrzeit,NAWA Probenahme Beginn (Datum und Uhrzeit),NAWA Probenahme Ende (Datum und Uhrzeit),NAWA Probenahme Dauer (Stunden),Labor,...,Parameter,Messwert,Bestimmungsgrenze,NAWA Nachweisgrenze,Einheit,Messunsicherheit absolut/relativ,Messunsicherheit,Gerät/Methode,Bemerkung Messwert,Status
0,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,DOC,1.02,0.240000,,mg/l,,,---,,"Freigegeben, validierte Daten"
1,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,Nitrat-Stickstoff,1.63,0.032000,,mg/l,,,---,,"Freigegeben, validierte Daten"
2,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,Elektrische Leitfähigkeit,480,,,µS/cm,,,---,,"Freigegeben, validierte Daten"
3,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,Nitrit-Stickstoff,0.011,0.001000,,mg/l,,,---,,"Freigegeben, validierte Daten"
4,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,ortho-Phosphat-Phosphor (filtriert),0.002,0.002000,,mg/l,,,---,,"Freigegeben, validierte Daten"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302130,2078,"Weil, Palmrainbrücke","Weil, Ansaugstelle MS=Mischw.",Stichprobe,,,,07.12.2021 08:01,,,...,Wassertemperatur,7.12,,,°C,,,---,,"Freigegeben, provisorische Daten"
302131,2078,"Weil, Palmrainbrücke","Weil, Ansaugstelle MS=Mischw.",Stichprobe,,,,07.12.2021 08:01,,,...,Elektrische Leitfähigkeit,341.4,,,µS20,,,---,,"Freigegeben, validierte Daten"
302132,2078,"Weil, Palmrainbrücke","Weil, Ansaugstelle MS=Mischw.",Stichprobe,,,,07.12.2021 08:01,,,...,Sauerstoff-Sättigung,93,,,%,,,---,,"Nicht freigegeben, Rohdaten"
302133,2078,"Weil, Palmrainbrücke","Weil, Ansaugstelle MS=Mischw.",Stichprobe,,,,07.12.2021 08:01,,,...,ortho-Phosphat-Phosphor (filtriert),0.032,0.003000,,mg/l,,,---,,"Freigegeben, validierte Daten"


- Network

In [6]:
# Network NAWA: station list read from the 'nawa' sheet of the stations workbook
# (one row per station, identified by "nawa_id").
network_nawa = pd.read_excel(r"data/CAMELS_CH_chem_stations_short_v2.xlsx", sheet_name='nawa')
network_nawa

Unnamed: 0,nawa_id,nawa_station,nawa_water_body,longitude_LV03,lattitude_LV03,area_camels_CH [km2],hydro_naduf_distance [km],remarks,Q_weighting (catchment areas from CAMELS_CH)
0,1837,Porte du Scex,Rhone,557660,133280,5239.402096,0.0,ok,1.000000
1,4070,Sion,Rhone,593277,118449,3372.417040,0.0,ok,1.000005
2,1833,Brugg,Aare,657000,259360,11681.282882,0.0,ok,0.999999
3,1339,Gebenstorf,Reuss,659450,258850,3420.503458,10.0,ok,1.010250
4,1852,Brienzerseeeinlauf,Aare,646692,177000,555.808970,3.3,ok,1.001097
...,...,...,...,...,...,...,...,...,...
71,2123,Le Rancho,Promenthouse,510118,140026,119.773318,0.0,ok,0.999777
72,6057,Vicques,Scheulte,599485,244150,72.695840,0.0,ok,0.999943
73,2078,"Weil, Palmrainbrücke",Rhein,611740,272310,36404.416302,0.0,ok,1.000000
74,1181,Emmen-Littau,Kleine Emme,663917,213356,478.277165,0.6,ok,1.000188


### Renaming the columns

In [15]:
# Translation table from the German parameter names found in the "Parameter"
# column of the NAWA export to the English labels used in the output files.
# Labels are lower-case snake_case followed by the unit in parentheses; pH has
# no unit and therefore no parentheses, and the water temperature is labelled
# explicitly so it cannot be confused with "air_temperature(°C)".
column_name_mapping = {
    "DOC": "doc(mg/l)",
    "Nitrat-Stickstoff": "nitrate_nitrogen(mgN/l)",
    "Elektrische Leitfähigkeit": "electrical_conductivity(µS/cm)",
    "Nitrit-Stickstoff": "nitrite_nitrogen(mgN/l)",
    "ortho-Phosphat-Phosphor (filtriert)": "ortho_phosphate_phosphorus_filtered(mg/l)",
    "Ammonium-Stickstoff": "ammonium_nitrogen(mgN/l)",
    "pH-Wert": "ph_value",
    "Wassertemperatur": "water_temperature(°C)",
    "Abfluss Tagesmaximum": "daily_maximum_discharge(m3/s)",
    "Gesamtstickstoff (unfiltriert)": "total_nitrogen_unfiltered(mg/l)",
    "Abfluss Tagesmittel": "daily_mean_discharge(m3/s)",
    "Sauerstoff": "oxygen(mg/l)",
    "Chlorid": "chloride(mg/l)",
    "Gesamtphosphor (unfiltriert)": "total_phosphorus_unfiltered(mg/l)",
    "Abfluss Tagesminimum": "daily_minimum_discharge(m3/s)",
    "Nitrit": "nitrite(mg/l)",
    "Ammonium": "ammonium(mg/l)",
    "Nitrat": "nitrate(mg/l)",
    "ortho-Phosphat": "ortho_phosphate(mg/l)",
    "Abfluss": "discharge(m3/s)",
    "Sauerstoff-Sättigung": "oxygen_saturation(%)",
    "Trübung": "turbidity(NTU)",
    "Bromid": "bromide(mg/l)",
    "Fluorid": "fluoride(mg/l)",
    "Sulfat": "sulfate(mg/l)",
    "MTBE": "mtbe(µg/l)",
    "ETBE": "etbe(µg/l)",
    "Tetrahydrofuran": "tetrahydrofuran(µg/l)",
    "1,4-Dioxan": "1_4_dioxane(µg/l)",
    "Hexachlorethan": "hexachloroethane(µg/l)",
    "1,1,1-Trichlorethan": "1_1_1_trichloroethane(µg/l)",
    "Tetrachlormethan": "carbon_tetrachloride(µg/l)",
    "Hexachlorbutadien": "hexachlorobutadiene(µg/l)",
    "Trichlorethen": "trichloroethene(µg/l)",
    "Trichlorfluormethan": "trichlorofluoromethane(µg/l)",
    "Tribrommethan": "tribromomethane(µg/l)",
    "1,3,5-Trichlorbenzol": "1_3_5_trichlorobenzene(µg/l)",
    "1,2,4-Trichlorbenzol": "1_2_4_trichlorobenzene(µg/l)",
    "1,2,3-Trichlorbenzol": "1_2_3_trichlorobenzene(µg/l)",
    "1,1-Dichlorethen": "1_1_dichloroethene(µg/l)",
    "Trichlormethan": "chloroform(µg/l)",
    "trans-1,3-Dichlorpropen": "trans_1_3_dichloropropene(µg/l)",
    "1,1,2,2-Tetrachlorethan": "1_1_2_2_tetrachloroethane(µg/l)",
    "Dichlormethan": "dichloromethane(µg/l)",
    "1,2-Dichlorethan": "1_2_dichloroethane(µg/l)",
    "1,2-Dichlorpropan": "1_2_dichloropropane(µg/l)",
    "Bromdichlormethan": "bromodichloromethane(µg/l)",
    "cis-1,3-Dichlorpropen": "cis_1_3_dichloropropene(µg/l)",
    "1,1,2-Trichlorethan": "1_1_2_trichloroethane(µg/l)",
    "Dibromchlormethan": "dibromochloromethane(µg/l)",
    "1,3-Dichlorbenzol": "1_3_dichlorobenzene(µg/l)",
    "1,4-Dichlorbenzol": "1_4_dichlorobenzene(µg/l)",
    "1,2-Dichlorbenzol": "1_2_dichlorobenzene(µg/l)",
    "Tetrachlorethen": "tetrachloroethene(µg/l)",
    "trans-1,2-Dichlorethen": "trans_1_2_dichloroethene(µg/l)",
    "1,1-Dichlorethan": "1_1_dichloroethane(µg/l)",
    "Arsen (gelöst)": "arsenic_dissolved(µg/l)",
    "Cr- gelöst": "cr_dissolved(µg/l)",
    "Pb-gel.": "pb_dissolved(µg/l)",
    "Cadmium (gelöst)": "cadmium_dissolved(µg/l)",
    "Hg-gel.": "hg_dissolved(µg/l)",
    "TOC": "toc(mg/l)",
    "BTEX (total)": "btex_total(µg/l)",
    "Halogenkohlenwasserstoffe": "halogenated_hydrocarbons(µg/l)",
    "TAME": "tame(µg/l)",
    "1,2,3-Trimethylbenzol": "1_2_3_trimethylbenzene(µg/l)",
    "1,2,4-Trimethylbenzol": "1_2_4_trimethylbenzene(µg/l)",
    "1,3,5-Trimethylbenzol": "1_3_5_trimethylbenzene(µg/l)",
    "o-Xylol": "o_xylene(µg/l)",
    "m/p-Xylol": "m_p_xylene(µg/l)",
    "Ethylbenzol": "ethylbenzene(µg/l)",
    "Toluol": "toluene(µg/l)",
    "Benzol": "benzene(µg/l)",
    "Chlorbenzol": "chlorobenzene(µg/l)",
    "Diglyme": "diglyme(µg/l)",
    "Koffein": "caffeine(µg/l)",
    "Surfynol": "surfynol(µg/l)",
    "1,3-Dimethylaprobarbital": "1_3_dimethylaprobarbital(µg/l)",
    "Crotetamid": "crotetamide(µg/l)",
    "Crotamiton": "crotamiton(µg/l)",
    "Cropropamid": "cropropamide(µg/l)",
    "Sr- gelöst": "sr_dissolved(µg/l)",
    "Ba- gelöst": "ba_dissolved(µg/l)",
    "B- gelöst": "b_dissolved(µg/l)",
    "Kupfer (gelöst)": "copper_dissolved(µg/l)",
    "Zn-gel.": "zn_dissolved(µg/l)",
    "Nickel (gelöst)": "nickel_dissolved(µg/l)",
    "cis-1,2-Dichlorethen": "cis_1_2_dichloroethene(µg/l)",
    "Lufttemperatur": "air_temperature(°C)",
    "TFA": "tfa(µg/l)",
    "Wasserstand": "water_level(mu.M.)",
    "BSB5": "bod5(mg/l)",
    "Schwebstoffe": "suspended_solids(mg/l)",
    "Gesamtphosphor (filtriert)": "total_phosphorus_filtered(mg/l)",
    "Al- gelöst": "al_dissolved(µg/l)",
    "Ag gel.": "ag_dissolved(µg/l)",
    "Co- gelöst": "co_dissolved(µg/l)",
    "Zinn (gelöst)": "tin_dissolved(µg/l)",
    "Eisen (gelöst)": "iron_dissolved(µg/l)",
    "Mangan (gelöst)": "manganese_dissolved(µg/l)",
    "Molybdän gelöst": "molybdenum_dissolved(µg/l)",
    "Se- gelöst": "se_dissolved(µg/l)",
    "Titan (gelöst)": "titanium_dissolved(µg/l)",
    "Uran gel.": "uranium_dissolved(µg/l)",
    "Calcium": "calcium(mg/l)",
    "Magnesium": "magnesium(mg/l)",
    "Kalium": "potassium(mg/l)",
    "Natrium": "sodium(mg/l)",
    "SiO2": "sio2(mg/l)",
    "Karbonathärte": "carbonate_hardness(mmol/l)"
}

In [16]:
# Translate the German parameter names in "Parameter" to the English labels of
# column_name_mapping; values without an entry in the mapping stay unchanged.
dataset_nawa['Parameter'] = dataset_nawa['Parameter'].replace(column_name_mapping)
dataset_nawa

Unnamed: 0,Messstelle ID,Messstelle Name,Probenahme Ort,Probenahme Art,NAQUA Probenahme Datum,NAQUA Probenahme Uhrzeit,NAWA Probenahme Beginn (Datum und Uhrzeit),NAWA Probenahme Ende (Datum und Uhrzeit),NAWA Probenahme Dauer (Stunden),Labor,...,Parameter,Messwert,Bestimmungsgrenze,NAWA Nachweisgrenze,Einheit,Messunsicherheit absolut/relativ,Messunsicherheit,Gerät/Methode,Bemerkung Messwert,Status
0,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,doc(mg/l),1.02,0.240000,,mg/l,,,---,,"Freigegeben, validierte Daten"
1,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,nitrate_nitrogen(mgN/l),1.63,0.032000,,mg/l,,,---,,"Freigegeben, validierte Daten"
2,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,electrical_conductivity(µS/cm),480,,,µS/cm,,,---,,"Freigegeben, validierte Daten"
3,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,nitrite_nitrogen(mgN/l),0.011,0.001000,,mg/l,,,---,,"Freigegeben, validierte Daten"
4,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,ortho_phosphate_phosphorus_filtered(mg/l),0.002,0.002000,,mg/l,,,---,,"Freigegeben, validierte Daten"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302130,2078,"Weil, Palmrainbrücke","Weil, Ansaugstelle MS=Mischw.",Stichprobe,,,,07.12.2021 08:01,,,...,water_temperature(°C),7.12,,,°C,,,---,,"Freigegeben, provisorische Daten"
302131,2078,"Weil, Palmrainbrücke","Weil, Ansaugstelle MS=Mischw.",Stichprobe,,,,07.12.2021 08:01,,,...,electrical_conductivity(µS/cm),341.4,,,µS20,,,---,,"Freigegeben, validierte Daten"
302132,2078,"Weil, Palmrainbrücke","Weil, Ansaugstelle MS=Mischw.",Stichprobe,,,,07.12.2021 08:01,,,...,oxygen_saturation(%),93,,,%,,,---,,"Nicht freigegeben, Rohdaten"
302133,2078,"Weil, Palmrainbrücke","Weil, Ansaugstelle MS=Mischw.",Stichprobe,,,,07.12.2021 08:01,,,...,ortho_phosphate_phosphorus_filtered(mg/l),0.032,0.003000,,mg/l,,,---,,"Freigegeben, validierte Daten"


In [19]:
# Subset to a single station (Messstelle ID 1002); the following cells work on
# this subset only.
dataset = dataset_nawa[dataset_nawa["Messstelle ID"] == 1002]
dataset

Unnamed: 0,Messstelle ID,Messstelle Name,Probenahme Ort,Probenahme Art,NAQUA Probenahme Datum,NAQUA Probenahme Uhrzeit,NAWA Probenahme Beginn (Datum und Uhrzeit),NAWA Probenahme Ende (Datum und Uhrzeit),NAWA Probenahme Dauer (Stunden),Labor,...,Parameter,Messwert,Bestimmungsgrenze,NAWA Nachweisgrenze,Einheit,Messunsicherheit absolut/relativ,Messunsicherheit,Gerät/Methode,Bemerkung Messwert,Status
0,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,doc(mg/l),1.02,0.240000,,mg/l,,,---,,"Freigegeben, validierte Daten"
1,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,nitrate_nitrogen(mgN/l),1.63,0.032000,,mg/l,,,---,,"Freigegeben, validierte Daten"
2,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,electrical_conductivity(µS/cm),480,,,µS/cm,,,---,,"Freigegeben, validierte Daten"
3,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,nitrite_nitrogen(mgN/l),0.011,0.001000,,mg/l,,,---,,"Freigegeben, validierte Daten"
4,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,ortho_phosphate_phosphorus_filtered(mg/l),0.002,0.002000,,mg/l,,,---,,"Freigegeben, validierte Daten"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296639,1002,"Rämismühle, Zell",---,Stichprobe,,,,17.11.2021 11:50,,,...,daily_minimum_discharge(m3/s),0.32,,,m³/s,,,---,,"Freigegeben, validierte Daten"
296640,1002,"Rämismühle, Zell",---,Stichprobe,,,,17.11.2021 11:50,,,...,daily_mean_discharge(m3/s),0.32,,,m³/s,,,---,,"Freigegeben, validierte Daten"
296641,1002,"Rämismühle, Zell",---,Stichprobe,,,,17.11.2021 11:50,,,...,nitrate(mg/l),6.29,0.141664,,mg/l,,,---,,"Freigegeben, validierte Daten"
296642,1002,"Rämismühle, Zell",---,Stichprobe,,,,17.11.2021 11:50,,,...,nitrite(mg/l),<0.003,0.003285,,mg/l,,,---,,"Freigegeben, validierte Daten"


In [30]:
# Second name for the single-station subset (plain assignment, no copy is made)
df = dataset
df

Unnamed: 0,Messstelle ID,Messstelle Name,Probenahme Ort,Probenahme Art,NAQUA Probenahme Datum,NAQUA Probenahme Uhrzeit,NAWA Probenahme Beginn (Datum und Uhrzeit),NAWA Probenahme Ende (Datum und Uhrzeit),NAWA Probenahme Dauer (Stunden),Labor,...,Parameter,Messwert,Bestimmungsgrenze,NAWA Nachweisgrenze,Einheit,Messunsicherheit absolut/relativ,Messunsicherheit,Gerät/Methode,Bemerkung Messwert,Status
0,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,doc(mg/l),1.02,0.240000,,mg/l,,,---,,"Freigegeben, validierte Daten"
1,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,nitrate_nitrogen(mgN/l),1.63,0.032000,,mg/l,,,---,,"Freigegeben, validierte Daten"
2,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,electrical_conductivity(µS/cm),480,,,µS/cm,,,---,,"Freigegeben, validierte Daten"
3,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,nitrite_nitrogen(mgN/l),0.011,0.001000,,mg/l,,,---,,"Freigegeben, validierte Daten"
4,1002,"Rämismühle, Zell",---,Stichprobe,,,,09.03.2011 09:09,,,...,ortho_phosphate_phosphorus_filtered(mg/l),0.002,0.002000,,mg/l,,,---,,"Freigegeben, validierte Daten"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296639,1002,"Rämismühle, Zell",---,Stichprobe,,,,17.11.2021 11:50,,,...,daily_minimum_discharge(m3/s),0.32,,,m³/s,,,---,,"Freigegeben, validierte Daten"
296640,1002,"Rämismühle, Zell",---,Stichprobe,,,,17.11.2021 11:50,,,...,daily_mean_discharge(m3/s),0.32,,,m³/s,,,---,,"Freigegeben, validierte Daten"
296641,1002,"Rämismühle, Zell",---,Stichprobe,,,,17.11.2021 11:50,,,...,nitrate(mg/l),6.29,0.141664,,mg/l,,,---,,"Freigegeben, validierte Daten"
296642,1002,"Rämismühle, Zell",---,Stichprobe,,,,17.11.2021 11:50,,,...,nitrite(mg/l),<0.003,0.003285,,mg/l,,,---,,"Freigegeben, validierte Daten"


In [85]:
df = dataset

# Reshape from long to wide: one row per sampling end timestamp, one column per
# parameter. If a parameter occurs several times for the same timestamp, the
# first non-missing value is the one that is kept.
pivoted_df = (
    df.pivot_table(
        index=['NAWA Probenahme Ende (Datum und Uhrzeit)'],
        columns='Parameter',
        values='Messwert',
        aggfunc='first',
    )
    .reset_index()
)

# Parse the sampling end timestamp ('NAWA Probenahme Ende', dd.mm.yyyy HH:MM)
# into a proper datetime column
pivoted_df['date'] = pd.to_datetime(
    pivoted_df['NAWA Probenahme Ende (Datum und Uhrzeit)'],
    format='%d.%m.%Y %H:%M',
)

# Use the parsed timestamp as index and order the rows chronologically
pivoted_df = pivoted_df.set_index("date").sort_index()
pivoted_df

Parameter,NAWA Probenahme Ende (Datum und Uhrzeit),ammonium(mg/l),ammonium_nitrogen(mgN/l),chloride(mg/l),daily_maximum_discharge(m3/s),daily_mean_discharge(m3/s),daily_minimum_discharge(m3/s),doc(mg/l),electrical_conductivity(µS/cm),nitrate(mg/l),nitrate_nitrogen(mgN/l),nitrite(mg/l),nitrite_nitrogen(mgN/l),ortho_phosphate(mg/l),ortho_phosphate_phosphorus_filtered(mg/l),oxygen(mg/l),ph_value,total_nitrogen_unfiltered(mg/l),total_phosphorus_unfiltered(mg/l),water_temperature(°C)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2011-01-12 10:02:00,12.01.2011 10:02,<0.009,<0.007,7.27,6.73,6.14,5.88,1.59,467,6.33,1.43,0.016,0.005,,0.005,11.7,8.5,1.49,<0.011,4.8
2011-02-09 09:15:00,09.02.2011 09:15,0.032,0.025,11.6,1.34,1.25,1.17,1.23,463,8.19,1.85,0.033,0.01,,0.004,12.5,8.4,2.07,<0.011,1.9
2011-03-09 09:09:00,09.03.2011 09:09,0.035,0.027,9.61,1.29,1.26,1.23,1.02,480,7.22,1.63,0.036,0.011,,0.002,12.3,8.4,1.85,<0.011,4.7
2011-04-06 09:20:00,06.04.2011 09:20,0.018,0.014,7.19,2.58,2.19,1.82,1.37,469,6.37,1.44,0.02,0.006,,0.002,11.1,8.4,1.77,<0.011,8.4
2011-05-11 09:29:00,11.05.2011 09:29,0.037,0.029,10.2,0.45,0.34,0.32,0.71,493,7.5,1.7,0.039,0.012,,0.004,10.7,8.1,1.95,<0.011,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-24 11:55:00,24.08.2022 11:55,0.048,0.037,13.9,0.3,0.29,0.28,1.71,500,6.15,1.39,0.026,0.008,0.018,0.006,10.2,8,1.38,<0.011,15.5
2022-09-28 08:30:00,28.09.2022 08:30,0.026,0.02,4.7,28.22,16.67,4.95,3.68,406,5,1.13,<0.003,0.001,0.028,0.009,10.6,8.5,1.57,0.028,10.1
2022-10-19 08:30:00,19.10.2022 08:30,0.014,0.011,13.8,0.85,0.78,0.73,1.58,495,6.42,1.45,<0.003,<0.001,<0.006,0.002,10.3,8.2,1.43,<0.011,11.7
2022-11-16 08:25:00,16.11.2022 08:25,0.032,0.025,14.4,1.18,1.07,0.97,2.29,488,6.46,1.46,0.01,0.003,0.009,0.003,10.4,8.3,1.48,<0.011,10.3


In [74]:
# Inspect the index labels from position 120 onwards.
# NOTE(review): the output recorded below lists 'dd.mm.yyyy HH:MM' strings in
# lexicographic order, i.e. it appears to come from a run in which the index
# had not yet been converted to datetime - re-run to refresh.
pivoted_df.index.tolist()[120:]

['21.04.2021 11:00',
 '21.10.2020 10:10',
 '21.11.2012 10:15',
 '21.11.2018 08:15',
 '22.02.2012 10:30',
 '22.08.2018 08:10',
 '23.01.2013 10:30',
 '23.02.2022 12:10',
 '23.09.2020 11:15',
 '24.01.2018 11:45',
 '24.08.2022 11:55',
 '24.10.2012 11:04',
 '25.06.2014 11:25',
 '26.01.2022 08:35',
 '26.09.2018 08:00',
 '27.04.2022 12:50',
 '27.05.2020 11:10',
 '27.06.2012 10:48',
 '28.09.2022 08:30',
 '29.01.2014 11:00',
 '29.01.2020 08:20',
 '29.04.2020 09:40',
 '29.06.2022 08:25',
 '30.05.2018 08:10']

In [63]:
# Look at all parameters of one sample, selected by its index label.
# NOTE(review): the label is a 'dd.mm.yyyy HH:MM' string; presumably this was
# run while the index still held the raw timestamp strings - confirm that the
# lookup still selects the intended row with a datetime index.
pivoted_df.loc['09.03.2011 09:09']

Parameter
ammonium(mg/l)                                0.035
ammonium_nitrogen(mgN/l)                      0.027
chloride(mg/l)                                 9.61
daily_maximum_discharge(m3/s)                  1.29
daily_mean_discharge(m3/s)                     1.26
daily_minimum_discharge(m3/s)                  1.23
doc(mg/l)                                      1.02
electrical_conductivity(µS/cm)                  480
nitrate(mg/l)                                  7.22
nitrate_nitrogen(mgN/l)                        1.63
nitrite(mg/l)                                 0.036
nitrite_nitrogen(mgN/l)                       0.011
ortho_phosphate(mg/l)                           NaN
ortho_phosphate_phosphorus_filtered(mg/l)     0.002
oxygen(mg/l)                                   12.3
ph_value                                        8.4
total_nitrogen_unfiltered(mg/l)                1.85
total_phosphorus_unfiltered(mg/l)            <0.011
water_temperature(°C)                           4.7
Na

In [24]:
import pandas as pd

# Toy input: five long-format rows (one per parameter) that all belong to the
# same sample, laid out with the same columns as the real table.
data = {
    'Messstelle ID': [1002, 1002, 1002, 1002, 1002],
    'Messstelle Name': 5 * ['Rämismühle, Zell'],
    'Probenahme Ort': 5 * ['---'],
    'Probenahme Art': 5 * ['Stichprobe'],
    'NAQUA Probenahme Datum': 5 * [None],
    'NAQUA Probenahme Uhrzeit': 5 * [None],
    'NAWA Probenahme Beginn (Datum und Uhrzeit)': 5 * ['09.03.2011 09:09'],
    'NAWA Probenahme Ende (Datum und Uhrzeit)': 5 * [None],
    'NAWA Probenahme Dauer (Stunden)': 5 * [None],
    'Labor': 5 * [None],
    'Parameter': [
        'doc(mg/l)',
        'nitrate_nitrogen(mg/l)',
        'electrical_conductivity(µS/cm)',
        'nitrite_nitrogen(mg/l)',
        'ortho_phosphate_phosphorus_filtered(mg/l)',
    ],
    'Messwert': [1.02, 1.63, 480, 0.011, 0.002],
    'Bestimmungsgrenze': [0.24, 0.032, None, 0.001, 0.002],
    'NAWA Nachweisgrenze': 5 * [None],
    'Einheit': ['mg/l', 'mg/l', 'µS/cm', 'mg/l', 'mg/l'],
    'Messunsicherheit absolut/relativ': 5 * [None],
    'Messunsicherheit': 5 * [None],
    'Gerät/Methode': 5 * ['---'],
    'Bemerkung Messwert': 5 * [None],
    'Status': 5 * ['Freigegeben, validierte Daten'],
}

df = pd.DataFrame(data)

# Columns that identify a sample; they stay as regular columns after the pivot
index_columns = [
    'Messstelle ID',
    'Messstelle Name',
    'Probenahme Ort',
    'Probenahme Art',
    'NAQUA Probenahme Datum',
    'NAQUA Probenahme Uhrzeit',
    'NAWA Probenahme Beginn (Datum und Uhrzeit)',
    'NAWA Probenahme Ende (Datum und Uhrzeit)',
    'NAWA Probenahme Dauer (Stunden)',
    'Labor',
]

# Long -> wide: every parameter becomes its own column holding its "Messwert"
pivoted_df = df.pivot(index=index_columns, columns='Parameter', values='Messwert')
pivoted_df = pivoted_df.reset_index()

# Drop the "Parameter" axis label and make sure every column label is a plain
# string (tuple labels, if any, are reduced to their second element)
pivoted_df.columns.name = None
pivoted_df.columns = [
    col[1] if isinstance(col, tuple) else col
    for col in pivoted_df.columns
]

# Display the resulting DataFrame
pivoted_df


Unnamed: 0,Messstelle ID,Messstelle Name,Probenahme Ort,Probenahme Art,NAQUA Probenahme Datum,NAQUA Probenahme Uhrzeit,NAWA Probenahme Beginn (Datum und Uhrzeit),NAWA Probenahme Ende (Datum und Uhrzeit),NAWA Probenahme Dauer (Stunden),Labor,doc(mg/l),electrical_conductivity(µS/cm),nitrate_nitrogen(mg/l),nitrite_nitrogen(mg/l),ortho_phosphate_phosphorus_filtered(mg/l)
0,1002,"Rämismühle, Zell",---,Stichprobe,,,09.03.2011 09:09,,,,1.02,480.0,1.63,0.011,0.002


In [17]:
# List the distinct parameter labels present in the full dataset
dataset_nawa.Parameter.unique()

array(['doc(mg/l)', 'nitrate_nitrogen(mgN/l)',
       'electrical_conductivity(µS/cm)', 'nitrite_nitrogen(mgN/l)',
       'ortho_phosphate_phosphorus_filtered(mg/l)',
       'ammonium_nitrogen(mgN/l)', 'ph_value', 'water_temperature(°C)',
       'daily_maximum_discharge(m3/s)', 'total_nitrogen_unfiltered(mg/l)',
       'daily_mean_discharge(m3/s)', 'oxygen(mg/l)', 'chloride(mg/l)',
       'total_phosphorus_unfiltered(mg/l)',
       'daily_minimum_discharge(m3/s)', 'nitrite(mg/l)', 'ammonium(mg/l)',
       'nitrate(mg/l)', 'ortho_phosphate(mg/l)', 'discharge(m3/s)',
       'oxygen_saturation(%)', 'turbidity(NTU)', 'bromide(mg/l)',
       'fluoride(mg/l)', 'sulfate(mg/l)', 'mtbe(µg/l)', 'etbe(µg/l)',
       'tetrahydrofuran(µg/l)', '1_4_dioxane(µg/l)',
       'hexachloroethane(µg/l)', '1_1_1_trichloroethane(µg/l)',
       'carbon_tetrachloride(µg/l)', 'hexachlorobutadiene(µg/l)',
       'trichloroethene(µg/l)', 'trichlorofluoromethane(µg/l)',
       'tribromomethane(µg/l)', '1_3_5_trichl

In [31]:
# Rename columns based on the dictionary
# NOTE(review): neither dataset_naduf nor column_rename_dict is defined in the
# visible part of this notebook (only dataset_nawa / column_name_mapping are);
# this cell looks carried over from a NADUF workflow - confirm it belongs here.
dataset_naduf.rename(columns=column_rename_dict, inplace=True)

In [32]:
# Show the column labels of the table after the renaming step
dataset_naduf.columns

Index(['naduf_id', 'status_number', 'remark', 'year', 'date', 'duration',
       'mean_discharge(m3/s)', 'total_discharge(Miom3)', 'temperature(°C)',
       'pH(-)', 'conductivity_25C(µS/cm)', 'oxygen(mg/l)',
       'oxygen_saturation(%)', 'pH_lab(-)', 'conductivity_20C_lab(µS/cm)',
       'total_hardness(mmol/l)', 'alkalinity(mmol/l)', 'calcium(mg/l)',
       'magnesium(mg/l)', 'nitrate(mgN/l)', 'total_nitrogen(mgN/l)',
       'DRP(mgP/l)', 'total_phosphorus(mgP/l)',
       'total_phosphorus_filtered(mgP/l)', 'chloride(mg/l)', 'fluoride(mg/l)',
       'bromide(mg/l)', 'silicate(mgH4SiO4/l)', 'sulphate(mgSO4/l)',
       'sodium(mg/l)', 'potassium(mg/l)', 'iron(mg/l)', 'TOC(mgC/l)',
       'DOC(mgC/l)', 'suspended_material(mg/l)', 'chromium(µg/l)',
       'zinc(µg/l)', 'copper(µg/l)', 'cadmium(µg/l)', 'lead(µg/l)',
       'nickel(µg/l)', 'mercury(µg/l)', 'barium(µg/l)', 'strontium(µg/l)',
       'arsenic(µg/l)', 'manganese(µg/l)'],
      dtype='object')

In [33]:
# Keep the station id, the date and the measured variables. Compared with the
# column listing printed by the previous cell this leaves out 'status_number',
# 'remark', 'year', 'duration' and 'total_discharge(Miom3)'.
dataset_naduf = dataset_naduf[['naduf_id', 'date',
       'mean_discharge(m3/s)',
       'temperature(°C)', 'pH(-)', 'conductivity_25C(µS/cm)',
       'oxygen(mg/l)', 'oxygen_saturation(%)', 'pH_lab(-)',
       'conductivity_20C_lab(µS/cm)', 'total_hardness(mmol/l)',
       'alkalinity(mmol/l)', 'calcium(mg/l)', 'magnesium(mg/l)',
       'nitrate(mgN/l)', 'total_nitrogen(mgN/l)', 'DRP(mgP/l)',
       'total_phosphorus(mgP/l)', 'total_phosphorus_filtered(mgP/l)',
       'chloride(mg/l)', 'fluoride(mg/l)', 'bromide(mg/l)',
       'silicate(mgH4SiO4/l)', 'sulphate(mgSO4/l)', 'sodium(mg/l)',
       'potassium(mg/l)', 'iron(mg/l)', 'TOC(mgC/l)', 'DOC(mgC/l)',
       'suspended_material(mg/l)', 'chromium(µg/l)', 'zinc(µg/l)',
       'copper(µg/l)', 'cadmium(µg/l)', 'lead(µg/l)', 'nickel(µg/l)',
       'mercury(µg/l)', 'barium(µg/l)', 'strontium(µg/l)', 'arsenic(µg/l)',
       'manganese(µg/l)']]
dataset_naduf

Unnamed: 0,naduf_id,date,mean_discharge(m3/s),temperature(°C),pH(-),conductivity_25C(µS/cm),oxygen(mg/l),oxygen_saturation(%),pH_lab(-),conductivity_20C_lab(µS/cm),...,zinc(µg/l),copper(µg/l),cadmium(µg/l),lead(µg/l),nickel(µg/l),mercury(µg/l),barium(µg/l),strontium(µg/l),arsenic(µg/l),manganese(µg/l)
0,1181,1982-11-15 06:00:00,5.313851,7.271006,,,,,8.480000,367.066770,...,10.406677,3.019255,0.073556,1.790373,,,,,,
1,1181,1982-11-29 06:00:00,9.046227,4.826679,,,,,8.430766,356.188458,...,,,,,,,,,,
2,1181,1982-12-13 05:30:00,10.864181,4.872490,,,,,8.356344,348.381529,...,,,,,,,,,,
3,1181,1982-12-27 05:30:00,27.653205,3.792989,,,,,8.474643,338.992271,...,26.11415,2.798573,0.025071,1.349287,,,,,,
4,1181,1983-01-10 05:55:00,12.789252,3.064523,,,,,8.281437,274.674792,...,28.858242,2.764272,0.026786,1.332136,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14464,6169,2020-10-26 10:35:00,10.570123,8.946462,,,,,8.410000,317.222586,...,,,,,,,,,,
14465,6169,2020-11-09 10:15:00,15.549123,8.916996,,,,,8.390000,313.269589,...,,,,,,,,,,
14466,6169,2020-11-23 10:05:00,6.439759,6.657077,,,,,8.380000,342.636797,...,,,,,,,,,,
14467,6169,2020-12-07 10:05:00,4.323554,3.561146,,,,,8.400031,383.225945,...,,,,,,,,,,


In [34]:
# Convert to datetime:
# NOTE(review): the format string has no time component although the displayed
# values carry one (e.g. 1982-11-15 06:00:00); if the column still holds
# strings at this point the format would not match them - confirm the dtype of
# "date" before this cell.
dataset_naduf["date"] = pd.to_datetime(dataset_naduf["date"], format='%Y-%m-%d')
dataset_naduf

Unnamed: 0,naduf_id,date,mean_discharge(m3/s),temperature(°C),pH(-),conductivity_25C(µS/cm),oxygen(mg/l),oxygen_saturation(%),pH_lab(-),conductivity_20C_lab(µS/cm),...,zinc(µg/l),copper(µg/l),cadmium(µg/l),lead(µg/l),nickel(µg/l),mercury(µg/l),barium(µg/l),strontium(µg/l),arsenic(µg/l),manganese(µg/l)
0,1181,1982-11-15 06:00:00,5.313851,7.271006,,,,,8.480000,367.066770,...,10.406677,3.019255,0.073556,1.790373,,,,,,
1,1181,1982-11-29 06:00:00,9.046227,4.826679,,,,,8.430766,356.188458,...,,,,,,,,,,
2,1181,1982-12-13 05:30:00,10.864181,4.872490,,,,,8.356344,348.381529,...,,,,,,,,,,
3,1181,1982-12-27 05:30:00,27.653205,3.792989,,,,,8.474643,338.992271,...,26.11415,2.798573,0.025071,1.349287,,,,,,
4,1181,1983-01-10 05:55:00,12.789252,3.064523,,,,,8.281437,274.674792,...,28.858242,2.764272,0.026786,1.332136,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14464,6169,2020-10-26 10:35:00,10.570123,8.946462,,,,,8.410000,317.222586,...,,,,,,,,,,
14465,6169,2020-11-09 10:15:00,15.549123,8.916996,,,,,8.390000,313.269589,...,,,,,,,,,,
14466,6169,2020-11-23 10:05:00,6.439759,6.657077,,,,,8.380000,342.636797,...,,,,,,,,,,
14467,6169,2020-12-07 10:05:00,4.323554,3.561146,,,,,8.400031,383.225945,...,,,,,,,,,,


In [63]:
# Write one CSV per station: the station's rows, indexed by date, with every
# column converted to numbers and rounded to four decimals.
# NOTE(review): network_naduf / dataset_naduf are not defined in the visible
# part of this notebook - confirm they are the intended inputs here.
for code in tqdm.tqdm(network_naduf.naduf_id):

    # Rows of the current station, taken as an independent copy so that the
    # edits below never act on a slice of the full table.
    dataset = dataset_naduf[dataset_naduf["naduf_id"] == code].copy()
    dataset = dataset.set_index("date").drop(columns=["naduf_id"])
    dataset.index.name = "date"

    # Here we take out the > or < before converting to a numeric value.
    # This has to happen BEFORE the numeric conversion: coercing first would
    # turn a text value such as "<0.003" into NaN, leaving nothing to strip.
    # With this order such a value is kept as the bare number (0.003).
    dataset = dataset.applymap(
        lambda x: x.replace('<', '').replace('>', '') if isinstance(x, str) else x
    )

    # There are some non-numeric things in the columns, instead of NaNs;
    # whatever still cannot be parsed as a number becomes NaN.
    dataset = dataset.apply(pd.to_numeric, errors='coerce')

    dataset = dataset.round(4)

    dataset.to_csv(PATH_OUTPUT + "/NADUF/CAMELS_CH_chem_intervals_"+str(code)+".csv", encoding='latin')
    

100%|██████████| 24/24 [00:01<00:00, 20.52it/s]


Observations
- We have 24 stations in total (one is empty: 1827)
- So far, the intervals are variable (not resampled)

# End