Data extracted from http://naiades.eaufrance.fr/acces-donnees#/physicochimie  
- **River :** Saône - id 'U---0000 -'
- **Dates :** 2011-01-01 to 2021-12-31
- **Parameter :** Nitrates - id '1340'
- **Measure unit :** mg(NO3)/L
- **Data classification :** All

In [173]:
import pandas as pd

In [174]:
# Load the raw Naiades export of the nitrate analyses (semicolon-separated file)
nitrates = pd.read_csv(
    '../data/nitrates/naiades-saone-2011-2021-nitrates/Analyses.CSV',
    sep=';',
)

In [175]:
# Columns kept for the processing
used_columns = [
    'CdStationMesureEauxSurface',  # station id
    'DatePrel',                    # date of the measure
    'HeurePrel',                   # time of the measure
    'CdParametre',                 # parameter code
    'LbLongParamètre',             # parameter label
    'RsAna',                       # result of the measure
    # Qualification code of the result:
    # 1 -> correct / anything else is to be filtered out (3 -> uncertain)
    'CdQualAna',
    'SymUniteMesure',              # symbol of the measure unit (°C...)
    'LqAna',                       # quantification limit (below which the measure is not reliable)
]


In [176]:
# Restrict the frame to the columns listed in `used_columns`, then preview the first rows
nitrates = nitrates.loc[:, used_columns]
nitrates.head(n=5)

Unnamed: 0,CdStationMesureEauxSurface,DatePrel,HeurePrel,CdParametre,LbLongParamètre,RsAna,CdQualAna,SymUniteMesure,LqAna
0,6000990,2013-01-28,14:30:00,1340,Nitrates,4.3,1,mg(NO3)/L,1.0
1,6000990,2013-03-25,15:10:00,1340,Nitrates,3.8,1,mg(NO3)/L,1.0
2,6000990,2013-05-29,14:30:00,1340,Nitrates,3.5,1,mg(NO3)/L,1.0
3,6000990,2013-07-22,13:15:00,1340,Nitrates,4.9,1,mg(NO3)/L,1.0
4,6000990,2013-09-23,14:10:00,1340,Nitrates,5.1,1,mg(NO3)/L,1.0


In [177]:
# Sanity check: every row must be a Nitrates measure (CdParametre == 1340).
# The parameter code and label columns are then removed.
assert (nitrates['CdParametre'] == 1340).all()
nitrates = nitrates.drop(columns=['CdParametre', 'LbLongParamètre'])

In [178]:
# Keep only the measures whose qualification code is 1,
# then discard the qualification column.
is_qualified = nitrates['CdQualAna'] == 1
nitrates = nitrates[is_qualified].drop(columns=['CdQualAna'])

In [179]:
# No measure may be lower than its quantification limit 'LqAna';
# once verified, the limit column is dropped.
below_limit = nitrates['RsAna'] < nitrates['LqAna']
assert not below_limit.any()
nitrates = nitrates.drop(columns=['LqAna'])

In [180]:
# Checks that every measure is expressed in 'mg(NO3)/L'.
# The values themselves are compared: comparing the column length to the
# frame length would always pass and verify nothing.
# Then drops the column.
expected_unit = 'mg(NO3)/L'
assert (nitrates['SymUniteMesure'] == expected_unit).all(), (
    f"Unexpected measure unit(s): {nitrates['SymUniteMesure'].unique().tolist()}"
)
nitrates = nitrates.drop(columns=['SymUniteMesure'])

In [181]:
# Discard the rows that are exact copies of an earlier row
nitrates.duplicated().sum()  # -> 3 duplicates (value noted by the author; not displayed, as this is not the last expression)
nitrates = nitrates.drop_duplicates()

In [182]:
# Preview the first rows after the checks and the duplicate removal
nitrates.head(n=5)

Unnamed: 0,CdStationMesureEauxSurface,DatePrel,HeurePrel,RsAna
0,6000990,2013-01-28,14:30:00,4.3
1,6000990,2013-03-25,15:10:00,3.8
2,6000990,2013-05-29,14:30:00,3.5
3,6000990,2013-07-22,13:15:00,4.9
4,6000990,2013-09-23,14:10:00,5.1


In [183]:
# The meaningful time precision is the day:
# measures done the same day on the same station are averaged.

station_day = ['CdStationMesureEauxSurface', 'DatePrel']

nitrates.duplicated(subset=station_day, keep=False).sum()
# 73 measures are concerned (not unique measures, considering the date and the station)


def first(x):
    """Aggregate function keeping the first entry of a group."""
    return x.iloc[0]


nitrates = (
    nitrates
    .groupby(station_day, as_index=False)
    .agg({'RsAna': 'mean', 'HeurePrel': first})
)


In [184]:
# Parse 'DatePrel' as datetime and discard 'HeurePrel' (meaningless information)
parsed_days = pd.to_datetime(nitrates['DatePrel'])
nitrates = nitrates.drop(columns=['HeurePrel']).assign(DatePrel=parsed_days)

In [185]:
# Final column names of the exported table.
column_names = [
    'station_id',
    'day',
    'measure'
]

# Rename through an explicit old -> new mapping instead of assigning
# `nitrates.columns` positionally: a positional assignment would silently
# mislabel the data if the column order produced upstream ever changed.
# errors='raise' makes a missing source column fail loudly.
renaming = dict(zip(['CdStationMesureEauxSurface', 'DatePrel', 'RsAna'], column_names))
nitrates = nitrates.rename(columns=renaming, errors='raise')

# The frame must now hold exactly the expected columns, in the expected order.
assert list(nitrates.columns) == column_names

In [186]:
# Save the per-station, per-day measures.
# No `index` argument is given, so the row index is written too, as an unnamed first column.
# NOTE(review): pass index=False if no downstream reader relies on that column — confirm first.
nitrates.to_csv(
    '../data/nitrates/saone_2011_2021.csv',
)