In [1]:
import duckdb
from pipelines.tasks._common import DUCKDB_FILE

# Open the project DuckDB file read-only: this notebook only queries it.
con = duckdb.connect(DUCKDB_FILE, read_only=True)

from pandasql import sqldf
import pandas as pd
import geopandas as gpd
import folium
import os
from pyproj import Proj, transform
import numpy as np

# Samples (prélèvements) dated 2024-01-01 or later.
query_2024 = """
SELECT * from edc_prelevements
WHERE dateprel >= '2024-01-01'
"""
prelev = con.sql(query_2024).df()

# Communes whose network code (cdreseau) appears in `prelev`.
# The subquery refers to the `prelev` DataFrame above by its Python name.
query_2024_comm = """SELECT *
FROM edc_communes
WHERE cdreseau IN
       (SELECT cdreseau FROM prelev)"""
communes = con.sql(query_2024_comm).df()

# Analysis results attached to one of the samples in `prelev`.
query_2024_res = """SELECT *
FROM edc_resultats
WHERE referenceprel IN
       (SELECT referenceprel FROM prelev)"""
res = con.sql(query_2024_res).df()

# Exploration des prélèvements sur les phtalates

### Focus sur 2024

In [21]:
# CAS numbers of phthalates, as listed by the Government of Canada:
# https://www.canada.ca/fr/sante-canada/services/substances-chimiques/initiative-groupes-substances/phtalates.html
phtalate = np.array(
    [
        "84-66-2",
        "131-16-8",
        "84-74-2",
        "85-68-7",
        "84-75-3",
        "111381-89-6",
        "27554-26-3",
        "117-81-7",
        "68648-93-1",
        "117-84-0",
        "68515-43-5",
        "111381-91-0",
        "85507-79-5",
        "68515-47-9",
        "131-11-3",
        "84-61-7",
        "84-64-0",
        "84-69-5",
        "523-31-9",
        "5334-09-8",
        "16883-83-3",
        "27215-22-1",
        "27987-25-3",
        "68515-40-2",
        "71888-89-6",
        "28553-12-0",
        "68515-48-0",
        "26761-40-0",
        "68515-49-1",
        "3648-20-2",
    ]
)

# Distinct parameter labels among the results whose CAS number is in the list above.
res.loc[res["casparam"].isin(phtalate), "libminparametre"].unique()

array(['DEHP (2-ethylhexyl phtalate)', 'DBP (Dibutyl phthalate)'],
      dtype=object)

In [25]:
# Parameter code(s) recorded under the DEHP label.
res.loc[res["libminparametre"] == "DEHP (2-ethylhexyl phtalate)", "cdparametre"].unique()

array([6616.])

In [23]:
# Parameter code(s) recorded under the DBP label.
res.loc[res["libminparametre"] == "DBP (Dibutyl phthalate)", "cdparametre"].unique()

array([1462.])

In [24]:
# Number of result rows for each of the two phthalate labels
# (summing the boolean mask counts the matching rows).
print((res["libminparametre"] == "DEHP (2-ethylhexyl phtalate)").sum())
print((res["libminparametre"] == "DBP (Dibutyl phthalate)").sum())

90
15


Parmi les données de 2024, seuls deux phtalates ont été étudiés : le DEHP (2-ethylhexyl phtalate) (6616) et le DBP (Dibutyl phthalate) (1462), avec respectivement 90 et 15 prélèvements, soit très peu.

In [60]:
# Keep only the results whose parameter code is 1753.0.
# NOTE(review): the name says DEHP, but the DEHP code found earlier in this
# notebook is 6616.0; the recorded head() output of the joined frame labels
# code 1753.0 as "Chlorure de vinyl monomère". Confirm which parameter is intended.
res_dehp = res[res["cdparametre"].isin([1753.0])]

In [61]:
# Number of rows kept by the parameter-code filter.
res_dehp.shape[0]

33183

In [64]:
# Join res_dehp with prelev on referenceprel so that each analysis result carries
# the INSEE commune code, commune name, network code and sampling date/time.
query_insee = """SELECT res_dehp.cddept,res_dehp.cdparametre, res_dehp.rqana,res_dehp.referenceprel,res_dehp.libminparametre,res_dehp.refqual,res_dehp.valtraduite, res_dehp.casparam,res_dehp.limitequal,prelev.inseecommuneprinc,prelev.nomcommuneprinc, prelev.cdreseau, prelev.dateprel, prelev.heureprel
FROM res_dehp
JOIN prelev
ON res_dehp.referenceprel = prelev.referenceprel
ORDER BY dateprel DESC"""

res_dehp_insee = con.sql(query_insee).df()

In [None]:
# Flag results strictly above 0.5; this matches the "<=0.5 µg/L" value shown in
# the `limitequal` column of the recorded output for this parameter.
res_dehp_insee = res_dehp_insee.assign(depasse=lambda frame: frame["valtraduite"].gt(0.5))

In [68]:
# Preview the first five joined rows.
res_dehp_insee.head(n=5)

Unnamed: 0,cddept,cdparametre,rqana,referenceprel,libminparametre,refqual,valtraduite,casparam,limitequal,inseecommuneprinc,nomcommuneprinc,cdreseau,dateprel,heureprel,depasse
0,59,1753.0,33,5900328113,Chlorure de vinyl monomère,,0.033,75-01-4,<=0.5 µg/L,59653,WAVRIN,59000414,2024-12-31,10h11,False
1,59,1753.0,33,5900328113,Chlorure de vinyl monomère,,0.033,75-01-4,<=0.5 µg/L,59653,WAVRIN,59000545,2024-12-31,10h11,False
2,59,1753.0,33,5900328113,Chlorure de vinyl monomère,,0.033,75-01-4,<=0.5 µg/L,59653,WAVRIN,59000585,2024-12-31,10h11,False
3,59,1753.0,33,5900328113,Chlorure de vinyl monomère,,0.033,75-01-4,<=0.5 µg/L,59653,WAVRIN,59000721,2024-12-31,10h11,False
4,59,1753.0,33,5900328113,Chlorure de vinyl monomère,,0.033,75-01-4,<=0.5 µg/L,59653,WAVRIN,59000948,2024-12-31,10h11,False


In [71]:
# Earliest sampling date per network (cdreseau) in the joined results.
query_test = """SELECT MIN(dateprel), cdreseau
FROM res_dehp_insee
GROUP BY cdreseau"""

res_test = con.sql(query_test).df()

In [72]:
# Display the earliest sampling date per network (MIN(dateprel) grouped by cdreseau).
res_test

Unnamed: 0,min(dateprel),cdreseau
0,2024-01-04,059004194
1,2024-01-31,054000006
2,2024-02-07,062000464
3,2024-01-10,062000647
4,2024-02-09,081000561
...,...,...
16419,2024-01-09,074002226
16420,2024-01-09,077000691
16421,2024-01-09,024000553
16422,2024-01-03,026001075
