In [2]:
import polars as pl
import os
import duckdb as db

# Chemin des données

In [3]:
# Locate BPE23.parquet inside the "data" folder that sits in the parent
# directory of the notebook's current working directory.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]  # head component == dirname
data = os.path.join(parent_dir, "data")
data_path = os.path.join(parent_dir, "data", "BPE23.parquet")

# BDD

In [None]:
# Open the parquet file as a lazy frame; rows are only read when a later
# cell calls .collect().
df = pl.scan_parquet(data_path)

In [8]:
# Keep rows of domain "D", sub-domains D1-D3, whose coordinates are present
# and lie strictly inside a latitude/longitude box, then project the columns
# of interest and materialise the result.
lat_col = pl.col("LATITUDE")
lon_col = pl.col("LONGITUDE")

is_domain_d = pl.col("DOM") == "D"
# NOTE(review): this pattern matches any TYPEQU containing "D" anywhere; if a
# specific list of equipment codes was intended, is_in([...]) is needed — confirm.
typequ_has_d = pl.col("TYPEQU").str.contains("D")
has_coords = lat_col.is_not_null() & lon_col.is_not_null()
# NOTE(review): strict bounds 41 < lat < 52 and -5 < lon < 9; verify that the
# box covers the whole territory that is meant to be kept.
in_bbox = (lat_col > 41) & (lat_col < 52) & (lon_col > -5) & (lon_col < 9)
in_subdomain = pl.col("SDOM").is_in(["D1", "D2", "D3"])

kept_columns = [
    "DOM", "SDOM", "TYPEQU", "DENS3", "DENS7",
    "AAV2020", "QP", "QP2015", "LATITUDE", "LONGITUDE",
    "CODPOS", "LIBCOM",
]

# All predicates are AND-ed together, as with multiple filter() arguments.
df_filtered = (
    df.filter(is_domain_d & typequ_has_d & has_coords & in_bbox & in_subdomain)
    .select(kept_columns)
    .collect()
)


In [9]:
# Render the filtered frame as the cell output.
df_filtered

DOM,SDOM,TYPEQU,DENS3,DENS7,AAV2020,QP,QP2015,LATITUDE,LONGITUDE,CODPOS,LIBCOM
str,str,str,str,str,str,str,str,f64,f64,str,str
"""D""","""D1""","""D101""","""1""","""1""","""001""","""CSZ""","""CSZ""",48.859152,2.341102,"""75001""","""PARIS 1ER ARRONDISSEMENT"""
"""D""","""D1""","""D108""","""1""","""1""","""001""","""CSZ""","""CSZ""",48.865472,2.334304,"""75001""","""PARIS 1ER ARRONDISSEMENT"""
"""D""","""D1""","""D108""","""1""","""1""","""001""","""CSZ""","""CSZ""",48.859328,2.346075,"""75001""","""PARIS 1ER ARRONDISSEMENT"""
"""D""","""D1""","""D108""","""1""","""1""","""001""","""CSZ""","""CSZ""",48.863354,2.349947,"""75001""","""PARIS 1ER ARRONDISSEMENT"""
"""D""","""D1""","""D108""","""1""","""1""","""001""","""CSZ""","""CSZ""",48.863715,2.347963,"""75001""","""PARIS 1ER ARRONDISSEMENT"""
…,…,…,…,…,…,…,…,…,…,…,…
"""D""","""D2""","""D244""","""3""","""5""","""495""","""CSZ""","""CSZ""",42.621339,8.945191,"""20220""","""MONTICELLO"""
"""D""","""D2""","""D244""","""3""","""5""","""495""","""CSZ""","""CSZ""",42.631096,8.950363,"""20220""","""MONTICELLO"""
"""D""","""D2""","""D244""","""3""","""6""","""430""","""CSZ""","""CSZ""",42.546474,8.913547,"""20225""","""MURO"""
"""D""","""D2""","""D244""","""3""","""6""","""430""","""CSZ""","""CSZ""",42.552518,8.950741,"""20225""","""NESSA"""


In [7]:
# Show only the coordinate columns of the filtered frame.
df_filtered.select(["LATITUDE", "LONGITUDE"])

LATITUDE,LONGITUDE
f64,f64
48.859152,2.341102
48.865472,2.334304
48.859328,2.346075
48.863354,2.349947
48.863715,2.347963
…,…
42.621339,8.945191
42.631096,8.950363
42.546474,8.913547
42.552518,8.950741


Les équipements de santé sont identifiés par les modalités suivantes :
- DOM : D
- SDOM : D1, D2, D3
- TYPEQU : D101, D102, D103, D108, D113

Variables importantes :
- DOM : Domaine d’appartenance de l’équipement
- SDOM : Sous-domaine d’appartenance de l’équipement
- TYPEQU : Type d’équipement 
- DENS3 : Grille communale de densité à 3 niveaux
- DENS7 : Grille communale de densité à 7 niveaux
- AAV2020 : Zonage en aire d’attraction des villes 2020 d’implantation de l’équipement
- QP : Quartier prioritaire de la politique de la ville d’appartenance de l’équipement
- QP2015 : Quartier prioritaire de la politique de la ville 2015 d’appartenance de l’équipement
- LATITUDE : Latitude en coordonnées GPS de l’équipement (degrés décimaux)
- LONGITUDE : Longitude en coordonnées GPS de l’équipement (degrés décimaux)
- CODPOS : Code postal de l’adresse d’implantation de l’équipement
- LIBCOM : Libellé de la commune d’implantation de l’équipement

# DuckDB

In [4]:
# Start the DuckDB section from a fresh lazy scan of the same parquet file
# (rebinds `df`).
df = pl.scan_parquet(data_path)

In [5]:
# Columns carried into the DuckDB section. No row filter is applied here,
# only a projection of `df`.
duckdb_columns = [
    "DOM", "SDOM", "TYPEQU", "CAPACITE", "ACCES_SANITAIRE", "PRES_SANITAIRE",
    "DENS3", "DENS7", "AAV2020", "QP", "QP2015", "LATITUDE", "LONGITUDE",
    "CODPOS", "LIBCOM",
]
df_filtered = df.select(duckdb_columns)


In [6]:
# Materialise the selection into an in-memory DataFrame.
# Going through .lazy() keeps this cell idempotent: on a LazyFrame it is a
# no-op, and on an already-collected DataFrame it converts back to lazy first,
# so re-running the cell no longer fails because DataFrame has no .collect().
df_filtered = df_filtered.lazy().collect()

In [7]:
# Open an in-memory DuckDB database.
con = db.connect(database=':memory:')
# Expose the frame to SQL under the view name "df". This SQL name is bound to
# df_filtered, not to the Python variable `df`.
con.register('df', df_filtered)

<duckdb.duckdb.DuckDBPyConnection at 0x2cf1cd1fb70>

In [8]:
# SQL run against the registered "df" view: a row is kept as soon as ANY one
# of the five conditions holds, because they are chained with OR.
# NOTE(review): with OR, the ACCES_SANITAIRE / PRES_SANITAIRE clauses keep rows
# whatever their DOM, and if SDOM / TYPEQU codes always start with their DOM
# letter, those two clauses are already implied by DOM = 'D'. Confirm that OR
# (rather than AND, or a narrower condition) is what is intended.
query = """
SELECT *
FROM df
WHERE
    DOM = 'D'
    OR SDOM IN ('D1', 'D2', 'D3')
    OR TYPEQU IN ('D101', 'D102', 'D103', 'D108', 'D113')
    OR ACCES_SANITAIRE = '1'
    OR PRES_SANITAIRE = '1'
"""

In [9]:
# Run the query and fetch the full result set as a DataFrame.
# NOTE(review): fetchdf() presumably yields a pandas DataFrame while the rest
# of the notebook works with polars — confirm this is the intended type.
df_sante = con.execute(query).fetchdf()

: 