In [212]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from pathlib import Path

# Base directory for all data paths: the grandparent of the kernel's current
# working directory (Path() is the cwd; parents[1] is two levels above it).
# NOTE(review): the result depends on where the kernel was started — presumably
# the notebook lives two folders below the project root; confirm.
BASE_DIR = Path().resolve().parents[1]

In [None]:
# Probe a single CARMF regional page (id=01) and show the status code, the
# content type and the first 500 characters of the body before any parsing.
url = "https://www.carmf.fr/page.php?page=chiffrescles/geo/regions.php?id=01"

# The request is sent with a browser-like User-Agent header.
# timeout: requests has no default timeout, so without it a stalled server
# would block the kernel indefinitely.
resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
print(resp.status_code)
print(resp.headers.get("content-type"))
print(resp.text[:500])

In [None]:
# Parse the fetched HTML, count its <table> elements and print a 300-character
# text preview of (at most) the first ten of them.
soup = BeautifulSoup(resp.text, "lxml")
tables = soup.find_all("table")
print("Nombre de tables :", len(tables))

for position, candidate in enumerate(tables[:10]):
    preview = candidate.get_text(" ", strip=True)[:300]
    print("\n--- TABLE", position, "---")
    print(preview)

In [None]:
# Keep the first table of the page, count its <tr> rows and print the cell
# texts of the first three rows.
table = tables[0]
rows = table.find_all("tr")
print("Nombre de lignes (tr) :", len(rows))

for row in rows[:3]:
    row_texts = []
    for cell in row.find_all(["th","td"]):
        row_texts.append(cell.get_text(" ", strip=True))
    print(row_texts)

In [None]:
# Use the second row (index 1) as the header, locate the position of the
# "Médecins*" column in it, then print the last five rows of the table.
rows = table.find_all("tr")

header_cells = []
for header_cell in rows[1].find_all(["th","td"]):
    header_cells.append(header_cell.get_text(" ", strip=True))
print("HEADER:", header_cells)

# list.index raises ValueError when the label is not in the header row
med_idx = header_cells.index("Médecins*")
print("Index colonne Médecins:", med_idx)

print("\nDernières lignes du tableau :\n")

for tail_row in rows[-5:]:
    print([c.get_text(" ", strip=True) for c in tail_row.find_all(["th","td"])])

In [None]:
# Find the first row whose first cell reads "TOTAL" (case-insensitive) and read
# its value in the "Médecins*" column as an integer.
# The column position is `med_idx`, looked up from the header row, rather than
# a hard-coded index.
total_medecins = None

for r in rows:
    cells = [c.get_text(" ", strip=True) for c in r.find_all(["th","td"])]
    if not cells:
        # rows without any <th>/<td> cell carry no data
        continue
    if cells[0].upper() == "TOTAL":
        # "".join(text.split()) removes every whitespace character, including
        # non-breaking spaces used as thousands separators, which a plain
        # replace(" ", "") would leave in place and make int() fail.
        total_medecins = int("".join(cells[med_idx].split()))
        break

# Prints None when no row starts with "TOTAL"
print("Valeur brute TOTAL médecins :", total_medecins)


In [37]:
# Lookup table: CARMF page id (the value of the `id=` URL parameter)
# -> (region code as a string, region name).
# Note the id scheme: "01" … "09", then "010" … "013".
region_mapping = dict([
    ("01", ("84", "Auvergne-Rhône-Alpes")),
    ("02", ("27", "Bourgogne-Franche-Comté")),
    ("03", ("53", "Bretagne")),
    ("04", ("24", "Centre-Val de Loire")),
    ("05", ("94", "Corse")),
    ("06", ("44", "Grand Est")),
    ("07", ("32", "Hauts-de-France")),
    ("08", ("11", "Île-de-France")),
    ("09", ("28", "Normandie")),
    ("010", ("75", "Nouvelle-Aquitaine")),
    ("011", ("76", "Occitanie")),
    ("012", ("52", "Pays de la Loire")),
    ("013", ("93", "Provence-Alpes-Côte d'Azur")),
])

In [172]:
# Scrape, for every region of `region_mapping`, the "TOTAL" value of the
# "Médecins*" column from the first table of its CARMF page, and collect one
# record per region in `dens_med`.

# One URL per region. The ids are the keys of `region_mapping`, so the URL list
# cannot drift out of sync with the mapping (same URLs, same order as the
# mapping's insertion order).
list_url = [
    f"https://www.carmf.fr/page.php?page=chiffrescles/geo/regions.php?id={page_id}"
    for page_id in region_mapping
]

dens_med = []

for url in list_url:

    # The page id is whatever follows "id=" in the URL
    region_id = url.split("id=")[1]
    region_code, region_name = region_mapping[region_id]
    region_code = int(region_code)

    # timeout: requests has no default timeout; without it one stalled page
    # would block the whole loop indefinitely.
    resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
    # Fail on HTTP errors instead of trying to parse an error page
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "lxml")

    table = soup.find_all("table")[0]
    rows = table.find_all("tr")

    # The second row (index 1) is used as the header row
    header_cells = [c.get_text(" ", strip=True) for c in rows[1].find_all(["th","td"])]
    med_idx = header_cells.index("Médecins*")

    total_medecins = None
    for r in rows:
        cells = [c.get_text(" ", strip=True) for c in r.find_all(["th","td"])]
        if cells and cells[0].upper() == "TOTAL":
            # Strip every whitespace character (plain or non-breaking) used as
            # a thousands separator before converting to int
            total_medecins = int("".join(cells[med_idx].split()))
            break

    # A missing TOTAL row would otherwise store None, which only surfaces later
    # as NaN in the ratio columns.
    if total_medecins is None:
        raise ValueError(f"No TOTAL row found for region {region_name!r} ({url})")

    dens_med.append({
        "region": region_code,
        "region_name": region_name,
        "medecins_liberaux": total_medecins})

In [173]:
# Turn the collected per-region records into a DataFrame.
# The name `dens_med` is rebound from the list to the DataFrame.
dens_med = pd.DataFrame(dens_med)

In [213]:
# Sum "boites" per (annee, region) from the ';'-separated prescriptions file,
# then keep only the rows of year 2024.
atc1 = (
    pd.read_csv(BASE_DIR / "data/processed/atc1_prescriptions_2014_2024.csv", sep=";")
    .groupby(["annee", "region"])["boites"]
    .sum()
    .reset_index()
    .loc[lambda totals: totals["annee"] == 2024]
)

In [None]:
# Load the ';'-separated population file.
# presumably one row per (annee, region) with a "population" column — verify against the file
df_pop = pd.read_csv(
    filepath_or_buffer=BASE_DIR / "data/processed/pop_reg_annee.csv",
    sep=";",
)

In [175]:
# Left-join the population table onto the 2024 totals, matching on year and
# region; every row of `atc1` is kept.
df_dens_med = pd.merge(
    left=atc1,
    right=df_pop,
    how="left",
    on=["annee", "region"],
)

In [176]:
# Attach the scraped "medecins_liberaux" count to each region (left join on
# "region", so every existing row is kept).
# - Dropping any "medecins_liberaux" column left by a previous run keeps the
#   cell re-runnable: merging a second time would otherwise produce
#   "medecins_liberaux_x"/"_y" columns and break the ratio computations.
# - validate="many_to_one" raises if a region appears more than once in
#   `dens_med`, which would silently duplicate rows.
df_dens_med = (
    df_dens_med
    .drop(columns=["medecins_liberaux"], errors="ignore")
    .merge(
        dens_med[["region", "medecins_liberaux"]],
        on="region",
        how="left",
        validate="many_to_one",
    )
)

In [182]:
# Derive three indicators as floats first, then round each one to the nearest
# integer and cast it to int:
#   boites_pour_1000_hab  = boites / population * 1000
#   boites_par_medecin    = boites / medecins_liberaux
#   habitants_par_medecin = population / medecins_liberaux
df_dens_med["boites_pour_1000_hab"] = df_dens_med["boites"].div(df_dens_med["population"]).mul(1000)
df_dens_med["boites_par_medecin"] = df_dens_med["boites"].div(df_dens_med["medecins_liberaux"])
df_dens_med["habitants_par_medecin"] = df_dens_med["population"].div(df_dens_med["medecins_liberaux"])

# astype(int) raises if a ratio is NaN or infinite
for ratio_col in ("boites_par_medecin", "habitants_par_medecin", "boites_pour_1000_hab"):
    df_dens_med[ratio_col] = df_dens_med[ratio_col].round().astype(int)



In [None]:
# Write the final table twice, without the index: as a ';'-separated CSV and
# as a Parquet file.
csv_target = BASE_DIR / "data/processed/df_dens_med.csv"
parquet_target = BASE_DIR / "data/processed/df_dens_med.parquet"

df_dens_med.to_csv(csv_target, index=False, sep=";")
df_dens_med.to_parquet(parquet_target, index=False)