In [4]:
from enum import Enum
from sqlalchemy import Column, Integer, String, create_engine, DateTime, Boolean, ForeignKey, select
from sqlalchemy.orm import declarative_base, mapped_column, Mapped, relationship, sessionmaker
from sqlalchemy.exc import SQLAlchemyError
from typing import Optional, Dict, Any, List
from datetime import datetime

# Connection URL for the local MySQL "paruvendu" database, using the PyMySQL driver.
# NOTE(review): the credentials (root, empty password) are hard-coded here —
# consider loading the URL from configuration before sharing or deploying this.
DATABASE_URL = "mysql+pymysql://root:@127.0.0.1:3306/paruvendu"
# echo=True makes the engine log the SQL statements it emits.
engine = create_engine(DATABASE_URL, echo=True)

# Declarative base class that every ORM model below inherits from.
Base = declarative_base()

class AnnouncementType(Enum):
    """Kind of property an announcement is about.

    The integer values are what the import code stores in ``Announcement.type``.
    """
    MAISON = 1  # house
    APPARTEMENT = 2  # apartment
    NB = 3  # fallback used when the title is missing or matches no known kind


class AnnouncerType(Enum):
    """Category of announcer: private individual or professional.

    NOTE(review): not referenced anywhere in the visible code.
    """
    PARTICULIER = 1  # private individual
    PROFESSIONNELLE = 2  # professional

class Agency(Base):
    """ORM model for an announcer (agency), stored in the ``announcer`` table.

    One agency owns many ``Announcement`` rows through ``announcements``.
    All descriptive columns are nullable.
    """
    __tablename__ = 'announcer'
    id = Column(Integer, primary_key=True)
    name = Column(String(100), nullable=True)
    address = Column(String(100), nullable=True)
    slogan = Column(String(200), nullable=True)
    description = Column(String(1000), nullable=True)
    telephone = Column(String(15), nullable=True)
    yearMember = Column(String(15), nullable=True)
    # One-to-many side of the Agency <-> Announcement link (mirror of
    # Announcement.agency). With "all, delete-orphan" the announcements are
    # deleted together with their agency or when removed from this collection.
    # lazy="select": the collection is loaded on first access.
    announcements: Mapped[List["Announcement"]] = relationship(
        "Announcement",
        back_populates="agency",
        cascade="all, delete-orphan",
        lazy="select"
    )

class Announcement(Base):
    """ORM model for a single listing, stored in the ``announcements`` table.

    Each announcement points to its ``Agency`` through ``agency_id``.
    All descriptive columns are nullable.
    """
    __tablename__ = 'announcements'
    id = Column(Integer, primary_key=True)
    ref = Column(String(20), nullable=True)
    title = Column(String(50), nullable=True)
    description = Column(String(1000), nullable=True)
    price = Column(Integer, nullable=True)
    price_per_meter = Column(Integer, nullable=True)
    publish_at = Column(DateTime, nullable=True)
    exclusive = Column(Boolean, nullable=True)
    updated_at = Column(DateTime, nullable=True)
    dpe = Column(String(1), nullable=True, info={'comment': 'DPE rating A to G'})
    url = Column(String(200), nullable=True)
    # Integer code; the import code writes AnnouncementType values here.
    type = Column(Integer, nullable=True)
    # Foreign key to the owning agency (table "announcer").
    agency_id = mapped_column(ForeignKey("announcer.id"))
    # Many-to-one side of the link (mirror of Agency.announcements).
    # lazy="joined": the agency is loaded together with the announcement.
    agency: Mapped[Agency] = relationship(
        "Agency",
        back_populates="announcements",
        lazy="joined"
    )

class Caracteristic(Base):
    """ORM model for the property characteristics of an announcement.

    Rows live in the ``estate`` table and reference their ``Announcement``
    through ``announcement_id``. All descriptive columns are nullable.
    """
    __tablename__ = 'estate'
    id = Column(Integer, primary_key=True)
    # Foreign key to the described announcement.
    announcement_id = mapped_column(ForeignKey("announcements.id"))
    # One-directional link to the announcement (no back-reference is declared).
    announcement: Mapped[Announcement] = relationship()
    nb_rooms = Column(Integer, nullable=True)
    nb_bedrooms = Column(Integer, nullable=True)
    location = Column(String(50), nullable=True, info={'comment': 'Location'})
    parking_garage = Column(Boolean, nullable=True)
    garden = Column(Boolean, nullable=True)
    balcony_terrace = Column(Boolean, nullable=True)
    annexes = Column(String(10), nullable=True)
    access = Column(String(50), nullable=True)
    arrangement = Column(String(50), nullable=True)
    dependence = Column(String(50), nullable=True)
    outside = Column(String(50), nullable=True)
    connectivity_index = Column(Integer, nullable=True)
    fiber_eligibility_rate = Column(Integer, nullable=True)
    general_information = Column(String(200), nullable=True)
    # Stored as text, not as a number.
    surface = Column(String(200), nullable=True)
    agencement = Column(String(200), nullable=True)

def init_db():
    """Create the tables of every model registered on ``Base``."""
    Base.metadata.create_all(bind=engine)

def drop_db():
    """Drop the tables of every model registered on ``Base``."""
    Base.metadata.drop_all(bind=engine)

def reset_db():
    """Drop all mapped tables, recreate them, and print a confirmation.

    Existing rows are lost: the tables are dropped before being recreated.
    """
    drop_db()
    init_db()
    print("Database reset completed.")

# Reset the database: drop all mapped tables, then recreate them.
# NOTE: this runs every time the cell/module is executed and discards existing rows.
reset_db()


2025-12-03 16:52:56,274 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2025-12-03 16:52:56,275 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-12-03 16:52:56,276 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2025-12-03 16:52:56,276 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-12-03 16:52:56,277 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2025-12-03 16:52:56,277 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-12-03 16:52:56,278 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-12-03 16:52:56,278 INFO sqlalchemy.engine.Engine DESCRIBE `paruvendu`.`announcer`
2025-12-03 16:52:56,279 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-12-03 16:52:56,281 INFO sqlalchemy.engine.Engine DESCRIBE `paruvendu`.`announcements`
2025-12-03 16:52:56,282 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-12-03 16:52:56,284 INFO sqlalchemy.engine.Engine DESCRIBE `paruvendu`.`estate`
2025-12-03 16:52:56,284 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-12-03 16:52:56,286 INFO sqlalc

In [5]:
# Session factory bound to the engine. autoflush is disabled, so pending
# objects are sent to the database only on an explicit flush() or on commit.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

def _parse_price(value: Any) -> Optional[int]:
    """Return *value* as an integer price, or None when it is not numeric.

    Accepts integers, floats and digit strings that may contain whitespace
    used as a thousands separator (e.g. ``"139 750"``).
    """
    if value is None or isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, float):
        try:
            return int(value)
        except (ValueError, OverflowError):  # NaN or infinity
            return None
    if isinstance(value, str):
        # str.split() with no argument also drops non-breaking spaces
        digits = "".join(value.split())
        return int(digits) if digits.isdecimal() else None
    return None


def _pick_fields(data: Dict[str, Any], allowed) -> Dict[str, Any]:
    """Return a new dict with the entries of *data* whose key is in *allowed*."""
    return {key: data[key] for key in allowed if key in data}


def create_announcement(
    session,
    announcement_data: Dict[str, Any],
    announcer_data: Dict[str, Any],
    estate_data: Optional[Dict[str, Any]] = None,
    *,
    dedupe_agency_by: Optional[list] = None
):
    """Create or update an announcement, its agency and its characteristics.

    Everything runs inside one ``session.begin()`` transaction, which commits
    when the function returns and rolls back if an exception is raised.
    The input dictionaries are never modified.

    Args:
        session: SQLAlchemy session (e.g. ``SessionLocal()``). Because
            ``session.begin()`` is called here, the session is expected to
            have no transaction already in progress.
        announcement_data: Fields of ``Announcement`` (ref, title, price,
            publish_at, url, type, ...). Unknown keys are ignored. ``price``
            may be an int, a float or a digit string (spaces allowed);
            anything else is stored as NULL. ``type`` may be an int, an Enum
            member or a string holding an int.
        announcer_data: Fields of ``Agency`` (name, address, slogan,
            description, telephone, yearMember). Unknown keys are ignored.
        estate_data: Optional fields of ``Caracteristic``. Unknown keys are
            ignored. When None, no characteristics row is touched.
        dedupe_agency_by: Agency fields used to find an existing agency
            instead of creating a duplicate. Defaults to ``["name"]``.

    Returns:
        The ``Announcement`` instance, still attached to ``session``.

    Raises:
        sqlalchemy.exc.SQLAlchemyError: Propagated unchanged when a database
            operation fails (the transaction is rolled back first).
    """
    if dedupe_agency_by is None:
        dedupe_agency_by = ["name"]

    agency_fields = {
        "name", "address", "slogan", "description", "telephone", "yearMember"
    }
    announcement_fields = {
        "ref", "title", "description", "price", "price_per_meter", "publish_at", "exclusive",
        "updated_at", "dpe", "url", "type"
    }
    estate_fields = {
        "parking_garage", "garden", "nb_rooms", "nb_bedrooms", "balcony_terrace", "annexes", "access",
        "arrangement", "dependence", "outside", "connectivity_index", "location",
        "fiber_eligibility_rate", "general_information", "agencement", "surface"
    }

    # Build the announcement values from a filtered copy so the caller's dict
    # stays untouched and can be passed again safely.
    ann_kwargs = _pick_fields(announcement_data, announcement_fields)
    # The price is always set: a missing or non-numeric price becomes NULL.
    ann_kwargs["price"] = _parse_price(announcement_data.get("price"))

    # Normalise "type" to an int: supports int, Enum members and int-like strings.
    kind = ann_kwargs.get("type")
    if kind is not None:
        if hasattr(kind, "value"):
            ann_kwargs["type"] = int(kind.value)
        else:
            try:
                ann_kwargs["type"] = int(kind)
            except (TypeError, ValueError):
                # Deliberately left as provided: the caller is trusted to
                # send a value the integer column accepts.
                pass

    with session.begin():
        # 1) Look for an existing agency using the dedupe fields that were provided.
        agency_filters = {
            key: announcer_data[key]
            for key in dedupe_agency_by
            if announcer_data.get(key) is not None
        }
        agency = None
        if agency_filters:
            stmt = select(Agency).filter_by(**agency_filters)
            agency = session.execute(stmt).scalars().first()

        # 2) Otherwise create it, keeping only keys that are Agency columns.
        if agency is None:
            agency = Agency(**_pick_fields(announcer_data, agency_fields))
            session.add(agency)
            # autoflush is off on SessionLocal: flush so agency.id is available below
            session.flush()

        # 3) Look for an existing announcement with the same ref for this agency.
        announcement = None
        ref = announcement_data.get("ref")
        if ref:
            stmt = select(Announcement).filter_by(ref=ref, agency_id=agency.id)
            announcement = session.execute(stmt).scalars().first()

        # 4) Create the announcement, or update the provided fields of the existing one.
        if announcement is None:
            announcement = Announcement(**ann_kwargs)
            announcement.agency = agency
            session.add(announcement)
            session.flush()  # makes announcement.id available for step 5
        else:
            for field, value in ann_kwargs.items():
                setattr(announcement, field, value)

        # 5) Create or update the characteristics row when data was provided.
        if estate_data is not None:
            stmt = select(Caracteristic).filter_by(announcement_id=announcement.id)
            existing_car = session.execute(stmt).scalars().first()
            car_kwargs = _pick_fields(estate_data, estate_fields)

            if existing_car is None:
                car_kwargs["announcement_id"] = announcement.id
                session.add(Caracteristic(**car_kwargs))
            else:
                for field, value in car_kwargs.items():
                    setattr(existing_car, field, value)

        # The commit happens when the session.begin() block exits.
        return announcement


# --- Exemple d'utilisation ---
# if __name__ == "__main__":
#     s = SessionLocal()

#     agency_payload = {
#         "name": "Agence Dupont",
#         "address": "1 rue de Paris, 75001 Paris",
#         "telephone": "0123456789",
#         "yearMember": "2020",
#         "description": "Agence spécialisée dans les maisons familiales"
#     }

#     announcement_payload = {
#         "ref": "PV-2025-0001",
#         "title": "Maison familiale 4 pièces",
#         "description": "Belle maison avec jardin, proche écoles et commerces.",
    #     "price": 350000,
    #     "price_per_meter": 2500,
    #     "publish_at": datetime.now(),
    #     "exclusive": True,
    #     "updated_at": datetime.now(),
    #     "dpe": "B",
    #     "url": "https://paruvendu.example/annonce/1",
    #     "type": AnnouncementType.MAISON.value,  # int pour la DB
    # }

    # caracteristic_payload = {
    #     "nb_rooms": 5,
    #     "nb_bedrooms": 3,
    #     "location": "Paris 8ème",
    #     "parking_garage": True,
    #     "garden": False,
    #     "balcony_terrace": True,
    #     "annexes": "Cave",
    #     "access": "Digicode, Interphone",
    #     "arrangement": "Traversant",
    #     "outside": "Cour intérieure",
    #     "connectivity_index": 95,
    #     "fiber_eligibility_rate": 100,
    #     "general_information": "Proche métro et commerces"
    # }

    # ann = create_announcement(
    #     s,
    #     announcement_data=announcement_payload,
    #     announcer_data=agency_payload,
    #     estate_data=caracteristic_payload
    # )

    # print("Created announcement id:", ann.id)
    # s.close()


In [6]:
import pandas as pd
import math

# Column layout of data/results.csv, which is read without a header row.
# The trailing comment on each name gives its position and a sample value.
column_names = [
    "url",          # 0: https://...
    "title",        # 1: Vente Maison...
    "location",     # 2: Bray sur seine...
    "desc_short",   # 3: Description T4...
    "description",  # 4: À Vendre...
    "rooms",        # 5: 4
    "surface",      # 6: 92
    "price",        # 7: 139 750
    "agency",       # 8: L'ADRESSE CHESSY
    "agencement",   # 9: 3 chambres, 1 salle de bain
    "general",      # 10: (empty in the sample row)
    "annexes",      # 11: Cave
    "dependence",   # 12: Terrasse
    "id_tech",      # 13: 1764140340
    "reference",    # 14: ParuVendu...
]

# Load every column as text (dtype=str) so no conversion error can occur at this stage.
try:
    df = pd.read_csv(
        "data/results.csv",
        header=None,
        names=column_names,
        dtype=str,
        on_bad_lines='skip',  # skip a malformed line instead of failing
    )
except Exception as e:  # deliberate best-effort: report the problem and continue
    print(f"Erreur à l'ouverture du fichier : {e}")
    # Fall back to an empty frame that still exposes the expected columns,
    # so later code can reference them without special-casing the failure.
    df = pd.DataFrame(columns=column_names, dtype=str)

SyntaxError: invalid syntax (<unknown>, line 1)

In [None]:
def clean_nan(value):
    """Return the placeholder "NB" for a missing value, else *value* unchanged.

    A value counts as missing when it is None, a float NaN, or a string equal
    to "nan" in any letter case.
    """
    is_missing = (
        value is None
        or (isinstance(value, float) and math.isnan(value))
        or (isinstance(value, str) and value.lower() == "nan")
    )
    return "NB" if is_missing else value

In [None]:
def get_property_type(title_value):
    """Return the AnnouncementType value that matches a listing title.

    The title is first passed through clean_nan. A missing title, or one that
    contains neither keyword, yields AnnouncementType.NB. Matching ignores
    letter case and "maison" is checked before "appartement".
    """
    title = clean_nan(title_value)
    if title != "NB":
        lowered = title.lower()
        keyword_to_type = (
            ("maison", AnnouncementType.MAISON),
            ("appartement", AnnouncementType.APPARTEMENT),
        )
        for keyword, property_type in keyword_to_type:
            if keyword in lowered:
                return property_type.value
    return AnnouncementType.NB.value

In [None]:

# Three parallel lists: entry i of each one describes the same CSV row.
announcement_payloads = []
agency_payload = []
caracteristic_payload = []

for row in df.itertuples(index=False):
    ref = clean_nan(row.reference)
    rooms = clean_nan(row.rooms)

    announcement_payloads.append({
        "url": clean_nan(row.url),
        # A missing reference stays None: the "NB" placeholder would make every
        # reference-less listing of an agency look like the same announcement.
        "ref": None if ref == "NB" else ref,
        "title": clean_nan(row.title),
        "description": clean_nan(row.description),
        "price": clean_nan(row.price),
        "type": get_property_type(row.title),
    })
    agency_payload.append({
        "name": clean_nan(row.agency),
    })
    caracteristic_payload.append({
        "location": clean_nan(row.location),
        "surface": clean_nan(row.surface),
        "agencement": clean_nan(row.agencement),
        # Key must match the Caracteristic column name; "general" was ignored
        # by create_announcement and the value was lost.
        "general_information": clean_nan(row.general),
        "dependence": clean_nan(row.dependence),
        # nb_rooms is an Integer column: send an int, or None when the CSV
        # value is missing or not a whole number.
        "nb_rooms": (
            int(rooms)
            if isinstance(rooms, str) and rooms.strip().isdecimal()
            else None
        ),
        "annexes": clean_nan(row.annexes),
    })

In [None]:
if __name__ == "__main__":
    # One session for the whole import; the context manager closes it even
    # when a row fails.
    with SessionLocal() as s:
        ann = None  # stays None when there is nothing to import
        for announcement_data, announcer_data, estate_data in zip(
            announcement_payloads, agency_payload, caracteristic_payload
        ):
            ann = create_announcement(
                s,
                announcement_data=announcement_data,
                announcer_data=announcer_data,
                estate_data=estate_data,
            )

        # Report the last announcement only, and only after the loop.
        # NOTE(review): reading ann.id after a commit presumably reloads the
        # row and opens a new transaction, which would make the next
        # session.begin() in create_announcement fail — confirm before moving
        # this print inside the loop.
        if ann is not None:
            print("Created announcement id:", ann.id)