How to efficiently insert data that could be repeated in a loop? #10150
-
|
I think my code explains it better than my attempt at the title. I have a many-to-many relation between I tried and I ended up with three approaches, sadly, none of them work. Two of them does not exactly do what I want, the other errors out. I have left inline comments in the code for the three approaches I tried and why it does not do what I want. So the question, how can I achieve this in the best possible way such that "creators" are not repeated and the foreign key constraint is satisfied? TIA 🙂 from sqlalchemy import Column, ForeignKey, Table, create_engine
from sqlalchemy.orm import (
DeclarativeBase,
Mapped,
Session,
mapped_column,
relationship,
)
from sqlalchemy.dialects.postgresql import insert
class Base(DeclarativeBase):
pass
association_table = Table(
"association_table",
Base.metadata,
Column("book_id", ForeignKey("book.id", ondelete="cascade")),
Column("creator_name", ForeignKey("creator.name", ondelete="cascade")),
)
class Book(Base):
__tablename__ = "book"
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str]
creators: Mapped[list["Creator"]] = relationship(
secondary=association_table, back_populates="books"
)
class Creator(Base):
__tablename__ = "creator"
name: Mapped[str] = mapped_column(primary_key=True)
books: Mapped[list[Book]] = relationship(
secondary=association_table, back_populates="creators"
)
engine = create_engine("postgresql+psycopg://")
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
rows = [
{"creators": ["Creator1", "Creator2"], "name": "Book1"},
{
"creators": ["Creator3", "Creator1", "Creator3"], # this is duplicated in source
"name": "Book2",
},
# more rows (tested with 3k rows having at most 10 creators per row)
]
with Session(engine) as session, session.begin():
for row in rows:
# takes 6 seconds for actual data, works fine, except the association table lacks one row (2, 'Creator1')
# creators = session.scalars(
# insert(Creator)
# .on_conflict_do_nothing(index_elements=[Creator.name])
# .values([{"name": i} for i in row.pop("creators")])
# .returning(Creator)
# ).all()
# fails due to IntegrityError
# creators = [session.get(Creator, i) or Creator(name=i) for i in row.pop("creators")]
# takes 11 seconds for actual data, works fine, except the association table has a repeated row (2, 'Creator3')
creators = [session.merge(Creator(name=i)) for i in row.pop("creators")]
session.add(Book(creators=creators, **row)) |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment
-
|
we have a general recipe for "unique object" where unique is on some field such as a name at https://github.com/sqlalchemy/sqlalchemy/wiki/UniqueObject |
Beta Was this translation helpful? Give feedback.
we have a general recipe for "unique object" where unique is on some field such as a name at https://github.com/sqlalchemy/sqlalchemy/wiki/UniqueObject