In [2]:
import sys, os
import psycopg2
from dotenv import load_dotenv

# Put the parent of the current working directory on the import path.
sys.path.append(os.path.abspath(".."))
# Must run before the os.getenv() call below so values supplied through a
# .env file are already present in the process environment.
load_dotenv()

# Connection string handed to psycopg2.connect(); None when the variable is unset.
PGVECTOR_DB_URL = os.getenv("PGVECTOR_DB_URL")

def _add_constraint_if_missing(cur, ddl):
    """Run one ``ALTER TABLE ... ADD CONSTRAINT`` statement inside a savepoint.

    Args:
        cur: Open psycopg2 cursor belonging to the surrounding transaction.
        ddl: The complete ``ALTER TABLE`` statement to execute.

    Returns:
        True if the constraint was created, False if the server reported that
        an object with that name already exists.

    Raises:
        psycopg2.Error: Any other database error is propagated unchanged.
    """
    # Imported explicitly: a bare ``import psycopg2`` is not guaranteed to
    # load the ``errors`` submodule as an attribute of the package.
    from psycopg2 import errors as pg_errors

    # A failed statement aborts the whole transaction. The savepoint lets us
    # undo only the ALTER instead of throwing away earlier work.
    cur.execute("SAVEPOINT add_constraint")
    try:
        cur.execute(ddl)
    except (pg_errors.DuplicateObject, pg_errors.DuplicateTable):
        # NOTE(review): for UNIQUE constraints PostgreSQL appears to report a
        # name clash as "relation already exists" (DuplicateTable) because the
        # backing index shares the constraint name; DuplicateObject is kept
        # for backward compatibility. Confirm against the target server.
        cur.execute("ROLLBACK TO SAVEPOINT add_constraint")
        return False
    cur.execute("RELEASE SAVEPOINT add_constraint")
    return True


def clean_and_constrain_vector_tables():
    """De-duplicate the summary tables and enforce uniqueness on their keys.

    For ``filing_summaries`` the row with the smallest ``id`` per ``accession``
    is kept; for ``exhibit_summaries`` the row with the smallest ``id`` per
    ``(accession, exhibit_label)`` is kept. A UNIQUE constraint is then added
    on those columns unless one with the same name already exists.

    All statements run in a single transaction that is committed at the end.
    If any unexpected error occurs, the whole transaction is rolled back.
    Progress is reported with ``print``.

    Raises:
        psycopg2.Error: On connection failure or any unexpected SQL error.
    """
    conn = psycopg2.connect(PGVECTOR_DB_URL)
    try:
        # ``with conn`` commits on success and rolls back on an exception, but
        # it does not close the connection -- that is done in ``finally``.
        with conn:
            with conn.cursor() as cur:
                print("🧹 Cleaning filing_summaries duplicates...")
                cur.execute("""
                    DELETE FROM filing_summaries
                    WHERE id NOT IN (
                        SELECT MIN(id)
                        FROM filing_summaries
                        GROUP BY accession
                    );
                """)
                print("✅ Cleaned filing_summaries")

                print("🔒 Adding uniqueness constraint on accession...")
                added = _add_constraint_if_missing(cur, """
                    ALTER TABLE filing_summaries
                    ADD CONSTRAINT unique_accession UNIQUE (accession);
                """)
                if added:
                    print("✅ Constraint added to filing_summaries\n")
                else:
                    print("⚠️ Constraint already exists on filing_summaries")

                print("🧹 Cleaning exhibit_summaries duplicates...")
                cur.execute("""
                    DELETE FROM exhibit_summaries
                    WHERE id NOT IN (
                        SELECT MIN(id)
                        FROM exhibit_summaries
                        GROUP BY accession, exhibit_label
                    );
                """)
                print("✅ Cleaned exhibit_summaries")

                print("🔒 Adding uniqueness constraint on (accession, exhibit_label)...")
                added = _add_constraint_if_missing(cur, """
                    ALTER TABLE exhibit_summaries
                    ADD CONSTRAINT unique_exhibit UNIQUE (accession, exhibit_label);
                """)
                if added:
                    print("✅ Constraint added to exhibit_summaries")
                else:
                    print("⚠️ Constraint already exists on exhibit_summaries")
    finally:
        conn.close()


# ✅ Run it
# NOTE: this call executes as soon as the cell/module runs and issues
# DELETE and ALTER TABLE statements against the database in PGVECTOR_DB_URL.
clean_and_constrain_vector_tables()


🧹 Cleaning filing_summaries duplicates...
✅ Cleaned filing_summaries
🔒 Adding uniqueness constraint on accession...
✅ Constraint added to filing_summaries

🧹 Cleaning exhibit_summaries duplicates...
✅ Cleaned exhibit_summaries
🔒 Adding uniqueness constraint on (accession, exhibit_label)...
✅ Constraint added to exhibit_summaries
