In [1]:
import psycopg2
import json
config_file = r"21_load\db_config.json"


def get_db_connection(config_path=config_file, database_override=None):
    """
    Open a psycopg2 connection using the settings stored in a JSON config file.

    Args:
        config_path (str): Path to the JSON file whose key/value pairs are passed
            to ``psycopg2.connect`` as keyword arguments.
        database_override (str | None): When truthy, replaces the "database"
            entry of the loaded config before connecting.

    Returns:
        The connection object returned by ``psycopg2.connect``. The caller is
        responsible for closing it.
    """
    # Read the file as UTF-8 explicitly, like the other open() calls in this
    # notebook; without it open() uses the locale encoding.
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    if database_override:
        config["database"] = database_override

    return psycopg2.connect(**config)


In [2]:
from psycopg2 import connect, sql, errors
import json
config_file = r"21_load\db_config.json"


def load_config(config_path=config_file):
    """
    Read the database connection settings from a JSON file.

    Args:
        config_path (str): Path to the JSON config file; defaults to ``config_file``.

    Returns:
        The parsed JSON content. The helpers in this cell read the keys
        "user", "password", "host" and "port" from it.
    """
    # Explicit UTF-8 keeps decoding independent of the machine's locale encoding.
    with open(config_path, encoding="utf-8") as f:
        return json.load(f)

def database_exists(db_name, config_path=config_file):
    """
    Report whether a database called ``db_name`` is listed in ``pg_database``.

    The lookup runs over a connection to the "postgres" database, opened with
    the user, password, host and port from the config file.

    Args:
        db_name (str): Database name to look for.
        config_path (str): Path to the JSON config file.

    Returns:
        bool: True when a matching ``pg_database`` row is found, else False.
    """
    settings = load_config(config_path)
    connection = connect(
        database="postgres",
        user=settings["user"],
        password=settings["password"],
        host=settings["host"],
        port=settings["port"],
    )
    cursor = connection.cursor()
    try:
        cursor.execute("SELECT 1 FROM pg_database WHERE datname = %s;", (db_name,))
        match = cursor.fetchone()
        return match is not None
    finally:
        # Runs on both the return path and the error path.
        cursor.close()
        connection.close()

def create_database(db_name, config_path=config_file):
    """
    Create the database ``db_name`` and print the outcome.

    Connects to the "postgres" database with the credentials from the config
    file. A database that already exists is reported with a message instead of
    raising; any other error propagates to the caller.

    Args:
        db_name (str): Name of the database to create.
        config_path (str): Path to the JSON config file.
    """
    settings = load_config(config_path)
    connection = connect(
        database="postgres",
        user=settings["user"],
        password=settings["password"],
        host=settings["host"],
        port=settings["port"],
    )
    # Autocommit is switched on before the statement is issued.
    connection.autocommit = True
    cursor = connection.cursor()
    try:
        # sql.Identifier quotes the name, so it is never spliced in as raw text.
        statement = sql.SQL("CREATE DATABASE {}").format(sql.Identifier(db_name))
        cursor.execute(statement)
        print(f"Database '{db_name}' created.")
    except errors.DuplicateDatabase:
        print(f"Database '{db_name}' already exists.")
    finally:
        cursor.close()
        connection.close()

def delete_database(db_name, config_path=config_file):
    """
    Drop the database ``db_name`` and print the outcome.

    Connects to the "postgres" database with the credentials from the config
    file. Two failures are reported with a message instead of raising: the
    database does not exist, or it is currently in use. Any other error
    propagates to the caller.

    Args:
        db_name (str): Name of the database to drop.
        config_path (str): Path to the JSON config file.
    """
    settings = load_config(config_path)
    connection = connect(
        database="postgres",
        user=settings["user"],
        password=settings["password"],
        host=settings["host"],
        port=settings["port"],
    )
    # Autocommit is switched on before the statement is issued.
    connection.autocommit = True
    cursor = connection.cursor()
    try:
        # sql.Identifier quotes the name, so it is never spliced in as raw text.
        statement = sql.SQL("DROP DATABASE {} ").format(sql.Identifier(db_name))
        cursor.execute(statement)
        print(f"Database '{db_name}' deleted.")
    except errors.InvalidCatalogName:
        print(f"Database '{db_name}' does not exist.")
    except errors.ObjectInUse:
        print(f"Cannot drop '{db_name}': it is currently being accessed.")
    finally:
        cursor.close()
        connection.close()


In [3]:
## Create DB 4M:
# Guarded so that re-running the cell does not try to create a database that is already there.
# database_exists and create_database come from the helper cell above; run that cell first.

if not database_exists("4M"):
    create_database("4M")

NameError: name 'database_exists' is not defined

In [12]:
## Delete DB 4M:
# Destructive: drops the whole "4M" database when it is present.

if database_exists("4M"):
     delete_database("4M")

Database '4M' deleted.


In [10]:
## Delete DB 4M:
# Destructive: drops the "4M" database (if present) before the create step below.

if database_exists("4M"):
     delete_database("4M")

## Create DB 4M:
# Only creates when absent; if the drop above did not succeed, the database
# still exists and this step is skipped.

if not database_exists("4M"):
    create_database("4M")

    
config_file = r"21_load\db_config.json"


def execute_sql_script(db_name, sql_file_path, config_path=config_file):
    """
    Run the full contents of a SQL file against the database ``db_name``.

    The file is read as UTF-8 and handed to the cursor in a single
    ``execute`` call on an autocommit connection. Any exception (unreadable
    file, SQL error, ...) is caught and printed rather than raised.

    Args:
        db_name (str): Database to connect to.
        sql_file_path (str): Path of the SQL script to run.
        config_path (str): Path to the JSON config file with the credentials.
    """
    settings = load_config(config_path)
    connection = connect(
        database=db_name,
        user=settings["user"],
        password=settings["password"],
        host=settings["host"],
        port=settings["port"],
    )
    connection.autocommit = True
    cursor = connection.cursor()
    try:
        with open(sql_file_path, "r", encoding="utf-8") as script_file:
            script_text = script_file.read()
        cursor.execute(script_text)
        print(f"SQL script '{sql_file_path}' executed successfully on database '{db_name}'.")
    except Exception as exc:
        print(f"Error executing SQL script: {exc}")
    finally:
        cursor.close()
        connection.close()
# Run 21_load/create_db_script.sql against database "4M"; failures are printed, not raised.
execute_sql_script("4M", "21_load/create_db_script.sql")


Cannot drop '4M': it is currently being accessed.
Error executing SQL script: relation "merged_dataset_metadata" already exists



In [18]:
def load_csv_to_table(db_name, table_name, csv_path, config_path=config_file):
    """
    Bulk-load a CSV file (with a header row) into ``table_name`` via COPY.

    The file is opened as UTF-8 and streamed to
    ``COPY <table> FROM STDIN WITH CSV HEADER`` on an autocommit connection.
    Any exception (unreadable file, constraint violation, ...) is caught and
    printed rather than raised.

    Args:
        db_name (str): Database to connect to.
        table_name (str): Target table; quoted through ``sql.Identifier``.
        csv_path (str): Path of the CSV file to load.
        config_path (str): Path to the JSON config file with the credentials.
    """
    settings = load_config(config_path)
    connection = connect(
        database=db_name,
        user=settings["user"],
        password=settings["password"],
        host=settings["host"],
        port=settings["port"],
    )
    connection.autocommit = True
    cursor = connection.cursor()
    try:
        with open(csv_path, 'r', encoding='utf-8') as csv_file:
            copy_statement = sql.SQL("COPY {} FROM STDIN WITH CSV HEADER").format(
                sql.Identifier(table_name)
            )
            cursor.copy_expert(copy_statement, csv_file)
        print(f"CSV '{csv_path}' loaded into table '{table_name}'.")
    except Exception as exc:
        print(f"Error loading CSV: {exc}")
    finally:
        cursor.close()
        connection.close()

# Load the three merged metadata CSVs into their same-named tables in "4M".
# Each call prints its own success or error message, so a failure in one load
# does not stop the following ones.
# NOTE(review): the dataset table is loaded first, presumably because the other
# two tables reference it - confirm against the schema script.
csv_path_dataset = r"11_group\merged_dataset_metadata.csv"
load_csv_to_table("4M", "merged_dataset_metadata", csv_path_dataset)

csv_path_contact_point = r"11_group\merged_contact_point_metadata.csv"
load_csv_to_table("4M", "merged_contact_point_metadata", csv_path_contact_point)

csv_path_distribution = r"11_group\merged_distribution_metadata.csv"
load_csv_to_table("4M", "merged_distribution_metadata", csv_path_distribution)


Error loading CSV: duplicate key value violates unique constraint "merged_dataset_metadata_pkey"
DETAIL:  Key (dataset_identifier)=(da4696e6-c546-4a4f-bbf4-36d5d01c9e2f-6571@agis_service_center) already exists.
CONTEXT:  COPY merged_dataset_metadata, line 2

CSV '11_group\merged_contact_point_metadata.csv' loaded into table 'merged_contact_point_metadata'.
CSV '11_group\merged_distribution_metadata.csv' loaded into table 'merged_distribution_metadata'.


### Delete

In [4]:
import psycopg2
import json

# In a raw string a backslash is already literal, so the separator is written once.
config_file = r"21_load\db_config.json"


def load_config(config_path=config_file):
    """
    Read the database connection settings from a JSON file.

    Args:
        config_path (str): Path to the JSON config file; defaults to ``config_file``.

    Returns:
        The parsed JSON content. The code in this cell reads the keys
        "user", "password", "host" and "port" from it.
    """
    # Explicit UTF-8 keeps decoding independent of the machine's locale encoding.
    with open(config_path, encoding="utf-8") as f:
        return json.load(f)

def fetch_metadata_details(db_name, xml_filename):
    """
    Print dataset, contact-point and distribution details for one XML file.

    Joins merged_dataset_metadata with merged_contact_point_metadata and
    merged_distribution_metadata on dataset_identifier (LEFT JOINs, so a
    dataset without contacts or distributions is still listed) and prints
    every resulting row as a tuple.

    Args:
        db_name (str): PostgreSQL database name.
        xml_filename (str): XML filename to filter by.

    Returns:
        None. Connection and query errors are caught and printed, not raised.
    """
    query = """
    SELECT
        d.dataset_identifier,
        d.xml_filename,
        c.contact_name,
        dist.distribution_title_DE,
        dist.distribution_format
    FROM merged_dataset_metadata d
    LEFT JOIN merged_contact_point_metadata c
        ON d.dataset_identifier = c.dataset_identifier
    LEFT JOIN merged_distribution_metadata dist
        ON d.dataset_identifier = dist.dataset_identifier
    WHERE d.xml_filename = %s;
    """

    config = load_config()

    conn = None
    try:
        conn = psycopg2.connect(
            database=db_name,
            user=config["user"],
            password=config["password"],
            host=config["host"],
            port=config["port"]
        )
        # The cursor context manager closes the cursor even when execute() fails.
        with conn.cursor() as cur:
            cur.execute(query, (xml_filename,))
            for row in cur.fetchall():
                print(row)
    except Exception as e:
        # print() accepts no `exception` keyword, so the error text goes into the message.
        print(f"Error fetching metadata details: {e}")
    finally:
        # Close the connection on both the success and the error path.
        if conn is not None:
            conn.close()


# Example usage: print the joined metadata rows stored for zuri-wc.xml in database "4M".
fetch_metadata_details("4M", "zuri-wc.xml")


('c771f5e3-14bf-4549-87f8-9e2270842eaa@stadt-zurich', 'zuri-wc.xml', 'Open Data Zürich', 'Zueri_WC.json', 'http://publications.europa.eu/resource/authority/file-type/JSON')
('c771f5e3-14bf-4549-87f8-9e2270842eaa@stadt-zurich', 'zuri-wc.xml', 'Open Data Zürich', 'Web Map Tile Service', 'http://publications.europa.eu/resource/authority/file-type/XML')
('c771f5e3-14bf-4549-87f8-9e2270842eaa@stadt-zurich', 'zuri-wc.xml', 'Open Data Zürich', 'Web Feature Service', 'http://publications.europa.eu/resource/authority/file-type/WFS_SRVC')
('c771f5e3-14bf-4549-87f8-9e2270842eaa@stadt-zurich', 'zuri-wc.xml', 'Open Data Zürich', 'Web Map Service', 'http://publications.europa.eu/resource/authority/file-type/WMS_SRVC')
('c771f5e3-14bf-4549-87f8-9e2270842eaa@stadt-zurich', 'zuri-wc.xml', 'Open Data Zürich', 'Zueri_WC.csv', 'http://publications.europa.eu/resource/authority/file-type/CSV')
('c771f5e3-14bf-4549-87f8-9e2270842eaa@stadt-zurich', 'zuri-wc.xml', 'Open Data Zürich', 'Zueri_WC.dxf', 'http://pu

In [12]:
import psycopg2
import json

# In a raw string a backslash is already literal, so the separator is written once.
config_file = r"21_load\db_config.json"


def load_config(config_path=config_file):
    """
    Read the database connection settings from a JSON file.

    Args:
        config_path (str): Path to the JSON config file; defaults to ``config_file``.

    Returns:
        The parsed JSON content. The code in this cell reads the keys
        "user", "password", "host" and "port" from it.
    """
    # Explicit UTF-8 keeps decoding independent of the machine's locale encoding.
    with open(config_path, encoding="utf-8") as f:
        return json.load(f)

def fetch_metadata_details(db_name, xml_filename):
    """
    Print every merged_distribution_metadata row stored for one XML file.

    Args:
        db_name (str): PostgreSQL database name.
        xml_filename (str): XML filename to filter by.

    Returns:
        None. Each matching row is printed as a tuple. Connection and query
        errors are caught and printed, not raised.
    """
    query = """
        SELECT * FROM merged_distribution_metadata WHERE xml_filename = %s;
    """

    config = load_config()

    conn = None
    try:
        conn = psycopg2.connect(
            database=db_name,
            user=config["user"],
            password=config["password"],
            host=config["host"],
            port=config["port"]
        )
        # The cursor context manager closes the cursor even when execute() fails.
        with conn.cursor() as cur:
            cur.execute(query, (xml_filename,))
            for row in cur.fetchall():
                print(row)
    except Exception as e:
        # print() accepts no `exception` keyword, so the error text goes into the message.
        print(f"Error fetching metadata details: {e}")
    finally:
        # Close the connection on both the success and the error path.
        if conn is not None:
            conn.close()


# Example usage: print the distribution rows stored for zuri-wc.xml in database "4M".
fetch_metadata_details("4M", "zuri-wc.xml")


In [11]:
import psycopg2
import json

# In a raw string a backslash is already literal, so the separator is written once.
config_file = r"21_load\db_config.json"


def load_config(config_path=config_file):
    """
    Read the database connection settings from a JSON file.

    Args:
        config_path (str): Path to the JSON config file; defaults to ``config_file``.

    Returns:
        The parsed JSON content. The code in this cell reads the keys
        "user", "password", "host" and "port" from it.
    """
    # Explicit UTF-8 keeps decoding independent of the machine's locale encoding.
    with open(config_path, encoding="utf-8") as f:
        return json.load(f)

def delete_metadata_by_filename(db_name, xml_filename):
    """
    Delete the dataset row(s) recorded for ``xml_filename`` and print a summary.

    Looks up every dataset_identifier in merged_dataset_metadata that matches
    the filename, then deletes those rows one by one on an autocommit
    connection. Rows in the related metadata tables are expected to be removed
    through cascading foreign keys (defined in the schema, not visible here).

    Args:
        db_name (str): PostgreSQL database name.
        xml_filename (str): XML filename to match.

    Returns:
        None. Connection and query errors are caught and printed, not raised.
    """
    config = load_config()

    conn = None
    try:
        conn = psycopg2.connect(
            database=db_name,
            user=config["user"],
            password=config["password"],
            host=config["host"],
            port=config["port"]
        )
        conn.autocommit = True
        # The cursor context manager closes the cursor even when a statement fails.
        with conn.cursor() as cur:
            # First get all dataset_identifiers matching the xml_filename
            select_query = "SELECT dataset_identifier FROM merged_dataset_metadata WHERE xml_filename = %s;"
            cur.execute(select_query, (xml_filename,))
            dataset_ids = cur.fetchall()

            if not dataset_ids:
                print(f"No entries found for xml_filename = '{xml_filename}'")
            else:
                # Each fetched row is a 1-tuple, hence the trailing comma when unpacking.
                for dataset_id, in dataset_ids:
                    cur.execute("DELETE FROM merged_dataset_metadata WHERE dataset_identifier = %s;", (dataset_id,))
                print(f"Deleted {len(dataset_ids)} dataset(s) and all related metadata entries for xml_filename = '{xml_filename}'")
    except Exception as e:
        # print() accepts no `exception` keyword, so the error text goes into the message.
        print(f"Error deleting metadata entries: {e}")
    finally:
        # Close the connection on both the success and the error path.
        if conn is not None:
            conn.close()

# Example usage: remove the dataset row(s) recorded for zuri-wc.xml from database "4M".
# Optional check beforehand (needs a cell that defines fetch_metadata_details):
# fetch_metadata_details("4M", "your_filename.xml")
delete_metadata_by_filename("4M", "zuri-wc.xml")

Deleted 1 dataset(s) and all related metadata entries for xml_filename = 'zuri-wc.xml'


In [1]:
import psycopg2
import json
import csv

# In a raw string a backslash is already literal, so the separator is written once.
config_file = r"21_load\db_config.json"


def load_config(config_path=config_file):
    """
    Read the database connection settings from a JSON file.

    Args:
        config_path (str): Path to the JSON config file; defaults to ``config_file``.

    Returns:
        The parsed JSON content. The code in this cell reads the keys
        "user", "password", "host" and "port" from it.
    """
    # Explicit UTF-8 keeps decoding independent of the machine's locale encoding.
    with open(config_path, encoding="utf-8") as f:
        return json.load(f)

def delete_metadata_by_filename(db_name, xml_filename):
    """
    Delete the dataset row(s) recorded for ``xml_filename`` and print a summary.

    Looks up every dataset_identifier in merged_dataset_metadata that matches
    the filename, then deletes those rows one by one on an autocommit
    connection. Rows in the related metadata tables are expected to be removed
    through cascading foreign keys (defined in the schema, not visible here).

    Args:
        db_name (str): PostgreSQL database name.
        xml_filename (str): XML filename to match.

    Returns:
        None. Connection and query errors are caught and printed, not raised.
    """
    config = load_config()

    conn = None
    try:
        conn = psycopg2.connect(
            database=db_name,
            user=config["user"],
            password=config["password"],
            host=config["host"],
            port=config["port"]
        )
        conn.autocommit = True
        # The cursor context manager closes the cursor even when a statement fails.
        with conn.cursor() as cur:
            # First get all dataset_identifiers matching the xml_filename
            select_query = "SELECT dataset_identifier FROM merged_dataset_metadata WHERE xml_filename = %s;"
            cur.execute(select_query, (xml_filename,))
            dataset_ids = cur.fetchall()

            if not dataset_ids:
                print(f"No entries found for xml_filename = '{xml_filename}'")
            else:
                # Each fetched row is a 1-tuple, hence the trailing comma when unpacking.
                for dataset_id, in dataset_ids:
                    cur.execute("DELETE FROM merged_dataset_metadata WHERE dataset_identifier = %s;", (dataset_id,))
                print(f"Deleted {len(dataset_ids)} dataset(s) and all related metadata entries for xml_filename = '{xml_filename}'")
    except Exception as e:
        # print() accepts no `exception` keyword, so the error text goes into the message.
        print(f"Error deleting metadata entries: {e}")
    finally:
        # Close the connection on both the success and the error path.
        if conn is not None:
            conn.close()

def delete_from_csv_list(csv_path, db_name):
    """
    Delete the metadata for every XML filename listed in a CSV file.

    The CSV is read as UTF-8 with a header row. For each row, the value of the
    'Dataset_Name' column is used as the xml_filename passed to
    delete_metadata_by_filename. Rows where that column is missing or empty
    are skipped.

    Args:
        csv_path (str): Path to the CSV file.
        db_name (str): PostgreSQL database name.

    Returns:
        None. Errors while opening or reading the CSV are caught and printed,
        not raised.
    """
    try:
        with open(csv_path, newline='', encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile):
                xml_filename = row.get('Dataset_Name')
                if xml_filename:
                    delete_metadata_by_filename(db_name, xml_filename)
    except Exception as e:
        # print() accepts no `exception` keyword, so the error text goes into the message.
        print(f"Error processing CSV file for deletions: {e}")

# Example usage: delete, from database "4M", every dataset named in the
# Dataset_Name column of the removal-order CSV.
delete_from_csv_list("removeorder_metadata_opendata.swiss.csv", "4M")


No entries found for xml_filename = 'zuri-wc.xml'
No entries found for xml_filename = '__.xml'
No entries found for xml_filename = '__86-1.xml'
