In [1]:
import psycopg2
from psycopg2 import pool
import json
import pandas as pd
import time
from psycopg2.extras import execute_batch
from concurrent.futures import ThreadPoolExecutor

# Wall-clock start; the elapsed time is reported at the end of the script.
start_time = time.time()

# NOTE(review): connection settings, including the password, are hard-coded
# in source. Consider loading them from the environment or a secrets store.
DB_NAME = "next_gen"
DB_USER = "sde"
DB_PASS = "sde"
DB_HOST = "salesiqgen2.cygagau4oro0.us-west-2.rds.amazonaws.com"
DB_PORT = "5432"


try:
    # Create a connection pool (between 1 and 10 connections).
    connection_pool = psycopg2.pool.ThreadedConnectionPool(
        1,
        10,
        database=DB_NAME,
        user=DB_USER,
        password=DB_PASS,
        host=DB_HOST,
        port=DB_PORT,
    )
except Exception as e:
    print(f"Connection pool creation failed: {e}")
    # Without a pool nothing below can work; re-raise so the real cause is
    # reported instead of a later NameError on `connection_pool`.
    raise
else:
    print("Connection pool created successfully")

# Function to find max level
def find_max_level(cur):
    """Return the highest consecutive level whose code column has data.

    Starting at level 1, counts the rows of
    ``qa_mergetest5_position_geo_temp`` where ``level<N>_code`` is not NULL
    and stops at the first level for which that count is zero.

    Args:
        cur: An open database cursor providing ``execute`` and ``fetchone``.

    Returns:
        The last level that had at least one non-NULL code, or 0 when even
        level 1 has none.
    """
    confirmed = 0
    while True:
        candidate = confirmed + 1
        code_column = f"level{candidate}_code"
        cur.execute(f"SELECT COUNT(*) FROM qa_mergetest5_position_geo_temp WHERE {code_column} IS NOT NULL")
        if cur.fetchone()[0] == 0:
            # First level without data: the previous one is the maximum.
            return confirmed
        confirmed = candidate

# Aggregate qa_mergetest5_position_geo_temp once per hierarchy level (summed
# metrics plus unioned shapes) and insert the resulting rows into hg_positions.

# Start the handles as None so the cleanup in `finally` is safe even when
# acquiring the connection or cursor fails.
conn = None
cur = None

try:
    # Get a connection from the pool
    conn = connection_pool.getconn()
    cur = conn.cursor()

    max_level = find_max_level(cur)

    cur.execute("""
        SELECT DISTINCT jsonb_object_keys(CAST(metric_data AS jsonb)) AS metric_key
        FROM qa_mergetest5_position_geo_temp
    """)
    metric_keys = [row[0] for row in cur.fetchall()]

    def _sql_literal(text):
        """Return *text* as a single-quoted SQL literal with quotes doubled."""
        return "'" + text.replace("'", "''") + "'"

    # Prepare metric data items for the SQL query. The keys come from table
    # data, so they are escaped before being spliced into the statement.
    metric_data_items = ', '.join(
        f"{_sql_literal(metric_key)}, "
        f"SUM(COALESCE((metric_data::jsonb->>{_sql_literal(metric_key)})::float, 0))"
        for metric_key in metric_keys
    )

    def execute_query(query):
        """Run *query* on its own pooled connection and return all rows."""
        worker_conn = connection_pool.getconn()
        try:
            with worker_conn.cursor() as worker_cur:
                worker_cur.execute(query)
                return worker_cur.fetchall()
        finally:
            # Always hand the connection back, even if the query failed.
            connection_pool.putconn(worker_conn)

    # One aggregation query per level; the column names are derived from the
    # integer level only.
    queries = []
    for level in range(1, max_level + 1):
        code_column = f"level{level}_code"
        code_name = f"level{level}_name"
        queries.append(f"""
            SELECT 
                {code_column} AS code,
                {code_name} AS name,
                {level} AS level, 
                jsonb_build_object(
                    {metric_data_items}
                ) AS metric_data,
                ST_Union(shape) AS shape
            FROM qa_mergetest5_position_geo_temp
            GROUP BY {code_column}, {code_name}
        """)

    # Run the per-level queries concurrently; each worker borrows its own
    # connection from the pool.
    with ThreadPoolExecutor(max_workers=6) as executor:
        results = list(executor.map(execute_query, queries))

    # Flatten the results
    results = [item for sublist in results for item in sublist]

    # Build the insert rows straight from the fetched tuples. Round-tripping
    # them through a DataFrame can coerce NULLs to NaN and integers to NumPy
    # scalars, which psycopg2 cannot adapt.
    records_list = [
        (code, name, int(level), json.dumps(metric_data), shape)
        for code, name, level, metric_data, shape in results
    ]

    # The DataFrame is still built so the notebook namespace keeps exposing
    # `df`; it is no longer the source of the inserted rows.
    columns = ["code", "name", "level", "metric_data", "shape"]
    df = pd.DataFrame(results, columns=columns)
    df["metric_data"] = df["metric_data"].apply(json.dumps)

    # Define the insert query
    insert_query = """
        INSERT INTO hg_positions (code, name, level, metric_data, shape)
        VALUES (%s, %s, %s, %s, %s)
    """

    # Bulk insert using execute_batch
    execute_batch(cur, insert_query, records_list)

    # Commit the transaction
    conn.commit()

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    # Close the cursor and return the connection to the pool, skipping any
    # handle that was never acquired.
    if cur is not None:
        cur.close()
    if conn is not None:
        connection_pool.putconn(conn)

    # Close all connections in the pool
    connection_pool.closeall()

end_time = time.time()
elapsed_time = end_time - start_time
print(f"Script completed in {elapsed_time:.2f} seconds")


Connection pool created successfully
Script completed in 26.01 seconds
