In [1]:
import subprocess
import os
import tempfile
import csv
import re
# Function to extract table names from the query log file
def extract_table_names(file_path):
    """Collect the distinct table names announced in a query log.

    Every line is stripped of surrounding whitespace; lines that then start
    with ``Table:`` contribute the text between that marker and the next
    occurrence of the marker (or the end of the line), stripped again.

    Args:
        file_path: Path of the log file to scan.

    Returns:
        A set of table-name strings. Order is not preserved and duplicates
        collapse into one entry.
    """
    marker = "Table:"
    with open(file_path, 'r') as log:
        stripped_lines = (raw.strip() for raw in log)
        return {
            entry.split(marker)[1].strip()
            for entry in stripped_lines
            if entry.startswith(marker)
        }

# Function to execute PostgreSQL configuration commands
def execute_config_commands(db_params):
    """Run a fixed batch of PostgreSQL ``SET`` commands through one psql call.

    Args:
        db_params: Mapping with the keys ``host``, ``port``, ``user``,
            ``dbname`` and ``password`` describing the connection.

    Returns:
        None. A psql that cannot be started or that exits non-zero is
        reported on stdout and otherwise ignored.

    NOTE: all commands are sent with a single ``psql -c`` invocation, i.e.
    one psql process. ``SET`` is session-scoped in PostgreSQL, so these
    settings are not expected to carry over to other psql invocations.
    """
    postgres_config_commands = [
        "SET statement_timeout = 0;",
        "SET lock_timeout = 0;",
        "SET idle_in_transaction_session_timeout = 0;",
        "SET client_encoding = 'UTF8';",
        "SET standard_conforming_strings = on;",
        #"SELECT pg_catalog.set_config('search_path', '', false);",
        "SET check_function_bodies = false;",
        "SET xmloption = content;",
        "SET client_min_messages = warning;",
        "SET row_security = off;",
        "SET session_replication_role = 'replica';"  # This line disables foreign key checks
    ]
    config_command = " ".join(postgres_config_commands)

    # An argument list (no shell) keeps connection values and the SQL text
    # from being re-interpreted by /bin/sh.
    argv = [
        "psql",
        "-h", str(db_params['host']),
        "-p", str(db_params['port']),
        "-U", str(db_params['user']),
        "-d", str(db_params['dbname']),
        "-c", config_command,
    ]
    # Extend the current environment instead of replacing it, so PATH, HOME
    # and any PG* variables stay visible to psql.
    env = {**os.environ, 'PGPASSWORD': db_params['password']}

    try:
        # check=True turns a non-zero psql exit status into an exception so
        # it is reported below instead of passing silently.
        subprocess.run(argv, env=env, check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        print(f"Error executing configuration commands: {e}")

def get_row_count(table_name, db_params):
    """Return the number of rows in ``table_name`` as reported by psql.

    Args:
        table_name: Table to count; interpolated into the SQL text as-is.
        db_params: Mapping with the keys ``host``, ``port``, ``user``,
            ``dbname`` and ``password`` describing the connection.

    Returns:
        The row count as an int, or 0 when psql cannot be started, reports
        an error, or prints something that is not a number. The reason is
        printed to stdout in each of those cases.
    """
    argv = [
        "psql",
        "-h", str(db_params['host']),
        "-p", str(db_params['port']),
        "-U", str(db_params['user']),
        "-d", str(db_params['dbname']),
        # Unaligned, tuples-only output: stdout is just the number. In the
        # default aligned format the first line is the " count " column
        # header, which can never be parsed as an int.
        "-A", "-t",
        "-c", f"SELECT COUNT(*) FROM {table_name};",
    ]
    # Extend the current environment instead of replacing it, so PATH, HOME
    # and any PG* variables stay visible to psql.
    env = {**os.environ, 'PGPASSWORD': db_params['password']}

    try:
        result = subprocess.run(argv, env=env, capture_output=True, text=True)
    except OSError as e:
        print(f"Error getting row count for {table_name}: {e}")
        return 0

    # Skip blank lines so a leading/trailing empty line cannot hide the value.
    first_value = next(
        (line.strip() for line in result.stdout.splitlines() if line.strip()),
        None,
    )
    if first_value is None:
        problem = result.stderr.strip()
        if problem:
            print(f"Error getting row count for {table_name}: {problem}")
        else:
            print(f"No output received from count command for table {table_name}")
        return 0

    try:
        return int(first_value)
    except ValueError as e:
        print(f"Error getting row count for {table_name}: {e}")
        return 0

def import_csv_to_db(table_name, db_params, schema):
    """Load ``/app/outputs/output_<table_name>.csv`` into ``table_name`` via psql.

    A temporary SQL script is generated and run with ``psql -f``. The script
    sets ``search_path`` to ``schema``, sets ``session_replication_role`` to
    ``'replica'``, creates a temp table shaped like the target, fills it from
    the CSV with ``\\copy`` (header row skipped), inserts every staged row
    into the target with a plain ``INSERT ... SELECT *`` (no conflict
    handling), and drops the temp table.

    Args:
        table_name: Target table; also selects the CSV file name.
        db_params: Mapping with the keys ``host``, ``port``, ``user``,
            ``dbname`` and ``password`` describing the connection.
        schema: Value placed verbatim in ``SET search_path TO ...``.

    Returns:
        The row count parsed from psql's ``INSERT 0 <n>`` command tag, or 0
        when the CSV file is missing, psql cannot be started, or no such tag
        appears in psql's output.
    """
    csv_file_path = f"/app/outputs/output_{table_name}.csv"  # Adjusted path
    print(f"\tfile : {csv_file_path}")
    if not os.path.exists(csv_file_path):
        print(f"\tCSV file for {table_name} not found. Skipping...")
        return 0

    def quote_ident(identifier):
        # Double-quoted SQL identifier; embedded double quotes are doubled.
        return '"' + identifier.replace('"', '""') + '"'

    target = quote_ident(table_name)
    # Quote the staging name the same way as the target so any table name
    # the target accepts also yields a valid temp-table name.
    staging = quote_ident(f"tmp_{table_name}")

    script_lines = [
        f"SET search_path TO {schema};",
        "SET session_replication_role = 'replica';",
        f"CREATE TEMP TABLE {staging} (LIKE {target} INCLUDING DEFAULTS);",
        # \copy reads the file on the client side, where this script runs.
        f"\\copy {staging} FROM '{csv_file_path}' WITH CSV HEADER;",
        f"INSERT INTO {target} SELECT * FROM {staging};",
        f"DROP TABLE {staging};",
    ]

    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.sql') as sql_script:
        sql_script.write("\n".join(script_lines) + "\n")
        sql_script_path = sql_script.name

    # An argument list (no shell) keeps connection values and the script
    # path from being re-interpreted by /bin/sh.
    argv = [
        "psql",
        "-h", str(db_params['host']),
        "-p", str(db_params['port']),
        "-U", str(db_params['user']),
        "-d", str(db_params['dbname']),
        "-f", sql_script_path,
    ]
    # Extend the current environment instead of replacing it, so PATH, HOME
    # and any PG* variables stay visible to psql.
    env = {**os.environ, 'PGPASSWORD': db_params['password']}

    try:
        print(f"\tExecuting : {' '.join(argv)}")
        result = subprocess.run(argv, env=env, capture_output=True, text=True)
    except OSError as e:
        # psql could not be started, so nothing was imported.
        print(f"\tError importing CSV for {table_name}: {e}")
        return 0
    finally:
        # Clean up temporary file
        os.remove(sql_script_path)

    # Surface whatever psql wrote to stderr; otherwise a failed statement is
    # indistinguishable from an import that inserted nothing.
    diagnostics = result.stderr.strip()
    if diagnostics:
        print(f"\tpsql reported for table {table_name}: {diagnostics}")

    # Use regex to find the number of rows inserted
    match = re.search(r'INSERT 0 (\d+)', result.stdout)
    if match is None:
        print(f"\tNo rows inserted for table {table_name} or unable to parse output.")
        return 0

    rows_inserted = int(match.group(1))
    print(f"\tfor table {table_name} inserted {rows_inserted}")
    return rows_inserted
    
def run_psql_command(command, db_params):
    """Run a shell command line (normally a psql call) and pick out its value.

    Args:
        command: Complete command line; it is executed through the shell.
        db_params: Mapping whose ``password`` entry is exported to the
            command as ``PGPASSWORD``.

    Returns:
        A ``(value, error)`` tuple:

        * ``(third_stdout_line_stripped, None)`` when the command succeeds
          and prints at least three lines;
        * ``("Expected output not found", None)`` when it succeeds with
          fewer than three lines;
        * ``(None, stripped_stderr)`` when it exits with a non-zero status.
    """
    # Pass the password through the environment rather than the command line.
    # Extend the current environment instead of replacing it, so PATH, HOME
    # and any PG* variables stay visible to the command.
    env_vars = {**os.environ, 'PGPASSWORD': db_params['password']}
    try:
        completed_process = subprocess.run(
            command, shell=True, env=env_vars, text=True,
            capture_output=True, check=True,
        )
    except subprocess.CalledProcessError as e:
        return None, (e.stderr or "").strip()

    output_lines = completed_process.stdout.splitlines()

    # Debugging: print all output lines
    print("\tOutput from psql command:")
    for line in output_lines:
        print(line)

    # Assuming the actual value is in the third line of the output
    # (after the column header and the separator line).
    if len(output_lines) >= 3:
        return output_lines[2].strip(), None
    return "Expected output not found", None
# Function to set and then immediately check the session_replication_role
def set_and_check_replication_role(db_params):
    """Set ``session_replication_role`` to replica and print what psql shows.

    Both statements are sent in one ``psql -c`` call, and the value returned
    by ``run_psql_command`` (the third line of psql's output) is printed as
    the resulting role. If the command fails, its error text is printed
    instead.

    Args:
        db_params: Mapping with the keys ``host``, ``user``, ``dbname`` and
            ``password``; ``port`` is used when present.

    Returns:
        None.

    NOTE(review): the SET runs inside this single psql invocation only, so
    this looks like a diagnostic and is not expected to affect later psql
    calls. Confirm that is the intent.
    """
    # Pass the port explicitly, like every other psql call in this file;
    # without it psql falls back to its default port and may reach a
    # different server than the import itself.
    port = db_params.get('port')
    port_option = f" -p {port}" if port is not None else ""
    set_role_command = (
        f"psql -h {db_params['host']}{port_option} -U {db_params['user']} -d {db_params['dbname']} "
        "-c \"SET session_replication_role = replica; SHOW session_replication_role;\""
    )
    role_after_setting, error = run_psql_command(set_role_command, db_params)
    if role_after_setting is not None:
        print(f"session_replication_role after setting to replica: {role_after_setting}")
    else:
        print(f"Error: {error}")

# Calling the function

# Main function to execute the import process
def main():
    """Import every table listed in the table log and record the row counts.

    Connection settings come from the ``DB_HOST``, ``DB_NAME``, ``DB_USER``,
    ``DB_PASSWORD``, ``DB_PORT`` and ``DB_SCHEMA`` environment variables,
    each with a built-in default. After the replication-role check, each
    table named in ``/app/outputs/tables_log.txt`` is imported, and the
    per-table counts are written as ``<table>: <count>`` lines to a report
    file under ``/app/outputs``.
    """
    env = os.environ
    db_params = {
        'host': env.get('DB_HOST', 'localhost'),
        'dbname': env.get('DB_NAME', 'defaultdb'),
        'user': env.get('DB_USER', 'postgres'),
        'password': env.get('DB_PASSWORD', 'password'),
        'port': int(env.get('DB_PORT', 5432)),
    }
    schema = os.getenv('DB_SCHEMA', 'public')
    table_log_file = '/app/outputs/tables_log.txt'  # Adjusted path
    export_filename = (
        "/app/outputs/imported_rows_per_table_"
        f"{db_params['dbname']}_{db_params['user']}_{schema}.txt"
    )

    # Show the session_replication_role psql reports before importing.
    set_and_check_replication_role(db_params)

    # Table name -> number of rows reported as imported.
    imported_rows_count = {}
    for name in extract_table_names(table_log_file):
        print(f"Executing import for table {name}")
        imported_rows_count[name] = import_csv_to_db(name, db_params, schema)

    # Persist the per-table counts, one "<table>: <count>" line each.
    with open(export_filename, 'w') as report:
        report.writelines(
            f"{table}: {count}\n" for table, count in imported_rows_count.items()
        )
# Run the import only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

CSV file for psp.app_deposits not found. Skipping...
CSV file for psp.app_brands not found. Skipping...
CSV file for psp.app_availability_rules not found. Skipping...
