In [50]:
import logging
import os
import time

import boto3

In [61]:
# Configure the "delete_tables" logger so that INFO-level (and above) records
# are written both to a local log file and to the console.
log_file_path = 'logs/delete_tables.log'

# Create a logger object
logger = logging.getLogger("delete_tables")

# Set the logging level to INFO
logger.setLevel(logging.INFO)

# logging.getLogger returns the same object on every call, so re-running this
# cell would otherwise stack extra handlers and duplicate every log message.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# FileHandler does not create missing directories; make sure the target exists
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

# Create a FileHandler to write the log messages to a file
file_handler = logging.FileHandler(log_file_path)
file_handler.setLevel(logging.INFO)

# Create a StreamHandler to write the log messages to the console
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)

# Create a formatter to format the log messages
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Set the formatter for the handlers
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)

# Add the handlers to the logger object
logger.addHandler(file_handler)
logger.addHandler(console_handler)

In [62]:
# Destination in S3 for the log file and the Athena query output
bucket_name = "abc" # change bucket name
# Plain string: there is nothing to interpolate, so no f-prefix is needed
key_path = "cleaning_test_data_and_tables_temp_remove_after_2024.01"

# Athena configuration: S3 prefix where Athena writes its query output files
output_location = f"s3://{bucket_name}/{key_path}/athena_query_output_delete_tables/"

In [None]:
# Read table names from a file, one entry per line.
# Surrounding whitespace is trimmed and blank lines are dropped so that an
# empty line (e.g. a trailing newline run) does not become a bogus table name.
with open("tables_to_delete.txt") as f:
    tables = [entry.strip() for entry in f.read().splitlines() if entry.strip()]
len(tables)

In [None]:
# Create Athena and S3 client
client = boto3.client("athena")
s3 = boto3.client("s3")

# Seconds to pause between status polls, so the wait loop does not call the
# Athena API in a tight busy-loop while a query is queued or running
poll_interval_seconds = 1

# Loop through the list of tables and run one Athena query per table.
# NOTE: the query currently executed is the SELECT count(*) dry run; the
# DROP TABLE statement is only enabled in the DEPLOY stage (see below).
for table in tables:
    split_name = table.strip().split(".")

    # A blank or malformed entry would raise IndexError below and abort the
    # whole run; log it and move on to the next table instead
    if len(split_name) < 2 or not split_name[0] or not split_name[1]:
        logger.warning(f"Skipping entry that is not in 'database.table' form: {table!r}")
        continue

    database_name = split_name[0]
    table_name = split_name[1]

    # Build the SQL query
    sql_query = f"SELECT count(*) FROM {database_name}.{table_name}"
    # !!! Uncomment below line and change QueryString value in DEPLOY stage !!!
    #sql_drop_table_query = f"DROP TABLE IF EXISTS {database_name}.{table_name}"

    # Execute the query
    response = client.start_query_execution(
        QueryString=sql_query,
        QueryExecutionContext={
            'Database': database_name
        },
        ResultConfiguration={
            'OutputLocation': output_location
        }
    )

    query_execution_id = response['QueryExecutionId']

    # Wait for the query to complete: poll the execution status until it
    # leaves the QUEUED/RUNNING states, sleeping between polls
    while True:
        execution_status = client.get_query_execution(QueryExecutionId=query_execution_id)['QueryExecution']['Status']
        query_status = execution_status['State']
        if query_status not in ('QUEUED', 'RUNNING'):
            break
        time.sleep(poll_interval_seconds)

    # Query results
    # NOTE(review): `result` is not used further in this cell; presumably it
    # is kept for manual inspection in the notebook -- confirm or remove.
    if query_status == 'SUCCEEDED':
        result = client.get_query_results(QueryExecutionId=query_execution_id)

    logger.info(f"Query status: {query_status} ::: Table: {table_name} ::: Query execution ID: {response['QueryExecutionId']}")

    # For unsuccessful queries also record why, when Athena reports a reason
    if query_status != 'SUCCEEDED':
        state_change_reason = execution_status.get('StateChangeReason')
        if state_change_reason:
            logger.warning(f"Query {query_execution_id} for table {table_name} did not succeed: {state_change_reason}")

In [66]:
# Upload the local log file to the S3 bucket, under "<key_path>/logs/"
file_name = "delete_tables.log"
file_local_path = "./logs/" + file_name
key = "/".join([key_path, "logs", file_name])
s3.upload_file(Filename=file_local_path, Bucket=bucket_name, Key=key)