# In [None]:
import sys
import time

import boto3

# Define the parameters
SOURCE_TABLE_NAME = 'jobdetails_jobs_with_details'
NEW_TABLE_NAME = 'jobdetails_jobs_with_details_parquet_tbl'
NEW_TABLE_S3_BUCKET = 's3://jobdetails-parquet-tbl-1/'
MY_DATABASE = 'de_proj_database2'
QUERY_RESULTS_S3_BUCKET = 's3://my-athena-bucket-dec2024'

# Initialize Athena client
client = boto3.client('athena')

# Start the query execution
queryStart = client.start_query_execution(
    QueryString=f"""
    CREATE TABLE {NEW_TABLE_NAME} WITH (
        external_location='{NEW_TABLE_S3_BUCKET}',
        format='PARQUET',
        write_compression='SNAPPY',
        partitioned_by = ARRAY['yr_mo_partition']
    ) AS
    SELECT
        job_id,
        title,
        company_name,
        location,
        salary,
        created,
        last_updated,
        seniority,
        description,
        external_url,
        applicants_count,
        timestamp,
        SUBSTRING(last_updated, 1, 7) AS yr_mo_partition
    FROM "{MY_DATABASE}"."{SOURCE_TABLE_NAME}"
    ;
    """,
    QueryExecutionContext={
        'Database': f'{MY_DATABASE}'
    },
    ResultConfiguration={'OutputLocation': f'{QUERY_RESULTS_S3_BUCKET}'}
)

# List of possible query statuses
resp = ["FAILED", "SUCCEEDED", "CANCELLED"]

# Poll the query execution status
response = client.get_query_execution(QueryExecutionId=queryStart["QueryExecutionId"])
while response["QueryExecution"]["Status"]["State"] not in resp:
    response = client.get_query_execution(QueryExecutionId=queryStart["QueryExecutionId"])

# Handle failed query execution
if response["QueryExecution"]["Status"]["State"] == 'FAILED':
    sys.exit(response["QueryExecution"]["Status"]["StateChangeReason"])

# Log the success
print(f"Query succeeded. Parquet table created: {NEW_TABLE_NAME}")
