In [None]:
import time

import boto3

# ========================== #
#     AWS Glue → Redshift    #
# Auto Table Creation Script #
# ========================== #

# --- Configuration Block ---
# AWS resources and connection parameters; edit these for your environment.

# Glue catalog database whose tables are read
glue_db = "aw_db"

# Redshift target: cluster identifier, database name, user (needs DDL
# privileges) and the schema in which the tables are created
redshift_cluster_id = "redshift-cluster-2"
redshift_db = "dev"
redshift_user = "awsuser"
redshift_schema = "public"

# AWS Region used by both clients below
region = "ap-south-1"

# --- Initialize Boto3 Clients ---
# `glue` talks to the Glue service and `redshift` to the Redshift Data API
# ("redshift-data"); both are created for the same region.
glue, redshift = (
    boto3.client(service_name, region_name=region)
    for service_name in ("glue", "redshift-data")
)



In [None]:
# --- Glue to Redshift Type Mapper ---
# Converts Glue data types to compatible Redshift data types
def map_glue_to_redshift(glue_type):
    """Translate a Glue column type string into a Redshift column type.

    Args:
        glue_type: Type string as stored in the Glue catalog, e.g. "bigint",
            "decimal(10,2)" or "varchar(50)". Matching is case-insensitive
            and ignores surrounding whitespace.

    Returns:
        The Redshift type to use in a column definition. Parameterised
        decimal/varchar/char types keep their precision, scale or length.
        Anything unrecognised (including complex types such as array<...>,
        map<...> and struct<...>) falls back to VARCHAR(256) so the generated
        DDL stays valid.
    """
    fallback = "VARCHAR(256)"  # Default string size — tune based on your workload
    simple_types = {
        "string": fallback,
        "tinyint": "SMALLINT",   # Redshift has no 1-byte integer; SMALLINT is the smallest
        "smallint": "SMALLINT",
        "int": "INTEGER",
        "bigint": "BIGINT",
        "double": "FLOAT8",
        "float": "FLOAT4",
        "decimal": "DECIMAL",    # no explicit precision: Redshift applies its own default
        "boolean": "BOOLEAN",
        "timestamp": "TIMESTAMP",
        "date": "DATE",
    }

    normalized = glue_type.strip().lower()

    # Parameterised scalar types look like "name(arg[,arg])". The endswith
    # check keeps nested complex types such as "struct<a:decimal(10,2)>" out
    # of this branch so they reach the fallback below.
    base, paren, remainder = normalized.partition("(")
    if paren and remainder.endswith(")"):
        base = base.strip()
        args = [part.strip() for part in remainder[:-1].split(",")]
        if all(arg.isdigit() for arg in args):
            if base == "decimal" and len(args) in (1, 2):
                return f"DECIMAL({','.join(args)})"
            if base in ("varchar", "char") and len(args) == 1:
                return f"{base.upper()}({args[0]})"

    # Fallback to VARCHAR for any unknown type to avoid breaking DDL
    return simple_types.get(normalized, fallback)



In [None]:
# --- Retrieve All Tables from Glue Catalog ---
# The get_tables paginator is walked page by page so that every table in the
# configured Glue database is seen; only the table names are kept.
paginator = glue.get_paginator("get_tables")
pages = paginator.paginate(DatabaseName=glue_db)

all_tables = [entry["Name"] for page in pages for entry in page["TableList"]]

print(f"Found {len(all_tables)} tables in Glue database '{glue_db}'")



In [None]:
# --- Table Creation Loop ---
# For every Glue table, construct a Redshift-compatible DDL, execute it through
# the Redshift Data API and wait for the statement to actually finish.


def _quote_identifier(name):
    """Return `name` as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


def _wait_for_statement(statement_id, poll_seconds=1.0, timeout_seconds=300.0):
    """Poll the Redshift Data API until a statement reaches a terminal status.

    Args:
        statement_id: The "Id" returned by execute_statement.
        poll_seconds: Delay between describe_statement calls.
        timeout_seconds: Maximum total time to wait.

    Returns:
        The describe_statement response whose Status is FINISHED, FAILED or
        ABORTED.

    Raises:
        TimeoutError: If no terminal status is reached within timeout_seconds.
    """
    deadline = time.monotonic() + timeout_seconds
    while True:
        description = redshift.describe_statement(Id=statement_id)
        if description["Status"] in ("FINISHED", "FAILED", "ABORTED"):
            return description
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"statement {statement_id} still {description['Status']} "
                f"after {timeout_seconds} seconds"
            )
        time.sleep(poll_seconds)


for table in all_tables:
    # Everything for one table runs inside the try so that a single bad table
    # is reported and the loop moves on to the next one.
    try:
        # Get full metadata for the current table
        response = glue.get_table(DatabaseName=glue_db, Name=table)
        columns = response["Table"].get("StorageDescriptor", {}).get("Columns", [])
        # NOTE(review): only StorageDescriptor columns are used; partition keys
        # (Table["PartitionKeys"]) are not added to the DDL — confirm whether
        # they should be.

        if not columns:
            # An empty column list would yield "CREATE TABLE ... ( );", which is invalid.
            print(f"Skipping table '{table}': no columns defined in Glue.")
            continue

        # Construct CREATE TABLE IF NOT EXISTS SQL statement. Identifiers are
        # quoted so that reserved words and special characters coming from the
        # catalog do not break the statement.
        column_defs = ",\n".join(
            f"  {_quote_identifier(col['Name'])} {map_glue_to_redshift(col['Type'])}"
            for col in columns
        )
        qualified_name = f"{_quote_identifier(redshift_schema)}.{_quote_identifier(table)}"
        ddl = f"CREATE TABLE IF NOT EXISTS {qualified_name} (\n{column_defs}\n);"

        print(f"\nCreating table in Redshift: {table}")
        print(ddl)  # Optional: Log the SQL statement for debugging or audit

        # --- Execute DDL in Redshift ---
        # execute_statement only submits the SQL; the outcome has to be read
        # back with describe_statement once the statement has finished.
        result = redshift.execute_statement(
            ClusterIdentifier=redshift_cluster_id,
            Database=redshift_db,
            DbUser=redshift_user,
            Sql=ddl
        )
        outcome = _wait_for_statement(result["Id"])
        if outcome["Status"] != "FINISHED":
            raise RuntimeError(
                outcome.get("Error") or f"statement ended with status {outcome['Status']}"
            )

        print(f" Table '{table}' created successfully in Redshift.")
    except Exception as e:
        # Catch and log any error for visibility; the remaining tables are still processed
        print(f"Failed to create table '{table}': {e}")
