In [17]:
import boto3
import jaydebeapi

# Redshift Serverless configuration
region = 'ap-south-1'
workgroup_name = 'demo-workgroup'
namespace = 'demo-namespace'
db_name = 'dev'
account_id = '20250703T155944'
iam_role = 'arn:aws:iam::310879042055:role/service-role/AmazonRedshift-CommandsAccessRole-20250703T155944'
#'arn:aws:redshift-serverless:us-east-1:310879042055:namespace/281acd8d-bab1-40e0-b841-b44f5cf4b4ce'

# Initialize boto3 client with the IAM role
session = boto3.Session(region_name=region)
sts_client = session.client('sts')
assumed_role = sts_client.assume_role(RoleArn=iam_role, RoleSessionName='RedshiftSession')
credentials = assumed_role['Credentials']

# Configure boto3 client with temporary credentials
redshift_client = boto3.client(
    'redshift',
    region_name=region,
    aws_access_key_id=credentials['AccessKeyId'],
    aws_secret_access_key=credentials['SecretAccessKey'],
    aws_session_token=credentials['SessionToken']
)

# Generate temporary database credentials
db_user = 'admin'  # Replace with your Redshift database user (e.g., 'admin' or another user in 'dev')
response = redshift_client.get_cluster_credentials(
    DbUser=db_user,
    DbName=db_name,
    ClusterIdentifier=workgroup_name,
    AutoCreate=False
)

# JDBC connection string
jdbc_url = f'jdbc:redshift:iam://{workgroup_name}.{account_id}.{region}.redshift-serverless.amazonaws.com:5439/{db_name}'

# Connect to Redshift using JDBC driver
conn = jaydebeapi.connect(
    'com.amazon.redshift.jdbc.Driver',
    jdbc_url,
    [response['DbUser'], response['DbPassword']],
    'path/to/redshift-jdbc42-2.1.0.29.jar'  # Replace with path to JDBC driver
)

# Test the connection with a sample query
cursor = conn.cursor()
cursor.execute('SELECT CURRENT_USER')
print(f"Connected as: {cursor.fetchone()[0]}")

# Clean up
cursor.close()
conn.close()

ClientError: An error occurred (AccessDenied) when calling the GetClusterCredentials operation: User: arn:aws:sts::310879042055:assumed-role/AmazonRedshift-CommandsAccessRole-20250703T155944/RedshiftSession is not authorized to perform: redshift:GetClusterCredentials on resource: arn:aws:redshift:ap-south-1:310879042055:dbuser:demo-workgroup/admin because no identity-based policy allows the redshift:GetClusterCredentials action

In [None]:
'''
{
	"Version": "2012-10-17",
	"Statement": [
		{
			"Effect": "Allow",
			"Principal": {
				"Service": [
					"redshift-serverless.amazonaws.com",
					"sagemaker.amazonaws.com",
					"redshift.amazonaws.com"
				]
			},
			"Action": "sts:AssumeRole"
		}
	]
}'''

In [None]:
import boto3
import pandas as pd
import time
from io import StringIO
import uuid

# --- Redshift Serverless and S3 Configuration (replace with your actual details) ---
AWS_REGION = 'ap-south-1' # Example: 'us-east-1'. Make sure this matches your Redshift region.
REDSHIFT_SERVERLESS_WORKGROUP_NAME = 'demo-workgroup' # e.g., 'redshift-serverless-wg-prod'
REDSHIFT_SERVERLESS_DB_NAME = 'dev' # The database name within your workgroup (e.g., 'dev' or 'master')
# This is the database user that will execute the SQL. This user should exist in your Redshift DB.
# It can be mapped to an IAM user/role for granular permissions within Redshift.
REDSHIFT_DB_USER = 'IAM:RootIdentity' # e.g., 'admin_user' or 'analytics_role'

S3_BUCKET_NAME = 'your-unique-redshift-serverless-upload-bucket' # Must be globally unique
S3_KEY_PREFIX = 'redshift-data-uploads/' # Path within your S3 bucket

# IAM Role ARN that your Redshift Serverless Namespace uses to access S3 for COPY/UNLOAD commands.
# This role needs s3:GetObject permission on S3_BUCKET_NAME.
IAM_ROLE_ARN_FOR_COPY = 'arn:aws:iam::123456789012:role/YourRedshiftServerlessCopyRole'


def execute_redshift_data_api_statement(sql_statement, workgroup_name, database, db_user, wait_for_completion=True):
    """
    Executes a SQL statement on Redshift Serverless using the Redshift Data API.
    Optionally waits for the statement to complete and handles basic error reporting.
    """
    client = boto3.client('redshift-data', region_name=AWS_REGION)

    try:
        response = client.execute_statement(
            WorkgroupName=workgroup_name, # Specify the workgroup for Serverless
            Database=database,
            DbUser=db_user,
            Sql=sql_statement,
            WithEvent=False # Set to True if you want to use EventBridge for async notifications
        )
        statement_id = response['Id']
        print(f"Submitted SQL statement with ID: {statement_id}")

        if wait_for_completion:
            status = ''
            start_time = time.time()
            # Poll for statement status (consider more sophisticated retry/backoff for production)
            while status not in ['FINISHED', 'FAILED', 'ABORTED']:
                time.sleep(2) # Wait 2 seconds before polling again
                desc_response = client.describe_statement(Id=statement_id)
                status = desc_response['Status']
                print(f"Statement {statement_id} status: {status} ({time.time() - start_time:.1f}s)")

                if status == 'FAILED':
                    print(f"Error executing statement: {desc_response.get('Error', 'Unknown error')}")
                    return None
                elif status == 'ABORTED':
                    print(f"Statement was aborted.")
                    return None
            print(f"Statement {statement_id} finished in {time.time() - start_time:.1f} seconds.")
            return statement_id
        else:
            return statement_id # Return ID immediately if not waiting

    except Exception as e:
        print(f"Failed to execute statement via Redshift Data API: {e}")
        return None

def get_redshift_data_api_statement_results(statement_id):
    """Retrieves results of a completed Redshift Data API statement."""
    client = boto3.client('redshift-data', region_name=AWS_REGION)
    try:
        response = client.get_statement_result(Id=statement_id)
        # 'Records' is a list of lists, where each inner list contains column values
        # formatted as {'stringValue': '...', 'longValue': '...', etc.}
        records = response.get('Records', [])

        # For simple queries like listing databases, we can extract string values.
        # For complex queries, you might need to iterate through column metadata and parse.
        results = []
        for record in records:
            # Assuming the first value is the one we want for simple listings
            if record and isinstance(record, list) and len(record) > 0:
                if 'stringValue' in record[0]:
                    results.append(record[0]['stringValue'])
                elif 'longValue' in record[0]:
                    results.append(record[0]['longValue'])
                # Add other types as needed
        return results
    except Exception as e:
        print(f"Error getting statement results: {e}")
        return []

def list_redshift_serverless_databases(workgroup_name, database, db_user):
    """Lists databases in Redshift Serverless using the Data API."""
    sql = "SELECT datname FROM pg_database;"
    statement_id = execute_redshift_data_api_statement(sql, workgroup_name, database, db_user)

    if statement_id:
        databases = get_redshift_data_api_statement_results(statement_id)
        print("\nRedshift Serverless Databases:")
        if databases:
            for db in databases:
                print(f"- {db}")
        else:
            print("No databases found or results could not be retrieved.")
        return databases
    return []

def upload_dataframe_to_redshift_serverless(
    df,
    table_name,
    workgroup_name,
    database,
    db_user,
    s3_bucket,
    s3_key_prefix,
    iam_role_arn_for_copy,
    create_table_sql=None
):
    """
    Uploads a pandas DataFrame to Redshift Serverless using S3 and the COPY command via Data API.
    """
    s3_client = boto3.client('s3', region_name=AWS_REGION)

    # Convert DataFrame to CSV in memory (without header or index for COPY)
    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False, header=False, sep=',')

    # Generate a unique S3 object key
    s3_file_name = f"{s3_key_prefix}{table_name}_{uuid.uuid4()}.csv"
    s3_path = f"s3://{s3_bucket}/{s3_file_name}"

    try:
        # 1. Upload DataFrame data to S3
        print(f"Uploading data to S3: {s3_path}...")
        s3_client.put_object(Bucket=s3_bucket, Key=s3_file_name, Body=csv_buffer.getvalue())
        print(f"Data successfully uploaded to S3.")

        # 2. (Optional) Create table if it doesn't exist
        if create_table_sql:
            print(f"Checking/Creating table '{table_name}'...")
            create_stmt_id = execute_redshift_data_api_statement(
                create_table_sql, workgroup_name, database, db_user
            )
            if create_stmt_id:
                print(f"Table creation statement processed for '{table_name}'.")
            else:
                print(f"Failed to process table creation statement for '{table_name}'. Data load might fail.")

        # 3. Execute Redshift COPY command via Data API
        copy_command = f"""
        COPY {table_name}
        FROM '{s3_path}'
        IAM_ROLE '{iam_role_arn_for_copy}'
        CSV
        IGNOREHEADER 0;
        """
        print(f"Executing COPY command for table '{table_name}'...")
        copy_stmt_id = execute_redshift_data_api_statement(
            copy_command, workgroup_name, database, db_user
        )

        if copy_stmt_id:
            print(f"Data successfully loaded into Redshift Serverless table '{table_name}'.")
            # 4. (Optional) Clean up temporary S3 file
            s3_client.delete_object(Bucket=s3_bucket, Key=s3_file_name)
            print(f"Temporary S3 file deleted: {s3_path}")
        else:
            print(f"Data load into '{table_name}' failed or was aborted.")

    except Exception as e:
        print(f"An error occurred during data upload: {e}")
    finally:
        csv_buffer.close()

# --- Main execution block ---
if __name__ == "__main__":
    # Ensure your AWS credentials are configured (e.g., via `aws configure` in your terminal,
    # or by setting environment variables like AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
    # or if running on an EC2 instance/Lambda, attach an IAM role with necessary permissions).

    print("Attempting to list databases in Redshift Serverless...")
    listed_databases = list_redshift_serverless_databases(
        REDSHIFT_SERVERLESS_WORKGROUP_NAME,
        REDSHIFT_SERVERLESS_DB_NAME,
        REDSHIFT_DB_USER
    )

    # Example DataFrame to upload
    sample_data = {
        'event_id': [1, 2, 3, 4],
        'event_name': ['Login', 'Logout', 'Purchase', 'PageView'],
        'timestamp': ['2025-07-03 10:00:00', '2025-07-03 10:30:00', '2025-07-03 11:00:00', '2025-07-03 11:15:00'],
        'user_id': [101, 101, 102, 103]
    }
    df_to_upload = pd.DataFrame(sample_data)
    target_table = 'user_events'

    # SQL to create the target table if it doesn't exist
    # Adjust column names and data types as per your DataFrame
    create_table_sql = f"""
    CREATE TABLE IF NOT EXISTS {target_table} (
        event_id INT,
        event_name VARCHAR(255),
        timestamp TIMESTAMP,
        user_id INT
    );
    """

    print(f"\nAttempting to upload data to table: {target_table}")
    upload_dataframe_to_redshift_serverless(
        df_to_upload,
        target_table,
        REDSHIFT_SERVERLESS_WORKGROUP_NAME,
        REDSHIFT_SERVERLESS_DB_NAME,
        REDSHIFT_DB_USER,
        S3_BUCKET_NAME,
        S3_KEY_PREFIX,
        IAM_ROLE_ARN_FOR_COPY,
        create_table_sql=create_table_sql
    )

    print("\nScript execution complete.")