In [1]:
import redshift_connector
import boto3
import os
import pandas as pd # Optional, for showing dataframes

# --- Configuration ---
# Replace with your actual Redshift cluster details
DATABASE_NAME='dev' # Or your specific database name
DB_USER='admin' # A Redshift database user that exists in your cluster
AWS_REGION='us-east-1' # e.g., 'us-east-1'
REDSHIFT_SERVERLESS_WORKGROUP_NAME='demo-workgroup-01'

# S3 bucket for COPY/UNLOAD (must exist and have Redshift role access)
S3_BUCKET_NAME = 'kkm2-unique-test-bucket-2025-06-26-py-1'
S3_FILE_TO_UNLOAD = 'redshift_sample.csv'
S3_UNLOAD_PATH = f's3://{S3_BUCKET_NAME}/{S3_FILE_TO_UNLOAD}'
S3_COPY_PATH = f's3://{S3_BUCKET_NAME}/flights.csv' # Path to your existing sample data



# Endpoint
# default-workgroup.310879042055.us-east-1.redshift-serverless.amazonaws.com:5439/dev
# JDBC URL
# jdbc:redshift://default-workgroup.310879042055.us-east-1.redshift-serverless.amazonaws.com:5439/dev
# ODBC URL
# Driver={Amazon Redshift (x64)}; Server=default-workgroup.310879042055.us-east-1.redshift-serverless.amazonaws.com; Database=dev




# Ensure your AWS CLI is configured or environment variables are set
# AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION

print("--- Starting Redshift Python Demo ---")

# --- 1. Securely Connect using IAM Authentication ---
# This is the recommended way. redshift_connector uses boto3's underlying
# credential chain (environment variables, ~/.aws/credentials, IAM roles).
print("\n1. Establishing secure connection to Redshift using IAM...")
try:
    conn = redshift_connector.connect(
        iam=True,
        # For SERVERLESS, use workgroup_name
        workgroup_name=REDSHIFT_SERVERLESS_WORKGROUP_NAME, # <<<<<< Make sure this line is present
        database=DATABASE_NAME,
        db_user=DB_USER,
        region=AWS_REGION
    )

    # conn = redshift_connector.connect(
    #     iam=True, # Enable IAM authentication
    #     # workgroup_name=WORKGROUP_NAME, # Specify the Redshift Serverless Workgroup
    #     database=DATABASE_NAME,
    #     # db_user=DB_USER, # Often not needed if using IAM authentication
    #     region=AWS_REGION
    # )
    print("REDSHIFT: Connection successful!")
    cursor = conn.cursor()

except Exception as e:
    print(f"Error connecting to Redshift: {e}")
    print("Please ensure your AWS CLI is configured, Redshift cluster is accessible,")
    print("and the IAM user/role has 'redshift:GetClusterCredentials' permission.")
    exit() # Exit if connection fails, as nothing else will work

# --- 2. Execute a Simple Query ---
print("\n2. Executing a simple SELECT query:")
try:
    cursor.execute("SELECT id, name, city FROM users LIMIT 5;")
    results = cursor.fetchall()

    if results:
        print("Query Results:")
        for row in results:
            print(row)
    else:
        print("No results found.")
except Exception as e:
    print(f"Error executing query: {e}")

# --- 3. Parameterized Query (Prevent SQL Injection) ---
print("\n3. Demonstrating a parameterized query:")
try:
    city_filter = 'London'
    cursor.execute("SELECT id, name FROM users WHERE city = %s;", (city_filter,))
    filtered_results = cursor.fetchall()
    print(f"Users in {city_filter}:")
    for row in filtered_results:
        print(row)
except Exception as e:
    print(f"Error with parameterized query: {e}")

# --- 4. INSERT Data ---
print("\n4. Inserting new data:")
try:
    new_user_id = 4
    new_user_name = 'Alice Wonderland'
    new_user_city = 'Seattle'
    cursor.execute("INSERT INTO users (id, name, city) VALUES (%s, %s, %s);",
                   (new_user_id, new_user_name, new_user_city))
    conn.commit() # Commit the transaction
    print(f"Inserted user: {new_user_name}")

    # Verify insertion
    cursor.execute(f"SELECT * FROM users WHERE id = {new_user_id};")
    print("Verifying insertion:")
    print(cursor.fetchone())

except Exception as e:
    conn.rollback() # Rollback on error
    print(f"Error inserting data: {e}")

# --- 5. UNLOAD Data to S3 ---
# Requires IAM role associated with Redshift cluster to have S3 write permissions
print("\n5. Unloading data to S3:")
try:
    # Ensure the IAM role attached to your Redshift cluster has write access to S3_BUCKET_NAME
    # Example: 'arn:aws:iam::ACCOUNT_ID:role/YourRedshiftClusterIAMRole'
    iam_role_arn = 'arn:aws:iam::YOUR_AWS_ACCOUNT_ID:role/YourRedshiftClusterIAMRole' # REPLACE THIS

    unload_query = f"""
    UNLOAD ('SELECT id, name, city FROM users')
    TO '{S3_UNLOAD_PATH}'
    IAM_ROLE '{iam_role_arn}'
    CSV
    HEADER
    PARALLEL OFF; -- For small demos, PARALLEL OFF makes it one file. Remove for large datasets.
    """
    print(f"Executing UNLOAD query to {S3_UNLOAD_PATH}...")
    cursor.execute(unload_query)
    conn.commit()
    print("Data successfully unloaded to S3. Check your S3 bucket!")
    # Optional: Verify S3 file exists using boto3
    s3_client = boto3.client('s3', region_name=AWS_REGION)
    try:
        s3_client.head_object(Bucket=S3_BUCKET_NAME, Key=S3_FILE_TO_UNLOAD + '000') # Check for part file
        print(f"Confirmed file {S3_FILE_TO_UNLOAD} exists in S3.")
    except Exception as s3_e:
        print(f"Could not confirm S3 file presence (may take a moment to appear): {s3_e}")

except Exception as e:
    conn.rollback()
    print(f"Error unloading data to S3: {e}")
    print("Ensure the IAM role associated with your Redshift cluster has S3 write permissions.")

# --- 6. (Optional) Show Data API usage with boto3 ---
# This is an alternative to persistent connections, good for serverless/asynchronous needs.
print("\n6. (Optional) Demonstrating Redshift Data API (using boto3):")
try:
    redshift_data_client = boto3.client('redshift-data', region_name=AWS_REGION)

    # Execute a statement
    statement_response = redshift_data_client.execute_statement(
        ClusterIdentifier=CLUSTER_IDENTIFIER,
        Database=DATABASE_NAME,
        DbUser=DB_USER, # The Redshift database user
        Sql="SELECT * FROM users WHERE id = 1;"
    )
    statement_id = statement_response['Id']
    print(f"Data API Statement ID: {statement_id}")

    # Wait for statement to complete (for demo purposes)
    import time
    status = ''
    while status not in ['FINISHED', 'FAILED', 'ABORTED']:
        time.sleep(1)
        description_response = redshift_data_client.describe_statement(Id=statement_id)
        status = description_response['Status']
        print(f"Statement status: {status}")

    if status == 'FINISHED':
        result_response = redshift_data_client.get_statement_result(Id=statement_id)
        print("Data API Results:")
        # Data API results are in a specific format, you'd usually parse them
        if result_response and result_response.get('Records'):
            # Extract column names
            column_names = [col['label'] for col in result_response['ColumnMetadata']]
            print(column_names)
            for record in result_response['Records']:
                # Each record is a list of dictionaries, where each dict has a key matching the data type (e.g., 'longValue', 'stringValue')
                row_values = [list(col.values())[0] for col in record]
                print(row_values)
        else:
            print("No results from Data API.")
    else:
        print(f"Data API query failed or was aborted: {description_response.get('Error')}")

except Exception as e:
    print(f"Error using Redshift Data API: {e}")
    print("Ensure the IAM user/role has 'redshift-data:*' permissions.")


# --- Cleanup ---
print("\n--- Cleaning up ---")
try:
    cursor.close()
    conn.close()
    print("Connection closed.")
except Exception as e:
    print(f"Error during cleanup: {e}")

print("\n--- Demo Complete! ---")

--- Starting Redshift Python Demo ---

1. Establishing secure connection to Redshift using IAM...
Error connecting to Redshift: connect() got an unexpected keyword argument 'workgroup_name'
Please ensure your AWS CLI is configured, Redshift cluster is accessible,
and the IAM user/role has 'redshift:GetClusterCredentials' permission.

2. Executing a simple SELECT query:
Error executing query: name 'cursor' is not defined

3. Demonstrating a parameterized query:
Error with parameterized query: name 'cursor' is not defined

4. Inserting new data:


NameError: name 'conn' is not defined

In [7]:
import psycopg2

# Redshift Serverless connection details
host = "default-workgroup.310879042055.us-east-1.redshift-serverless.amazonaws.com"
#:5439/dev"
#'<your-workgroup-name>.<random>.redshift-serverless.<region>.amazonaws.com'

port = 5439
dbname = 'dev'
user = 'admin'
password = 'KkmAcl4526'

try:
    conn = psycopg2.connect(
        host=host,
        port=port,
        dbname=dbname,
        user=user,
        password=password
    )
    print("Connected to Redshift Serverless!")

    cur = conn.cursor()
    cur.execute("SELECT current_date;")
    result = cur.fetchone()
    print("Current date in Redshift:", result[0])

    cur.execute("SELECT current_user, current_database();")
    result = cur.fetchone()
    print("Current user and database name> ", result)

    cur.execute("SELECT schema_name FROM information_schema.schemata ORDER BY 1;")
    result = cur.fetchone()
    print("Schemaas ", result)

    cur.execute('''SELECT
    trim(n.nspname)   AS schema,
    trim(t.relname)   AS table,
    si.size/1024/1024 AS MB
    FROM pg_class t
    JOIN pg_namespace n ON n.oid = t.relnamespace
    JOIN svv_table_info si ON si.table = t.relname
    WHERE n.nspname NOT IN ('pg_internal', 'pg_catalog', 'information_schema')
    ORDER BY MB DESC
    LIMIT 10;''')
    result = cur.fetchone()
    print(result)

    cur.close()
    conn.close()
except Exception as e:
    print("Connection failed:", e)


Connected to Redshift Serverless!
Current date in Redshift: 2025-07-08
Current user and database name>  ('admin', 'dev')
Schemaas  None
None


In [2]:
import psycopg2

# Redshift Serverless connection details
host = "default-workgroup.xx.us-east-1.redshift-serverless.amazonaws.com"
#:5439/dev"
#'<your-workgroup-name>.<random>.redshift-serverless.<region>.amazonaws.com'

port = 5439
dbname = 'dev'
user = 'admin'
password = 'xxx'

try:
    conn = psycopg2.connect(
        host=host,
        port=port,
        dbname=dbname,
        user=user,
        password=password
    )
    print("Connected to Redshift Serverless!")

    cur = conn.cursor()
    cur.execute("SELECT current_date;")
    result = cur.fetchone()
    print("Current date in Redshift:", result[0])

    cur.execute("SELECT current_user, current_database();")
    result = cur.fetchone()
    print("Current user and database name> ", result)

    cur.execute("SELECT schema_name FROM information_schema.schemata ORDER BY 1;")
    result = cur.fetchone()
    print("Schemaas ", result)

    cur.execute('''SELECT
    trim(n.nspname)   AS schema,
    trim(t.relname)   AS table,
    si.size/1024/1024 AS MB
    FROM pg_class t
    JOIN pg_namespace n ON n.oid = t.relnamespace
    JOIN svv_table_info si ON si.table = t.relname
    WHERE n.nspname NOT IN ('pg_internal', 'pg_catalog', 'information_schema')
    ORDER BY MB DESC
    LIMIT 10;''')
    result = cur.fetchone()
    print(result)

    cur.close()
    conn.close()
except Exception as e:
    print("Connection failed:", e)


Connected to Redshift Serverless!
Current date in Redshift: 2025-07-08
Current user and database name>  ('admin', 'dev')
Schemaas  None
None


In [14]:
# 1. Simple conect and read a table

def get_conn_from_redshift():
    # Redshift Serverless connection details
    host = "default-workgroup.310879042055.us-east-1.redshift-serverless.amazonaws.com"
    #:5439/dev"
    #'<your-workgroup-name>.<random>.redshift-serverless.<region>.amazonaws.com'
    
    port = 5439
    dbname = 'dev'
    user = 'admin'
    password = 'KkmAcl4526'
    
    try:
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to Redshift Serverless!")
        return conn
    except:
        print("### ERROR in connecting ####")


# 1. Get Connection
conn = get_conn_from_redshift()

# # 2. Read table
# import pandas as pd
# df = pd.read_sql("SELECT * FROM sales LIMIT 100", conn)
# print(df.head())

Connected to Redshift Serverless!


In [1]:
#
import psycopg2
def get_conn_redshift():
    host = "default-workgroup.310879042055.us-east-1.redshift-serverless.amazonaws.com"
    #:5439/dev"
    #'<your-workgroup-name>.<random>.redshift-serverless.<region>.amazonaws.com'
    
    port = 5439
    dbname = 'dev'
    user = 'admin'
    password = 'KkmAcl4526'
    conn = psycopg2.connect(
        host=host,
        port=port,
        dbname=dbname,
        user=user,
        password=password,
        connect_timeout=10,
        keepalives=1,
        keepalives_idle=10,
        keepalives_interval=10,
        keepalives_count=5,
        sslmode="require",          # Redshift enforces TLS
    )
    print("✅ Connected to Redshift Serverless!")
    return conn
conn = get_conn_from_redshift()


# 2. Create a sample table
DDL = """
CREATE TABLE IF NOT EXISTS sales (
    id      BIGINT IDENTITY,
    ts      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    sku     VARCHAR(32),
    qty     INT,
    price   NUMERIC(12,2)
);
INSERT INTO sales (sku, qty, price)
VALUES ('ABC‑001', 2, 199.99), ('ABC‑002', 1, 349.00);
"""

cur = conn.cursor()
resp = cur.execute(DDL)
print(resp)

NameError: name 'get_conn_from_redshift' is not defined

In [22]:
COPY_CMD = f"""
COPY sales
FROM 's3://kkm2-unique-test-bucket-2025-06-26-py-1/sales.csv'
IAM_ROLE 'arn:aws:iam::310879042055:role/aws-service-role/redshift.amazonaws.com/AWSServiceRoleForRedshift'
FORMAT AS PARQUET;
"""
cur.execute(COPY_CMD)


InternalError_: exception name : UnauthorizedException, error type : 138, message: The requested role arn:aws:iam::310879042055:role/aws-service-role/redshift.amazonaws.com/AWSServiceRoleForRedshift is not associated to cluster, should retry : 0
DETAIL:  
  -----------------------------------------------
  error:  exception name : UnauthorizedException, error type : 138, message: The requested role arn:aws:iam::310879042055:role/aws-service-role/redshift.amazonaws.com/AWSServiceRoleForRedshift is not associated to cluster, should retry : 0
  code:      30000
  context:   
  query:     409640[child_sequence:4]
  location:  xen_aws_credentials_mgr.cpp:785
  process:   padbmaster [pid=1073889539]
  -----------------------------------------------

