In [1]:
!pip install -q starrocks "sqlalchemy<3.0"

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
# This CATALOG_URL works for the "docker compose" testing and development environment
# Change 'lakekeeper' if you are not running on "docker compose" (f. ex. 'localhost' if Lakekeeper is running locally).
CATALOG_URL = "http://lakekeeper:8181/catalog"
STARROCKS_URI = "starrocks://root@starrocks:9030"
WAREHOUSE = "sepahram"

# Create Starrocks Catalog

In [4]:
from sqlalchemy import create_engine, text

engine = create_engine(STARROCKS_URI)

In [6]:
# Until https://github.com/StarRocks/starrocks/issues/50585 is fixed, we have to configure
# the credentials locally if we need a custom s3 endpoint.
with engine.connect() as connection:
    connection.execute(text("DROP CATALOG IF EXISTS lakekeeper"))
    connection.execute(
        text(f"""
        CREATE EXTERNAL CATALOG lakekeeper
        PROPERTIES
        (
            "type" = "iceberg",
            "iceberg.catalog.type" = "rest",
            "iceberg.catalog.uri" = "{CATALOG_URL}",
            "iceberg.catalog.warehouse" = "{WAREHOUSE}",
            "aws.s3.region" = "local",
            "aws.s3.enable_path_style_access" = "true",
            "aws.s3.endpoint" = "http://minio:9000",
            "aws.s3.access_key" = "minio-root-user",
            "aws.s3.secret_key" = "minio-root-password"
        )
        """)
    )
    connection.execute(text("SET CATALOG lakekeeper"))

## Read and Write Tables

In [None]:
with engine.connect() as connection:
    # connection.execute(text("CREATE DATABASE starrocks_namespace"))
    # connection.execute(
    #     text("CREATE TABLE starrocks_namespace.my_table (my_ints INT, my_floats DOUBLE, strings VARCHAR)")
    # )
    connection.execute(
        text("INSERT INTO starrocks_namespace.my_table VALUES (1, 1.0, 'a'), (2, 2.0, 'b')")
    )
    result = connection.execute(
        text("SELECT * FROM starrocks_namespace.my_table"),
    ).fetchall()

print(result)

In [8]:
from sqlalchemy import text

with engine.connect() as connection:
    # Make sure the catalog is set (you already created it)
    connection.execute(text("SET CATALOG lakekeeper"))
    
    # Optional: set the namespace / database you want to work with
    connection.execute(text("USE banking"))

    # Query the transactions table
    result = connection.execute(
        text("SELECT * FROM source_transactions LIMIT 10")
    ).fetchall()

# Print the result
# for row in result:
print(result)


[('7505832a-43c9-4bde-8c40-7096f59dec84', 'nicole52', datetime.datetime(2025, 9, 10, 14, 16, 8, 587542), 715.8, 'USD', 'Ortizmouth', 'Pakistan', 'Turner Ltd', 'online_transfer', '85.120.254.130', '', '7c25f309-97c4-45b2-b78e-41d0e4f1cd27'), ('42b83d9a-61c2-4591-b10b-850c9b46f54d', 'sophia46', datetime.datetime(2025, 9, 10, 14, 16, 8, 980020), 864.91, 'USD', 'Burketown', 'Guernsey', 'Brown-Young', 'credit_card', '88.105.196.30', '', '750ae68d-ab66-4030-aa7c-4c37b6df8e11'), ('516e8372-8dd9-4a98-9bf2-dce20afce867', 'egarner', datetime.datetime(2025, 9, 10, 14, 16, 9, 978607), 498.27, 'USD', 'North Marioburgh', 'Netherlands Antilles', 'Roy-Kim', 'online_transfer', '197.177.221.2', '', '6a8c8746-64b2-4cd4-b3d6-c933fc6114ac'), ('0aafbde5-7c54-4c49-aec0-3b89cdc5b96d', 'mayobrittany', datetime.datetime(2025, 9, 10, 14, 16, 10, 630081), 832.31, 'GBP', 'Nicholsview', 'Denmark', 'Martin-Elliott', 'online_transfer', '35.145.166.249', 'DISCOUNT10', '9532f2b2-bf55-4229-8339-70d934916dc3'), ('10b621d

In [10]:
import pandas as pd
from sqlalchemy import text

with engine.connect() as connection:
    # Use the catalog and namespace
    connection.execute(text("SET CATALOG lakekeeper"))
    connection.execute(text("USE banking"))
    
    # Execute the query
    result = connection.execute(
        text("SELECT * FROM source_transactions LIMIT 10")
    ).fetchall()
    
    # Get column names
    columns = [col[0] for col in connection.execute(text("SELECT * FROM source_transactions LIMIT 1")).keys()]

# Convert to pandas DataFrame
df = pd.DataFrame(result, columns=columns)

# Display the DataFrame nicely (works in Jupyter/console)
df


Unnamed: 0,t,u,t.1,a,c,c.1,c.2,m,p,i,v,a.1
0,5b4c0a8f-2375-4ae6-9cc6-83afeb89842b,whitedawn,2025-09-10 14:16:20.467782,585.82,GBP,North Roytown,Cote d'Ivoire,"Herrera, Valdez and Miller",debit_card,117.116.185.113,,97ff79ba-78cb-4077-ab4f-d91f1ab3868a
1,343dda76-a269-4b72-b8e7-4c069948378f,lauren54,2025-09-10 14:16:20.648055,619.24,GBP,East Jamesland,Equatorial Guinea,"Green, Anderson and Lane",debit_card,22.79.9.13,,8bcd676b-32d2-4dab-b8da-59014914644e
2,ee78a903-18c0-451b-8926-4d21caa467b3,chambersjames,2025-09-10 14:16:21.423376,635.59,USD,Stevenbury,Gibraltar,"Vang, Cruz and Butler",debit_card,176.72.91.166,,8317ac6e-6f77-47b3-ae9e-1d0c2392de65
3,5bd604f3-d861-4320-8331-ac7dd55e991e,hmcgrath,2025-09-10 14:16:22.257963,711.6,GBP,Port Scott,Japan,Campbell Group,online_transfer,167.28.119.188,,3d7ef29c-237e-4d7e-94d4-2119ee37ce86
4,51828a2b-f6ae-4c23-9fdc-761230059613,lcarr,2025-09-10 14:16:22.793240,542.91,GBP,Port William,Kazakhstan,Zamora-Rice,debit_card,74.183.243.96,DISCOUNT10,9c95afd1-c248-48eb-9882-7184cc75a413
5,e805ccb8-3189-4e39-918d-d54d88ae8e3e,kelly59,2025-09-10 14:16:23.370020,890.55,USD,New Joshua,Sri Lanka,Henry and Sons,online_transfer,45.174.170.60,,71b8ae58-a1eb-4693-b47a-5b7d9b1ac439
6,d59a9947-0672-4935-813c-d1ace0b830cd,kelseymassey,2025-09-10 14:16:23.908374,210.82,GBP,Carrollborough,Iraq,"Sheppard, Moody and Murray",online_transfer,97.24.94.100,,131e6b99-6c15-4443-ab93-bec85c32af28
7,07a49f09-0550-4089-974e-9b68f6b42fb6,steven06,2025-09-10 14:16:24.385676,494.84,USD,Tapiafort,Botswana,Spencer-Rose,online_transfer,202.210.34.13,,b5143395-186f-4665-bc68-c2b11815522e
8,4896fcf5-3538-411a-8135-e440ea03359b,christianmclean,2025-09-10 14:16:24.617399,937.74,GBP,East Joshua,Tonga,Gonzalez-Foster,debit_card,197.165.61.53,,2c5695e8-9440-47a0-8505-461b820c3fb9
9,2f50c014-68fb-4158-86ae-41e087dbc954,nicoleberry,2025-09-10 14:16:25.605106,323.97,USD,East Kellymouth,Comoros,"Patel, Kelly and Trevino",debit_card,66.78.183.227,,a7af3b2d-6b21-49f8-92a9-acebd70a0c73


In [13]:
import pandas as pd
from sqlalchemy import text

with engine.connect() as connection:
    # Use the catalog and namespace
    connection.execute(text("SET CATALOG lakekeeper"))
    connection.execute(text("USE banking"))
    
    # Execute the query
    result_proxy = connection.execute(
        text("""
            SELECT
                userId,
                transactionId,
                amount,
                currency,
                merchantName,
                timestamp
            FROM banking.source_transactions
            WHERE amount > 900
            ORDER BY amount DESC
            LIMIT 20
        """)
    )
    
    # Get the rows
    rows = result_proxy.fetchall()
    
    # Get the column names
    columns = result_proxy.keys()

# Convert to pandas DataFrame
df = pd.DataFrame(rows, columns=columns)

# Display nicely
df


Unnamed: 0,userId,transactionId,amount,currency,merchantName,timestamp
0,thomaslopez,cf2f5d05-3c8b-4a4e-99b5-aa816a460e31,999.98,USD,Anderson LLC,2025-09-10 14:15:03.426350
1,christopher94,97506059-9cb8-4901-9098-62ab5b2a7b65,999.48,USD,"Smith, Carlson and Sandoval",2025-09-10 14:15:23.167811
2,joanne50,d931f890-bba7-46f9-b16a-2dfdcfad75aa,998.81,USD,Dorsey Inc,2025-09-10 14:15:28.695568
3,ngibson,bb4c35e5-baa0-4e59-9165-93cfa8c75173,998.77,USD,Watts-Martinez,2025-09-10 14:13:50.567728
4,tonyahall,c23c7604-57ab-4a48-af72-706ce399bef4,995.22,USD,"Johnson, Wallace and Garcia",2025-09-10 14:14:20.240521
5,bradleydavid,6da2647b-3bcb-4add-8b6f-6deda25cdc87,995.11,USD,Duarte LLC,2025-09-10 14:08:41.692850
6,rivaskatherine,87fda341-c287-488d-afd3-b1001279d116,995.02,GBP,"Hensley, Johnson and Schaefer",2025-09-10 14:14:54.017038
7,aliciahoward,b407add3-2e65-4888-a127-9f726679d71d,994.75,USD,"Brady, Dean and Martinez",2025-09-10 14:12:48.756820
8,conwayjeffrey,e0c98de9-f907-4cd8-abad-474b252368ba,992.5,GBP,Grimes Group,2025-09-10 14:08:17.236881
9,meganjones,aa1c7f8d-35c6-4ef1-98c4-06ffa47474f1,992.0,GBP,"Koch, Miller and Grimes",2025-09-10 14:10:48.561394
