In [1]:
!pip install -q trino

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [1]:
# This CATALOG_URL works for the "docker compose" testing and development environment
# Change 'lakekeeper' if you are not running on "docker compose" (f. ex. 'localhost' if Lakekeeper is running locally).
CATALOG_URL = "http://lakekeeper:8181/catalog"
TRINO_URI = "http://trino:8080"
WAREHOUSE = "sepahram"

# Create Trino Catalog

In [2]:
from trino.dbapi import connect

conn = connect(host=TRINO_URI, user="trino")

In [3]:
cur = conn.cursor()
cur.execute(
    f"""
    CREATE CATALOG lakekeeper USING iceberg
    WITH (
        "iceberg.catalog.type" = 'rest',
        "iceberg.rest-catalog.uri" = '{CATALOG_URL}',
        "iceberg.rest-catalog.warehouse" = '{WAREHOUSE}',
        "iceberg.rest-catalog.security" = 'NONE',
        "iceberg.rest-catalog.vended-credentials-enabled" = 'true',
        "s3.region"= 'dummy',
        "s3.path-style-access" = 'true',
        "s3.endpoint" = 'http://minio:9000',
        "fs.native-s3.enabled" = 'true'
    )
"""
)

<trino.dbapi.Cursor at 0x7d44cab73110>

## Read and Write Tables

In [5]:
# Connect directly to demo catalog, so that we don't have to use it as a prefix
conn = connect(host=TRINO_URI, user="trino", catalog="lakekeeper")
cur = conn.cursor()

In [9]:
cur.execute("CREATE SCHEMA IF NOT EXISTS trino_namespace")

<trino.dbapi.Cursor at 0x7ad1709476e0>

In [11]:
cur.execute(
    "CREATE TABLE IF NOT EXISTS trino_namespace.trino_table (my_ints INT, my_floats DOUBLE, strings VARCHAR) WITH (format='PARQUET')"
)
cur.execute(
    "INSERT INTO trino_namespace.trino_table VALUES (1, 1.0, 'a'), (2, 2.0, 'b')"
)

<trino.dbapi.Cursor at 0x7ad1709476e0>

In [12]:
cur.execute("SELECT * FROM trino_namespace.trino_table").fetchall()

[[1, 1.0, 'a'], [1, 1.0, 'a'], [2, 2.0, 'b'], [2, 2.0, 'b']]

In [9]:
import pandas as pd

# Execute query
rows = cur.execute("SELECT * FROM banking.source_transactions").fetchall()

# Get column names
columns = [desc[0] for desc in cur.description]

# Convert to DataFrame
df = pd.DataFrame(rows, columns=columns)

# Display nicely in Jupyter or console
df

Unnamed: 0,transactionid,userid,timestamp,amount,currency,city,country,merchantname,paymentmethod,ipaddress,vouchercode,affiliateid
0,dceb47d8-9590-47ba-a817-f21e348110fe,olivia46,2025-09-10 14:16:26.178888+00:00,283.86,USD,Toniland,Gambia,Tran-Rivera,credit_card,139.255.117.110,DISCOUNT10,1a361990-3cc0-476f-806a-65821cbcf221
1,db4eda7e-7ca0-452c-97e2-82b671f5e473,tylerparker,2025-09-10 14:16:26.976892+00:00,652.86,GBP,Michaelport,Saint Lucia,Smith Ltd,credit_card,7.237.210.31,,274223dc-bddd-466c-b82e-de586baca563
2,66a9b1d7-6648-4da3-a859-6d2f5fd70171,ibuchanan,2025-09-10 14:16:27.756377+00:00,758.69,GBP,South Michaelfort,Guinea-Bissau,Steele-Goodwin,credit_card,91.114.158.20,DISCOUNT10,4d30b127-1452-4508-9cbb-3db96e70373c
3,5fb56973-4e80-43a7-95d2-7aea66e77a1a,keithkenneth,2025-09-10 14:07:28.964490+00:00,135.23,USD,Port Michaelland,Dominican Republic,"Olsen, Wood and Griffith",debit_card,3.163.223.218,DISCOUNT10,d83f62da-fd8f-43cf-b2ac-19871c10dead
4,57cb4f81-e42e-4960-ac9a-601031df86b7,johnsonkyle,2025-09-10 14:07:29.219762+00:00,923.80,USD,Nancyshire,Puerto Rico,Andrews Ltd,online_transfer,66.213.136.250,DISCOUNT10,1917a823-68c0-4426-ac9b-0b5222d6cd05
...,...,...,...,...,...,...,...,...,...,...,...,...
995,b1342e46-fcab-410f-a54c-241656537b42,kyle33,2025-09-10 14:10:20.449635+00:00,496.67,USD,West Maria,Iran,Buchanan and Sons,debit_card,86.4.11.83,DISCOUNT10,aee2b279-e185-45cc-a619-51d5832955c3
996,6a0300a8-5367-4902-be42-058171df6656,bowerswanda,2025-09-10 14:10:21.397155+00:00,573.44,GBP,Port Matthew,American Samoa,"Edwards, Juarez and Moore",credit_card,198.74.151.35,,92f1dbd7-2424-49af-99b8-cc661a9adf0e
997,08f9cd49-dabc-4951-91b2-c4e2ddd60e8d,carterlawrence,2025-09-10 14:15:05.697473+00:00,530.75,GBP,Loriton,Australia,"Valdez, Ramirez and Murphy",online_transfer,11.72.65.78,,d512e91b-7948-4893-9ec4-2a475f0c43d3
998,8be02d95-3c27-4627-90b1-b07af6297049,huffteresa,2025-09-10 14:15:05.865005+00:00,365.47,GBP,Smithtown,Bouvet Island (Bouvetoya),Allen-Hensley,debit_card,129.136.35.106,,c5b7ed78-b75a-463b-a4b8-fe0043da8741


### 1. Total transaction amount and count per city

This query aggregates the total transaction volume and number of transactions for each city.

In [11]:
# Execute query
rows = cur.execute("""
SELECT
    city,
    COUNT(*) AS total_transactions,
    SUM(amount) AS total_amount,
    AVG(amount) AS avg_amount
FROM banking.source_transactions
GROUP BY city
ORDER BY total_amount DESC
LIMIT 10
""").fetchall()

# Get column names
columns = [desc[0] for desc in cur.description]

# Convert to DataFrame
df = pd.DataFrame(rows, columns=columns)

# Display nicely in Jupyter or console
df


Unnamed: 0,city,total_transactions,total_amount,avg_amount
0,New James,3,1696.95,565.65
1,Lake Michael,2,1654.95,827.475
2,Port Wendy,2,1642.96,821.48
3,Kevinmouth,2,1603.99,801.995
4,Ericmouth,2,1597.68,798.84
5,Port Matthew,2,1530.08,765.04
6,Davidmouth,2,1500.63,750.315
7,New Jessica,2,1409.54,704.77
8,Port Scott,2,1388.64,694.32
9,Lake Sarah,2,1367.18,683.59


### 2. Number of transactions per payment method per currency

This query shows how different payment methods are used for each currency.

In [13]:
# Execute query
rows = cur.execute("""
SELECT
    currency,
    paymentMethod,
    COUNT(*) AS transactions_count
FROM banking.source_transactions
GROUP BY currency, paymentMethod
ORDER BY currency, transactions_count DESC
""").fetchall()

# Get column names
columns = [desc[0] for desc in cur.description]

# Convert to DataFrame
df = pd.DataFrame(rows, columns=columns)

# Display nicely in Jupyter or console
df


Unnamed: 0,currency,paymentMethod,transactions_count
0,GBP,online_transfer,175
1,GBP,credit_card,169
2,GBP,debit_card,165
3,USD,online_transfer,176
4,USD,credit_card,158
5,USD,debit_card,157


### 3. High-value transactions (potential fraud) per user

This query finds users with transactions above a threshold (e.g., 900 units).

In [14]:
# Execute query
rows = cur.execute("""
SELECT
    userId,
    transactionId,
    amount,
    currency,
    merchantName,
    timestamp
FROM banking.source_transactions
WHERE amount > 900
ORDER BY amount DESC
LIMIT 20

""").fetchall()

# Get column names
columns = [desc[0] for desc in cur.description]

# Convert to DataFrame
df = pd.DataFrame(rows, columns=columns)

# Display nicely in Jupyter or console
df

Unnamed: 0,userId,transactionId,amount,currency,merchantName,timestamp
0,thomaslopez,cf2f5d05-3c8b-4a4e-99b5-aa816a460e31,999.98,USD,Anderson LLC,2025-09-10 14:15:03.426350+00:00
1,christopher94,97506059-9cb8-4901-9098-62ab5b2a7b65,999.48,USD,"Smith, Carlson and Sandoval",2025-09-10 14:15:23.167811+00:00
2,joanne50,d931f890-bba7-46f9-b16a-2dfdcfad75aa,998.81,USD,Dorsey Inc,2025-09-10 14:15:28.695568+00:00
3,ngibson,bb4c35e5-baa0-4e59-9165-93cfa8c75173,998.77,USD,Watts-Martinez,2025-09-10 14:13:50.567728+00:00
4,tonyahall,c23c7604-57ab-4a48-af72-706ce399bef4,995.22,USD,"Johnson, Wallace and Garcia",2025-09-10 14:14:20.240521+00:00
5,bradleydavid,6da2647b-3bcb-4add-8b6f-6deda25cdc87,995.11,USD,Duarte LLC,2025-09-10 14:08:41.692850+00:00
6,rivaskatherine,87fda341-c287-488d-afd3-b1001279d116,995.02,GBP,"Hensley, Johnson and Schaefer",2025-09-10 14:14:54.017038+00:00
7,aliciahoward,b407add3-2e65-4888-a127-9f726679d71d,994.75,USD,"Brady, Dean and Martinez",2025-09-10 14:12:48.756820+00:00
8,conwayjeffrey,e0c98de9-f907-4cd8-abad-474b252368ba,992.5,GBP,Grimes Group,2025-09-10 14:08:17.236881+00:00
9,meganjones,aa1c7f8d-35c6-4ef1-98c4-06ffa47474f1,992.0,GBP,"Koch, Miller and Grimes",2025-09-10 14:10:48.561394+00:00
