In [1]:
%pip install trino sqlalchemy pandas

Collecting trino
  Downloading trino-0.334.0-py3-none-any.whl.metadata (20 kB)
Collecting tzlocal (from trino)
  Downloading tzlocal-5.3.1-py3-none-any.whl.metadata (7.6 kB)
Downloading trino-0.334.0-py3-none-any.whl (57 kB)
Downloading tzlocal-5.3.1-py3-none-any.whl (18 kB)
Installing collected packages: tzlocal, trino
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [trino]
[1A[2KSuccessfully installed trino-0.334.0 tzlocal-5.3.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
from sqlalchemy import create_engine, text
import pandas as pd

TRINO_HOST = 'trino-coordinator' # Service name in docker-compose
TRINO_PORT = 8080
TRINO_USER = 'testuser' # Can be any string, Trino by default doesn't enforce auth in this setup
CATALOG = 'iceberg' # Catalog name as defined in iceberg.properties

# Connection string for Trino
trino_conn_str = f'trino://{TRINO_USER}@{TRINO_HOST}:{TRINO_PORT}/{CATALOG}'
engine = create_engine(trino_conn_str)

In [3]:
def run_trino_query(query, fetch_results=True):
    """Executes a Trino query and optionally fetches results into a Pandas DataFrame."""
    with engine.connect() as connection:
        # For queries that modify data or schema, autocommit is usually the default or not needed to be set explicitly for Trino
        # For DML/DDL, we might not always fetch results
        result_proxy = connection.execute(text(query))
        if fetch_results and result_proxy.returns_rows:
            df = pd.DataFrame(result_proxy.fetchall(), columns=result_proxy.keys())
            return df
        elif fetch_results: # No rows returned but fetch_results was true
            return pd.DataFrame(columns=result_proxy.keys() if result_proxy.returns_rows else [])
        else:
            print(f"Query executed successfully (returns_rows={result_proxy.returns_rows}).")
            # For DDL/DML, we might want to check row count if available
            # print(f"Rows affected (approx): {result_proxy.rowcount}") # rowcount might not be reliable for all statements/drivers
            return None

print(f"Connected to Trino: {trino_conn_str}")

Connected to Trino: trino://testuser@trino-coordinator:8080/iceberg


In [4]:
SCHEMA_NAME = 'lor'

run_trino_query(f"CREATE SCHEMA IF NOT EXISTS {CATALOG}.{SCHEMA_NAME} WITH (location = 's3a://iceberg-warehouse/{SCHEMA_NAME}/')", fetch_results=False)
print(f"Schema '{SCHEMA_NAME}' created or already exists.")

print("\nAvailable schemas in Iceberg catalog:")
schemas_df = run_trino_query(f"SHOW SCHEMAS FROM {CATALOG}")
print(schemas_df)

Query executed successfully (returns_rows=False).
Schema 'lor' created or already exists.

Available schemas in Iceberg catalog:
               Schema
0  information_schema
1                 lor
2              system


In [8]:
run_trino_query(f"SHOW TABLES FROM {CATALOG}.{SCHEMA_NAME}")

Unnamed: 0,Table


In [9]:
TABLE_NAME = 'character_sightings'
FQN_TABLE_NAME = f"{CATALOG}.{SCHEMA_NAME}.{TABLE_NAME}"
query = f"SELECT * FROM {FQN_TABLE_NAME} LIMIT 10"

In [10]:
run_trino_query(query, fetch_results=True)


ProgrammingError: (trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=TABLE_NOT_FOUND, message="line 1:15: Table 'iceberg.lor.character_sightings' does not exist", query_id=20250512_175752_00006_3ukbd)
[SQL: SELECT * FROM iceberg.lor.character_sightings LIMIT 10]
(Background on this error at: https://sqlalche.me/e/20/f405)