# Notebook for Ibis experiments

Please note that this notebook requires the table *prova*, which can be created in the *pyspark_experiments.ipynb* notebook.

In [1]:
import ibis
import polars as pl

from poor_man_lakehouse.dremio_connector.builder import DremioConnection
from poor_man_lakehouse.ibis_connector.builder import IbisConnection

# Interactive mode: Ibis expressions are executed when displayed instead of
# showing only the unevaluated expression.
ibis.options.interactive = True

# Instantiating DremioConnection runs the Dremio setup seen in the log output of
# this cell (admin authentication, Nessie catalog check). `d` itself is not
# referenced by any other cell shown in this notebook.
d = DremioConnection()

# Shared connector; the cells below use it to reach the "duckdb", "pyspark" and
# "polars" engines.
conn = IbisConnection()

[32m2026-02-07 17:38:43.557[0m | [1mINFO    [0m | [36mpoor_man_lakehouse.dremio_connector.builder[0m:[36m_initialize_dremio[0m:[36m150[0m - [1mInitializing Dremio setup...[0m
[32m2026-02-07 17:38:43.720[0m | [1mINFO    [0m | [36mpoor_man_lakehouse.dremio_connector.builder[0m:[36m_initialize_dremio[0m:[36m155[0m - [1mAdmin user authentication successful[0m
[32m2026-02-07 17:38:43.871[0m | [1mINFO    [0m | [36mpoor_man_lakehouse.dremio_connector.builder[0m:[36m_ensure_nessie_catalog[0m:[36m245[0m - [1mNessie catalog 'nessie' already exists[0m


In [2]:
# Inspect what the DuckDB backend can see: its catalogs, the databases inside
# the "memory" catalog, and the tables in the "default" database.
# The connection is looked up once and reused rather than calling
# get_connection() again for every listing.
duckdb_conn = conn.get_connection("duckdb")
print(duckdb_conn.list_catalogs())
print(duckdb_conn.list_databases(catalog="memory"))
print(duckdb_conn.list_tables(database="default"))

['memory', 'system', 'temp']
['main']
[]


In [3]:
# Try to make "default" the active database on the DuckDB engine.
# NOTE(review): in the recorded run this call raises a CatalogException for
# "lakekeeper.default", and the list_catalogs() output of the previous cell shows
# no "lakekeeper" catalog — presumably the catalog is not attached to DuckDB;
# confirm the connector setup before relying on the cells below.
conn.set_current_database("default", "duckdb")

CatalogException: Catalog Error: SET schema: No catalog + schema named "lakekeeper.default" found.

In [None]:
# Grab the DuckDB backend first; the sample frame below is registered through it
duckdb_conn = conn.get_connection("duckdb")

# Five-row Polars frame used as toy data
sample_data = pl.DataFrame(
    {
        "id": [1, 2, 3, 4, 5],
        "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
        "value": [100, 200, 150, 300, 250],
    }
)

# Register the frame as table "prova" and keep the returned table expression
polars_table = duckdb_conn.create_table("prova", sample_data)

# Sanity check: list the tables in "default", then print the table contents
print(duckdb_conn.list_tables(database="default"))
print(polars_table.execute())

In [None]:
# Run a raw SQL query through the DuckDB engine and execute it so the result is
# shown as the cell output.
conn.sql("select * from default.prova", "duckdb").execute()

In [None]:
# Same table via the expression API: project two columns, keep only Alice's row
(
    polars_table
    .select("id", "name")
    .filter(polars_table["name"] == "Alice")
    .execute()
)


In [None]:
# Read "prova" through the PySpark engine and execute it to display the result.
# NOTE(review): the arguments look like (database, table, engine) — confirm
# against IbisConnection.read_table.
pyspark_table = conn.read_table("default", "prova", "pyspark")
pyspark_table.execute()

In [None]:
# Expression-API query on the PySpark table: two columns, AAPL rows only.
# NOTE(review): relies on "datetime" and "symbol" columns existing in the
# PySpark-side "prova" table — confirm its schema.
(
    pyspark_table
    .select("datetime", "symbol")
    .filter(pyspark_table["symbol"] == "AAPL")
    .execute()
)


In [None]:
# Equivalent AAPL filter written as SQL against the table expression
pyspark_table.sql("SELECT * FROM prova WHERE symbol = 'AAPL'").execute()

In [None]:
# List the tables available through the "polars" engine.
conn.list_tables("polars")

In [None]:
# Call the connector's private helper directly on a two-table join query to
# inspect what it returns.
# NOTE(review): presumably it rewrites the `default.`-qualified table names into
# a form the Polars engine accepts — confirm in IbisConnection.
conn._fix_polars_table_name(
    "select * from default.prova join default.prova2 on default.prova.id = default.prova2.id"
)

In [None]:
# Run the same raw SQL query through the "polars" engine and execute it so the
# result is shown as the cell output.
conn.sql("select * from default.prova", "polars").execute()

In [None]:
# Handle to the Polars backend (the name now reflects the engine that is
# actually requested).
polars_conn = conn.get_connection("polars")

# Tables available through the "polars" engine; the bare name displays them.
tables = conn.list_tables("polars")
tables