# Read a Delta Lake table with SedonaDB

In [1]:
import tempfile
from pathlib import Path

import pandas as pd
from deltalake import write_deltalake, DeltaTable

## Create a Delta Lake table

In [2]:
# Five sample features, one per column-aligned position. Geometries are kept
# as WKT text so they can be stored in an ordinary string column.
data = {
    "id": [1, 2, 3, 4, 5],
    "name": ["Store A", "Park B", "Building C", "Restaurant D", "Campus E"],
    "type": ["point", "polygon", "polygon", "point", "polygon"],
    "geometry": [
        # New York City (point)
        "POINT (-73.9857 40.7484)",
        # San Francisco (square polygon)
        (
            "POLYGON ((-122.4194 37.7749, -122.4094 37.7749, "
            "-122.4094 37.7849, -122.4194 37.7849, -122.4194 37.7749))"
        ),
        # Chicago (square polygon)
        (
            "POLYGON ((-87.6298 41.8781, -87.6198 41.8781, "
            "-87.6198 41.8881, -87.6298 41.8881, -87.6298 41.8781))"
        ),
        # Austin (point)
        "POINT (-97.7431 30.2672)",
        # Los Angeles (square polygon)
        (
            "POLYGON ((-118.2437 34.0522, -118.2337 34.0522, "
            "-118.2337 34.0622, -118.2437 34.0622, -118.2437 34.0522))"
        ),
    ],
}

# Column order follows the dict's insertion order: id, name, type, geometry.
df = pd.DataFrame(data)

In [3]:
# Show the pandas DataFrame as the cell's last expression instead of print(),
# so Jupyter renders it with its rich table display.
df

   id          name     type  \
0   1       Store A    point   
1   2        Park B  polygon   
2   3    Building C  polygon   
3   4  Restaurant D    point   
4   5      Campus E  polygon   

                                            geometry  
0                           POINT (-73.9857 40.7484)  
1  POLYGON ((-122.4194 37.7749, -122.4094 37.7749...  
2  POLYGON ((-87.6298 41.8781, -87.6198 41.8781, ...  
3                           POINT (-97.7431 30.2672)  
4  POLYGON ((-118.2437 34.0522, -118.2337 34.0522...  


In [4]:
# Location of the demo Delta table. Built from the platform's temp directory
# rather than a hard-coded "/tmp" so the notebook also runs where "/tmp" does
# not exist (e.g. Windows). Kept as a plain string for the deltalake calls below.
table_path = str(Path(tempfile.gettempdir()) / "fun_delta")

In [5]:
# Persist the pandas DataFrame as a Delta table at `table_path`.
# mode="overwrite" replaces any table already there, so this cell can be re-run.
write_deltalake(table_path, df, mode="overwrite")

## Read the Delta Lake table into SedonaDB

In [6]:
# Open the Delta table that was written to `table_path` above.
dt = DeltaTable(table_path)

In [7]:
# Load the Delta table's contents into a PyArrow table.
arrow_table = dt.to_pyarrow_table()

In [8]:
import sedona.db

In [9]:
# Open a SedonaDB connection; `sd` is used below to create DataFrames and run SQL.
sd = sedona.db.connect()

In [10]:
# Hand the Arrow table to SedonaDB.
# NOTE(review): this rebinds `df`, which until this cell held the pandas DataFrame.
df = sd.create_data_frame(arrow_table)

In [11]:
# Preview the rows; `geometry` is still a plain utf8 column holding WKT text here.
df.show()

┌───────┬──────────────┬─────────┬─────────────────────────────────────────────────────────────────┐
│   id  ┆     name     ┆   type  ┆                             geometry                            │
│ int64 ┆     utf8     ┆   utf8  ┆                               utf8                              │
╞═══════╪══════════════╪═════════╪═════════════════════════════════════════════════════════════════╡
│     1 ┆ Store A      ┆ point   ┆ POINT (-73.9857 40.7484)                                        │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│     2 ┆ Park B       ┆ polygon ┆ POLYGON ((-122.4194 37.7749, -122.4094 37.7749, -122.4094 37.7… │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│     3 ┆ Building C   ┆ polygon ┆ POLYGON ((-87.6298 41.8781, -87.6198 41.8781, -87.6198 41.8881… │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌

In [12]:
# Expose the DataFrame to SQL under the name `my_table` (queried in the next cell).
df.to_view("my_table")

In [13]:
# Convert the WKT text column into a real geometry column named `geom`,
# reading from the `my_table` view.
geom_query = """
SELECT
  id,
  name,
  type,
  ST_GeomFromWKT(geometry) as geom
from my_table
"""

df = sd.sql(geom_query)

In [14]:
# Preview the query result; the `geom` column now has the geometry type.
df.show()

┌───────┬──────────────┬─────────┬─────────────────────────────────────────────────────────────────┐
│   id  ┆     name     ┆   type  ┆                               geom                              │
│ int64 ┆     utf8     ┆   utf8  ┆                             geometry                            │
╞═══════╪══════════════╪═════════╪═════════════════════════════════════════════════════════════════╡
│     1 ┆ Store A      ┆ point   ┆ POINT(-73.9857 40.7484)                                         │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│     2 ┆ Park B       ┆ polygon ┆ POLYGON((-122.4194 37.7749,-122.4094 37.7749,-122.4094 37.7849… │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│     3 ┆ Building C   ┆ polygon ┆ POLYGON((-87.6298 41.8781,-87.6198 41.8781,-87.6198 41.8881,-8… │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌