# Tests with `parquet` format
Link of documentation: https://arrow.apache.org/docs/python/parquet.html

## Imports

In [6]:
import pyarrow.parquet as pq

In [11]:
import numpy as np
import pandas as pd
import pyarrow as pa

df = pd.DataFrame({'one': [-1, np.nan, 2.5],
                   'two': ['foo', 'bar', 'baz'],
                   'three': [True, False, True]},
                   index=list('abc'))

table = pa.Table.from_pandas(df)
table.to_pandas()

Unnamed: 0,one,two,three
a,-1.0,foo,True
b,,bar,False
c,2.5,baz,True


In [8]:
# Writing to file
fname = 'example.parquet'
pq.write_table(table, fname)

In [12]:
table2 = pq.read_table(fname)
table2.to_pandas()

Unnamed: 0,one,two,three
a,-1.0,foo,True
b,,bar,False
c,2.5,baz,True


In [2]:
# Install and load DuckDB spatial extension
duckdb.install_extension("spatial")
duckdb.load_extension("spatial")

In [3]:
s2_parquet_path = "s2-stac-api.parquet"

## Query the geoparquet file

In [4]:
if not os.path.exists(s2_parquet_path):
    print("Run notebook 02-Create a geoparquet file.ipynb first")
    exit(1)

In [5]:
sql_query = f"SELECT * EXCLUDE(geometry), ST_AsWKB(geometry) as geometry FROM '{s2_parquet_path}'"

print(sql_query)

db = duckdb.query(sql_query)

SELECT * EXCLUDE(geometry), ST_AsWKB(geometry) as geometry FROM 's2-stac-api.parquet'


In [6]:
type(db)

duckdb.duckdb.DuckDBPyRelation

## Convert DuckDB result to Arrow table

In [7]:
table = db.fetch_arrow_table()

table_head = table.slice(0, 125)  # Get first 125 rows

In [12]:
table.slice(0, 1)

pyarrow.Table
assets: struct<AOT: struct<gsd: double, href: string, proj:bbox: list<l: double>, proj:shape: list<l: int64>, proj:transform: list<l: double>, roles: list<l: string>, title: string, type: string>, B01: struct<eo:bands: list<l: struct<center_wavelength: double, common_name: string, description: string, full_width_half_max: double, name: string>>, gsd: double, href: string, proj:bbox: list<l: double>, proj:shape: list<l: int64>, proj:transform: list<l: double>, roles: list<l: string>, title: string, type: string>, B02: struct<eo:bands: list<l: struct<center_wavelength: double, common_name: string, description: string, full_width_half_max: double, name: string>>, gsd: double, href: string, proj:bbox: list<l: double>, proj:shape: list<l: int64>, proj:transform: list<l: double>, roles: list<l: string>, title: string, type: string>, B03: struct<eo:bands: list<l: struct<center_wavelength: double, common_name: string, description: string, full_width_half_max: double, name: string

In [8]:
for item in stac_table_to_items(table_head):
    print(item)

## Map


In [9]:
items = [pystac.Item.from_dict(item) for item in stac_table_to_items(table_head)]
items

[]

In [10]:
items = [pystac.Item.from_dict(item) for item in stac_table_to_items(table_head)]

map = folium.Map()
layer_control = folium.LayerControl(position="topright", collapsed=True)
fullscreen = folium_plugins.Fullscreen()
style = {"fillColor": "#00000000", "color": "#0000ff", "weight": 1}

footprints = folium.GeoJson(
    gpd.GeoDataFrame.from_features(items).to_json(),
    name="Stac Item footprints",
    style_function=lambda x: style,
    control=True,
)

footprints.add_to(map)
layer_control.add_to(map)
fullscreen.add_to(map)
map.fit_bounds(map.get_bounds())
map

AttributeError: No geometry data set (expected in column 'None').

### Geofilter

In [None]:
cql2_filter = {
    "op": "and",
    "args": [
        {
            "op": "between",
            "args": [
                {"property": "datetime"},
                ["2023-12-28T00:00:00Z", "2023-12-28T23:59:59Z"],
            ],
        },
        {"op": "between", "args": [{"property": "eo:cloud_cover"}, [90, 100]]},
        {
            "op": "s_intersects",
            "args": [
                {"property": "geometry"},
                {
                    "type": "Polygon",
                    "coordinates": [
                        [
                            [-9.51346013858793, 38.95450355515311],
                            [-9.51346013858793, 38.22500810801125],
                            [-8.359265560322228, 38.22500810801125],
                            [-8.359265560322228, 38.95450355515311],
                            [-9.51346013858793, 38.95450355515311],
                        ]
                    ],
                },
            ],
        },
    ],
}

In [None]:
sql_where = to_sql_where(json_parse(cql2_filter), IdempotentDict())
print(sql_where)

In [None]:
sql_query = f"SELECT * EXCLUDE(geometry), ST_AsWKB(geometry) as geometry FROM '{s2_parquet_path}' WHERE {sql_where}"

In [None]:
db = duckdb.query(sql_query)

In [None]:
subset_table = db.fetch_arrow_table()
subset_table[0]

In [None]:
items = [pystac.Item.from_dict(item) for item in stac_table_to_items(subset_table)]

items[0]

In [None]:
len(items)

In [None]:
map = folium.Map()
layer_control = folium.LayerControl(position="topright", collapsed=True)
fullscreen = folium_plugins.Fullscreen()
style = {"fillColor": "#00000000", "color": "#0000ff", "weight": 1}

footprints = folium.GeoJson(
    gpd.GeoDataFrame.from_features(items).to_json(),
    name="Stac Item footprints",
    style_function=lambda x: style,
    control=True,
)

footprints.add_to(map)
layer_control.add_to(map)
fullscreen.add_to(map)
map.fit_bounds(map.get_bounds())
map