## Notebook to demo reading a GDB feature class.

Before running the cells below, unzip `test.gdb.zip` in the `data` folder so that `../data/test.gdb` exists.

```shell
uv pip install pyproj
```

In [1]:
from pyproj import CRS

### Show the catalog.

In [2]:
# Load the geodatabase without a "name" option and display every row untruncated;
# the recorded output lists the tables contained in the file geodatabase.
(
    spark.read.format("gdb")
    .options(path="../data/test.gdb")
    .load()
    .show(truncate=False)
)

+---+-------------------------+----------+
|ID |Name                     |FileFormat|
+---+-------------------------+----------+
|1  |GDB_SystemCatalog        |0         |
|2  |GDB_DBTune               |0         |
|3  |GDB_SpatialRefs          |0         |
|4  |GDB_Items                |0         |
|5  |GDB_ItemTypes            |0         |
|6  |GDB_ItemRelationships    |0         |
|7  |GDB_ItemRelationshipTypes|0         |
|8  |GDB_ReplicaLog           |2         |
|9  |Test                     |0         |
+---+-------------------------+----------+



                                                                                

### Read the `Test` feature class.

In [3]:
# Same reader as the catalog cell, but the "name" option selects a single
# feature class ("Test") to load as a DataFrame.
df = (
    spark.read.format("gdb")
    .options(path="../data/test.gdb", name="Test")
    .load()
)

In [4]:
# Print the column names, types and nullability of the loaded feature class.
df.printSchema()

root
 |-- OBJECTID: integer (nullable = false)
 |-- Shape: struct (nullable = true)
 |    |-- x: double (nullable = true)
 |    |-- y: double (nullable = true)
 |-- AText: string (nullable = true)
 |-- ALong: integer (nullable = true)
 |-- ADouble: double (nullable = true)
 |-- ADate: timestamp (nullable = true)
 |-- AFloat: float (nullable = true)



In [6]:
def geom_type_to_str(layer_geom_type: int) -> str:
    """Translate a numeric geometry type code into a lowercase name.

    Args:
        layer_geom_type: Geometry type code, e.g. the "geomType" value read
            from a field's metadata.

    Returns:
        "point" (1), "multipoint" (2), "polyline" (3), "polygon" (4) or
        "multipatch" (9); any other value yields "unknown".
    """
    code_labels = (
        (1, "point"),
        (2, "multipoint"),
        (3, "polyline"),
        (4, "polygon"),
        (9, "multipatch"),
    )
    # Compare with == in declaration order; the first matching code wins.
    return next(
        (label for code, label in code_labels if layer_geom_type == code),
        "unknown",
    )

### Get the `Shape` field metadata.

In [11]:
# Find the first field named "shape" (case-insensitive) and print its metadata.
for f in df.schema:
    if f.name.lower() != "shape":
        continue
    meta = f.metadata
    print(geom_type_to_str(meta["geomType"]))
    print("hasM =", meta["hasM"], "hasZ =", meta["hasZ"])
    # Extent values are printed unlabelled in this order: ymax, ymin, xmin, xmax,
    # followed by the spatial reference WKT string.
    for key in ("ymax", "ymin", "xmin", "xmax", "srsWKT"):
        print(meta[key])
    # Parse the WKT with pyproj and print the result of to_epsg()
    # (4326 in the recorded output).
    crs = CRS.from_wkt(meta["srsWKT"])
    wkid = crs.to_epsg()
    print(wkid)
    # Only the first matching field is reported.
    break

point
hasM = False hasZ = False
34.455369428000004
25.270785538000005
25.413723117000004
36.415447984000004
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
4326


In [None]:
# Attribute columns plus the flattened x/y members of the Shape struct.
display_columns = [
    "OBJECTID",
    "AText",
    "ALong",
    "ADouble",
    "ADate",
    "Shape.x",
    "Shape.y",
]

# Vertical layout prints one column per line for each row, without truncation.
df.select(*display_columns).show(vertical=True, truncate=False)