## Notebook demonstrating how to read a GDB feature class.

Before running the cells below, unzip `test.gdb.zip` in the `data` folder so that `data/test.gdb` exists.

### Show the catalog.

In [1]:
# Load the geodatabase with only a `path` option (no `name`) and display
# every row untruncated; the output below is the table catalog.
(
    spark.read
    .format("gdb")
    .options(path="data/test.gdb")
    .load()
    .show(truncate=False)
)

11:29:18 DEBUG GDBIndex$: a00000001::n1024Blocks=1, maxRows = 9, numBytesPerRow=5
11:29:18 DEBUG GDBTable$: Opening 'data/test.gdb/a00000001.gdbtable'
11:29:18 DEBUG GDBTable$: maxRows=9
11:29:18 DEBUG GDBTable$: largestSize=62
11:29:18 DEBUG GDBTable$: gdbVer=4
11:29:18 DEBUG GDBTable$: geometryType=0
11:29:18 DEBUG GDBTable$: hasZ=false hasM=false
11:29:18 DEBUG GDBTable$: numFields=3
11:29:18 DEBUG GDBTable$: nameLen=2 name=ID aliasLen=0 alias=ID fieldType=6
11:29:18 DEBUG GDBTable$: nameLen=4 name=Name aliasLen=0 alias=Name fieldType=4
11:29:18 DEBUG GDBTable$: nameLen=10 name=FileFormat aliasLen=0 alias=FileFormat fieldType=1
11:29:18 DEBUG GDBIndex: indicies::startRow=0 numRows=9 numBytesPerRow=5
11:29:18 DEBUG FileGDB$: listTables::id=1 name=GDB_SystemCatalog fileFormat=0
11:29:18 DEBUG FileGDB$: listTables::id=2 name=GDB_DBTune fileFormat=0
11:29:18 DEBUG FileGDB$: listTables::id=3 name=GDB_SpatialRefs fileFormat=0
11:29:18 DEBUG FileGDB$: listTables::id=4 name=GDB_Items fileFo

+---+-------------------------+----------+
|ID |Name                     |FileFormat|
+---+-------------------------+----------+
|1  |GDB_SystemCatalog        |0         |
|2  |GDB_DBTune               |0         |
|3  |GDB_SpatialRefs          |0         |
|4  |GDB_Items                |0         |
|5  |GDB_ItemTypes            |0         |
|6  |GDB_ItemRelationships    |0         |
|7  |GDB_ItemRelationshipTypes|0         |
|8  |GDB_ReplicaLog           |2         |
|9  |Test                     |0         |
+---+-------------------------+----------+



11:29:20 DEBUG GDBIndex$: a00000001::n1024Blocks=1, maxRows = 9, numBytesPerRow=5
11:29:20 DEBUG GDBTable$: Opening 'data/test.gdb/a00000001.gdbtable'
11:29:20 DEBUG GDBTable$: maxRows=9
11:29:20 DEBUG GDBTable$: largestSize=62
11:29:20 DEBUG GDBTable$: gdbVer=4
11:29:20 DEBUG GDBTable$: geometryType=0
11:29:20 DEBUG GDBTable$: hasZ=false hasM=false
11:29:20 DEBUG GDBTable$: numFields=3
11:29:20 DEBUG GDBTable$: nameLen=2 name=ID aliasLen=0 alias=ID fieldType=6
11:29:20 DEBUG GDBTable$: nameLen=4 name=Name aliasLen=0 alias=Name fieldType=4
11:29:20 DEBUG GDBTable$: nameLen=10 name=FileFormat aliasLen=0 alias=FileFormat fieldType=1
11:29:20 DEBUG GDBIndex: indicies::startRow=0 numRows=9 numBytesPerRow=5


### Read the `Test` feature class.

In [2]:
# Bind the `Test` feature class from the same geodatabase to `df`;
# later cells inspect its schema and contents.
df = (
    spark.read
    .format("gdb")
    .options(path="data/test.gdb", name="Test")
    .load()
)

11:29:20 DEBUG GDBIndex$: a00000001::n1024Blocks=1, maxRows = 9, numBytesPerRow=5
11:29:20 DEBUG GDBTable$: Opening 'data/test.gdb/a00000001.gdbtable'
11:29:20 DEBUG GDBTable$: maxRows=9
11:29:20 DEBUG GDBTable$: largestSize=62
11:29:20 DEBUG GDBTable$: gdbVer=4
11:29:20 DEBUG GDBTable$: geometryType=0
11:29:20 DEBUG GDBTable$: hasZ=false hasM=false
11:29:20 DEBUG GDBTable$: numFields=3
11:29:20 DEBUG GDBTable$: nameLen=2 name=ID aliasLen=0 alias=ID fieldType=6
11:29:20 DEBUG GDBTable$: nameLen=4 name=Name aliasLen=0 alias=Name fieldType=4
11:29:20 DEBUG GDBTable$: nameLen=10 name=FileFormat aliasLen=0 alias=FileFormat fieldType=1
11:29:20 DEBUG GDBIndex: indicies::startRow=0 numRows=9 numBytesPerRow=5
11:29:20 DEBUG FileGDB$: listTables::id=1 name=GDB_SystemCatalog fileFormat=0
11:29:20 DEBUG FileGDB$: listTables::id=2 name=GDB_DBTune fileFormat=0
11:29:20 DEBUG FileGDB$: listTables::id=3 name=GDB_SpatialRefs fileFormat=0
11:29:20 DEBUG FileGDB$: listTables::id=4 name=GDB_Items fileFo

In [3]:
# Print the schema of `df` as a tree: column names, types and nullability.
df.printSchema()

root
 |-- OBJECTID: integer (nullable = false)
 |-- Shape: struct (nullable = true)
 |    |-- x: double (nullable = true)
 |    |-- y: double (nullable = true)
 |-- AText: string (nullable = true)
 |-- ALong: integer (nullable = true)
 |-- ADouble: double (nullable = true)
 |-- ADate: timestamp (nullable = true)
 |-- AFloat: float (nullable = true)



### Get `Shape` field metadata.

In [4]:
# Find the field named 'Shape' in the schema and print the metadata
# attached to it: geometry type, spatial reference WKT, M/Z flags and extent.
for f in df.schema:
    if f.name != 'Shape':
        continue  # not the geometry column; keep scanning
    shape_meta = f.metadata
    print(shape_meta['geomType'])
    print(shape_meta['srsWKT'])
    print(shape_meta['hasM'], shape_meta['hasZ'])
    # Extent values, one per line, in this exact order.
    for extent_key in ('ymax', 'ymin', 'xmin', 'xmax'):
        print(shape_meta[extent_key])
    break  # report only the first field named 'Shape'

1
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
False False
34.455369428000004
25.270785538000005
25.413723117000004
36.415447984000004


In [5]:
# Display the attribute columns alongside the x/y members of the `Shape`
# struct, without truncating cell values.
(
    df
    .select(
        "OBJECTID", "AText", "ALong", "ADouble", "ADate",
        "Shape.x", "Shape.y",
    )
    .show(truncate=False)
)

+--------+-----+-----+------------------+-------------------+------------------+------------------+
|OBJECTID|AText|ALong|ADouble           |ADate              |x                 |y                 |
+--------+-----+-----+------------------+-------------------+------------------+------------------+
|1       |1111 |1    |1.1               |2022-04-16 20:00:00|25.413723117000075|31.306856180000068|
|2       |2222 |2    |2.2               |2022-04-17 20:00:00|33.82834403600003 |34.45536942800004 |
|3       |3333 |3    |3.3000000000000003|2022-04-19 20:00:00|36.415447984000025|28.807876261000047|
|4       |4444 |4    |4.4               |2022-04-17 20:00:00|28.405878693000034|25.27078553800004 |
+--------+-----+-----+------------------+-------------------+------------------+------------------+



11:29:26 DEBUG GDBIndex$: a00000001::n1024Blocks=1, maxRows = 9, numBytesPerRow=5
11:29:26 DEBUG GDBTable$: Opening 'data/test.gdb/a00000001.gdbtable'
11:29:26 DEBUG GDBTable$: maxRows=9
11:29:26 DEBUG GDBTable$: largestSize=62
11:29:26 DEBUG GDBTable$: gdbVer=4
11:29:26 DEBUG GDBTable$: geometryType=0
11:29:26 DEBUG GDBTable$: hasZ=false hasM=false
11:29:26 DEBUG GDBTable$: numFields=3
11:29:26 DEBUG GDBTable$: nameLen=2 name=ID aliasLen=0 alias=ID fieldType=6
11:29:26 DEBUG GDBTable$: nameLen=4 name=Name aliasLen=0 alias=Name fieldType=4
11:29:26 DEBUG GDBTable$: nameLen=10 name=FileFormat aliasLen=0 alias=FileFormat fieldType=1
11:29:26 DEBUG GDBIndex: indicies::startRow=0 numRows=9 numBytesPerRow=5
11:29:26 DEBUG FileGDB$: listTables::id=1 name=GDB_SystemCatalog fileFormat=0
11:29:26 DEBUG FileGDB$: listTables::id=2 name=GDB_DBTune fileFormat=0
11:29:26 DEBUG FileGDB$: listTables::id=3 name=GDB_SpatialRefs fileFormat=0
11:29:26 DEBUG FileGDB$: listTables::id=4 name=GDB_Items fileFo