In [0]:
%run ../get_user

In [0]:
# Ask Spark SQL who the current user is; the query yields a single row with
# a single column, so take the first row and then its first field.
current_user_row = spark.sql("SELECT current_user()").collect()[0]
user_email = current_user_row[0]

# get_username_from_email is not defined in this notebook; presumably it is
# brought in by the `%run ../get_user` cell above -- confirm.
username = get_username_from_email(user_email)
print(username)

In [0]:
from sedona.spark import *
from pyspark.sql.functions import expr, col

# Build the Sedona configuration, requesting the Sedona shaded jar and the
# GeoTools wrapper through `spark.jars.packages` (a single comma-separated
# string, assembled here from two adjacent literals).
config = (
    SedonaContext.builder()
    .config(
        "spark.jars.packages",
        "org.apache.sedona:sedona-spark-shaded-3.3_2.12:1.7.1,"
        "org.datasyslab:geotools-wrapper:1.7.1-28.5",
    )
    .getOrCreate()
)

# Sedona-enabled session handle used by the cells below for reading data.
sedona = SedonaContext.create(config)

In [0]:
# --- Input dataset location -------------------------------------------------
# NOTE(review): dataset_bucket_name is not referenced by the cells visible in
# this notebook section; keep it in case other cells depend on it.
dataset_bucket_name = "revodata-databricks-geospatial"
dataset_input_dir = "geospatial_dataset/vector/nl/zoetermeer"

# --- Input files ------------------------------------------------------------
geojson_file = "monumenten.geojson"
# NOTE(review): gpkg_files is likewise not used by the visible cells.
gpkg_files = [
    "bag_pand.gpkg",
    "bestemmingsplangebied.gpkg",
    "woonplaats.gpkg",
]

# --- Output catalog / schema ------------------------------------------------
catalog_name = "geospatial"
schema_name = "zoetermeer"


In [0]:
# Make sure the target schema exists; IF NOT EXISTS keeps the statement safe
# to re-run when the schema is already there.
create_schema_stmt = f"CREATE SCHEMA IF NOT EXISTS {catalog_name}.{schema_name}"
spark.sql(create_schema_stmt)

In [0]:

# Location of the monuments GeoJSON file under /Volumes.
monumenten_path = f"/Volumes/{catalog_name}/inputs/{dataset_input_dir}/{geojson_file}"

# Nested property fields that are lifted to top-level columns, in output order.
property_columns = [
    "properties.FID",
    "properties.ObjectId",
    "properties.description",
    "properties.icon_color",
    "properties.is_video",
    "properties.lat",
    "properties.long",
    "properties.name",
    "properties.pic_url",
    "properties.thumb_url",
]

# Read the file as multi-line GeoJSON, then turn the `features` array into one
# row per feature and expand each feature struct into its own columns.
monumenten_features = (
    sedona.read.format("geojson")
    .option("multiLine", "true")
    .load(monumenten_path)
    .selectExpr("explode(features) as features")
    .select("features.*")
)

# Keep the geometry plus the selected properties.
monumenten_flat = monumenten_features.select(
    "geometry",
    *[col(property_path) for property_path in property_columns],
)

# Tag the geometry with SRID 4326 and store it as EWKB.
monumenten_wkb = monumenten_flat.withColumn(
    "wkb_geometry", expr("ST_AsEWKB(ST_SetSrid(geometry, 4326))")
)

# Derive `year` by asking ai_extract for the 'bouwjaar' label in the
# description text, then drop the original geometry column now that the
# EWKB copy exists.
df = monumenten_wkb.withColumn(
    "year", expr("ai_extract(description, array('bouwjaar')).bouwjaar")
).drop("geometry")

display(df)

In [0]:
# Write the result to a table whose name is suffixed with the username,
# replacing any existing contents ("overwrite" mode).
monumenten_table = f"{catalog_name}.{schema_name}.monumenten_{username}"
df.write.mode("overwrite").saveAsTable(monumenten_table)