In [0]:
from pyspark.sql import functions as f
from pyspark.sql.functions import expr, explode, col
from sedona.spark import *

In [0]:
# Storage account and container that hold the geospatial dataset.
dataset_storage_account_name = "melikadatabricksstorage"
dataset_container_name = "geospatial-dataset"

# Directory inside the container and the file names used by this notebook.
dataset_dir = "raster/orthophoto/Buffalo"
geotiff_name = "2022_4BandImagery_NewYork_J1187738.tif"
geopackage_name = "Buffalo.gpkg"

catalog_name = "geospatial"

# Fully qualified abfss:// URL of the GeoTIFF, assembled from the parts above.
file_url = (
    f"abfss://{dataset_container_name}@{dataset_storage_account_name}"
    f".dfs.core.windows.net/{dataset_dir}/{geotiff_name}"
)

In [0]:
# Build (or reuse) the Spark session through Sedona's builder.
# NOTE(review): the only option set here is an anonymous-credentials provider
# for the S3 bucket "wherobots-examples", whereas the data loaded in this
# notebook is addressed with an abfss:// URL — presumably a leftover from a
# template; confirm before removing.
config = (
    SedonaContext.builder()
    .config(
        "spark.hadoop.fs.s3a.bucket.wherobots-examples.aws.credentials.provider",
        "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider",
    )
    .getOrCreate()
)

# `sedona` is the session handle used for Sedona SQL in the cells below.
sedona = SedonaContext.create(config)

In [0]:
# Load the GeoTIFF through Spark's binaryFile source and decode its `content`
# column into a Sedona raster column named `raster`.
buffalo_df = (
    sedona.read.format("binaryFile")
    .load(file_url)
    .withColumn("raster", f.expr("RS_FromGeoTiff(content)"))
)

# Register the frame so the SQL cells below can query it as `buffalo`.
buffalo_df.createOrReplaceTempView("buffalo")

buffalo_df.printSchema()

# count() is a Spark action; its value was previously computed and thrown away
# (only a cell's last expression is echoed). Print it so the job is useful.
print(f"Rasters loaded: {buffalo_df.count()}")

display(buffalo_df)

In [0]:

# Raster metadata for the loaded GeoTIFF.
# Uses the `sedona` session created earlier in this notebook instead of the
# implicit `spark` global, which this notebook never defines itself.
metadata_info = sedona.sql("SELECT RS_MetaData(raster) FROM buffalo")
display(metadata_info)

In [0]:
# Print the number of bands reported for the raster.
(
    sedona
    .sql("SELECT RS_NumBands(raster) FROM buffalo")
    .show()
)

In [0]:
# Convert the raster to an embeddable image and render it in the notebook.
# NOTE(review): the second argument (500) is presumably the image width in
# pixels — confirm against the RS_AsImage documentation.
htmlDf = sedona.sql(
    "SELECT RS_AsImage(raster, 500) FROM buffalo"
)
SedonaUtils.display_image(htmlDf)

In [0]:
# Summary statistics of the raster as returned by RS_SummaryStatsAll.
# (The former `%python` magic was dropped: every other cell in this notebook
# is plain Python, so it was redundant, and it is not a valid IPython line
# magic outside Databricks.)
all_stats = sedona.sql("SELECT RS_SummaryStatsAll(raster) FROM buffalo")
display(all_stats)

In [0]:
# Pixel data type of the raster band.
# Queried through `sedona` (the session this notebook creates) rather than the
# implicit `spark` global, which is never defined in this notebook.
display(sedona.sql("select RS_BandPixelType(raster) from buffalo"))

In [0]:
# Pixel count of the raster as returned by RS_Count.
# Queried through `sedona` (the session this notebook creates) rather than the
# implicit `spark` global, which is never defined in this notebook.
display(sedona.sql("select RS_Count(raster) from buffalo"))

In [0]:
# Explode the raster into tiles (RS_TileExplode is called with 100, 100 —
# presumably tile width and height in pixels; confirm against the Sedona docs)
# and rename the generated `x` / `y` columns to `tile_x` / `tile_y`.
tiled_df = (
    buffalo_df
    .selectExpr("RS_TileExplode(raster, 100, 100)")
    .withColumnRenamed("x", "tile_x")
    .withColumnRenamed("y", "tile_y")
)
tiled_df.show(3)

# Expose the tiles to SQL as `tiled_df_vw` for the index computations below.
tiled_df.createOrReplaceTempView("tiled_df_vw")

In [0]:
# Preview a single row of the tiled frame.
display(
    tiled_df.limit(1)
)

In [0]:
# Take one tile and register it as its own view so it can be rendered via SQL.
first_tile = tiled_df.limit(1)
first_tile.createOrReplaceTempView("first_tile_vw")

# Render that tile as an image in the notebook output.
htmlDf = sedona.sql(
    "SELECT RS_AsImage(tile) FROM first_tile_vw"
)
SedonaUtils.display_image(htmlDf)

In [0]:
# NDVI = (NIR - Red) / (NIR + Red), computed per tile.
# NOTE(review): assumes band 1 = Red and band 4 = NIR for this 4-band
# orthoimagery — TODO confirm against the dataset's band order.
#
# Sedona documents RS_Subtract(a, b) as "band2 - band1", i.e. b - a, so the
# Red band must be passed FIRST to obtain NIR - Red. The previous operand
# order (NIR, Red) produced Red - NIR and therefore a sign-inverted NDVI.
buffalo_ndvi_df = sedona.sql(
    """
    SELECT *,
           RS_Divide(
               RS_Subtract(RS_BandAsArray(tile, 1), RS_BandAsArray(tile, 4)),
               RS_Add(RS_BandAsArray(tile, 4), RS_BandAsArray(tile, 1))
           ) AS ndvi
    FROM tiled_df_vw
    """
)

# Register the result for the join that combines all indices.
buffalo_ndvi_df.createOrReplaceTempView("buffalo_ndvi_vw")

display(buffalo_ndvi_df.limit(1))

In [0]:
# NDWI computed per tile from bands 2 and 4.
# NOTE(review): assumes band 2 = Green and band 4 = NIR — TODO confirm.
#
# Sedona documents RS_Subtract(a, b) as "band2 - band1", i.e. b - a, so
# RS_Subtract(band 4, band 2) evaluates to Green - NIR and the expression is
# (Green - NIR) / (Green + NIR). The operand order is intentionally unchanged.
# Queried through `sedona` (the session this notebook creates) rather than the
# implicit `spark` global, which is never defined in this notebook.
buffalo_ndwi_df = sedona.sql(
    """
    SELECT *,
           RS_Divide(
               RS_Subtract(RS_BandAsArray(tile, 4), RS_BandAsArray(tile, 2)),
               RS_Add(RS_BandAsArray(tile, 4), RS_BandAsArray(tile, 2))
           ) AS ndwi
    FROM tiled_df_vw
    """
)

# Register the result for the join that combines all indices.
buffalo_ndwi_df.createOrReplaceTempView("buffalo_ndwi_vw")

display(buffalo_ndwi_df.limit(1))

In [0]:
# NDGI = (Green - Red) / (Green + Red), computed per tile.
# NOTE(review): assumes band 1 = Red and band 2 = Green — TODO confirm.
#
# Sedona documents RS_Subtract(a, b) as "band2 - band1", i.e. b - a, so the
# Red band must be passed FIRST to obtain Green - Red. The previous operand
# order (Green, Red) produced Red - Green and therefore a sign-inverted index.
buffalo_ndgi_df = sedona.sql(
    """
    SELECT *,
           RS_Divide(
               RS_Subtract(RS_BandAsArray(tile, 1), RS_BandAsArray(tile, 2)),
               RS_Add(RS_BandAsArray(tile, 2), RS_BandAsArray(tile, 1))
           ) AS ndgi
    FROM tiled_df_vw
    """
)

# Register the result for the join that combines all indices.
buffalo_ndgi_df.createOrReplaceTempView("buffalo_ndgi_vw")

display(buffalo_ndgi_df.limit(1))

In [0]:
# Combine the three per-tile index views into one frame, matching rows on the
# (tile_x, tile_y) pair; the tile itself and ndvi come from the NDVI view.
indices_query = """SELECT a.tile_x, a.tile_y, a.tile, a.ndvi, b.ndwi, c.ndgi
FROM buffalo_ndvi_vw a
JOIN buffalo_ndwi_vw b
ON a.tile_x=b.tile_x AND a.tile_y=b.tile_y
JOIN buffalo_ndgi_vw c
ON a.tile_x=c.tile_x AND a.tile_y=c.tile_y"""
buffalo_indices_df = sedona.sql(indices_query)

# Show one combined row as a sanity check.
display(
    buffalo_indices_df.limit(1)
)

In [0]:
# Print the column names and types of the combined indices frame
# (tile_x, tile_y, tile, ndvi, ndwi, ndgi as selected by the join above).
buffalo_indices_df.printSchema()