In [0]:
from pyspark.sql import functions as f
from pyspark.sql.functions import expr, explode, col
from sedona.spark import *

In [0]:
# Azure Data Lake Storage (abfss) coordinates for the Buffalo orthophoto GeoTIFF.
dataset_storage_account_name = "melikadatabricksstorage"
dataset_container_name = "geospatial-dataset"
dataset_dir = "raster/orthophoto/Buffalo"

# File names inside `dataset_dir`; only `geotiff_name` is used to build `file_url`.
geotiff_name = "2022_4BandImagery_NewYork_J1187738.tif"
geopackage_name = "Buffalo.gpkg"

catalog_name = "geospatial"

# Fully qualified abfss:// URL: container@account host, then directory and file name.
file_url = (
    f"abfss://{dataset_container_name}@{dataset_storage_account_name}"
    f".dfs.core.windows.net/{dataset_dir}/{geotiff_name}"
)

In [0]:
# S3 bucket holding the Essex Junction orthophotos.
dataset_bucket_name = "revodata-databricks-geospatial"

# GeoTIFF URL per acquisition year; the keys are the year strings used by later cells.
file_urls = {
    "2011": f"s3://{dataset_bucket_name}/geospatial-dataset/raster/orthophoto/essex-junction/2011/2011_4BandImagery_J1191011.tif",
    "2021": f"s3://{dataset_bucket_name}/geospatial-dataset/raster/orthophoto/essex-junction/2021/2021_4BandImagery_Vermont_J1191013.tif",
}

# Same value as the `catalog_name` assigned in the earlier storage-config cell.
catalog_name = "geospatial"


In [0]:
# Echo the 2021 GeoTIFF URL that the loading cells below will read.
url_2021 = file_urls["2021"]
print(url_2021)

In [0]:
# Build (or reuse) the session through Sedona's builder, then hand it to
# SedonaContext.create to obtain the `sedona` handle used by the cells below.
# NOTE(review): the credentials-provider setting targets the `wherobots-examples`
# bucket, which no cell visible here reads from — confirm it is still needed.
config = (
    SedonaContext.builder()
    .config(
        "spark.hadoop.fs.s3a.bucket.wherobots-examples.aws.credentials.provider",
        "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider",
    )
    .getOrCreate()
)

sedona = SedonaContext.create(config)




In [0]:
# Load the 2021 GeoTIFF through the binaryFile source and decode its `content`
# bytes into a `raster` column with RS_FromGeoTiff.
df_image_new = (
    sedona.read.format("binaryFile")
    .load(file_urls["2021"])
    .withColumn("raster", f.expr("RS_FromGeoTiff(content)"))
)

# Expose the frame to SQL cells under the name `image_new_vw`.
df_image_new.createOrReplaceTempView("image_new_vw")

df_image_new.printSchema()
df_image_new.count()  # runs a Spark action; the value is not shown (not the last expression)
display(df_image_new)

In [0]:
# Load the 2011 GeoTIFF through the binaryFile source and decode its `content`
# bytes into a `raster` column with RS_FromGeoTiff.
df_image_old = (
    sedona.read.format("binaryFile")
    .load(file_urls["2011"])
    .withColumn("raster", f.expr("RS_FromGeoTiff(content)"))
)

# Expose the frame to SQL cells under the name `image_old_vw`.
df_image_old.createOrReplaceTempView("image_old_vw")

df_image_old.printSchema()
df_image_old.count()  # runs a Spark action; the value is not shown (not the last expression)
display(df_image_old)

In [0]:

# Query RS_MetaData for the 2021 raster view.
# Uses the `sedona` session created via SedonaContext.create (as the neighbouring
# cells do) instead of the ambient `spark` global, which this notebook never defines.
metadata_info = sedona.sql("SELECT RS_MetaData(raster) FROM image_new_vw")
display(metadata_info)

In [0]:
# Print the RS_NumBands result for the 2021 raster view.
num_bands_df = sedona.sql("SELECT RS_NumBands(raster) FROM image_new_vw")
num_bands_df.show()

In [0]:
# Render the 2021 raster with RS_AsImage (second argument 500) and show it inline
# through Sedona's display helper.
image_sql = "SELECT RS_AsImage(raster, 500) FROM image_new_vw"
htmlDf = sedona.sql(image_sql)
SedonaUtils.display_image(htmlDf)

In [0]:
%python
# Run RS_SummaryStatsAll over the 2021 raster view and display the result.
summary_stats_sql = "SELECT RS_SummaryStatsAll(raster) FROM image_new_vw"
all_stats = sedona.sql(summary_stats_sql)
display(all_stats)

In [0]:
# Display the RS_BandPixelType result for the 2021 raster view.
# Runs on the `sedona` session created in this notebook rather than the ambient
# `spark` global, consistent with the other Sedona SQL cells.
display(sedona.sql("select RS_BandPixelType(raster) from image_new_vw"))

In [0]:
# Display the RS_Count result for the 2021 raster view.
# Runs on the `sedona` session created in this notebook rather than the ambient
# `spark` global, consistent with the other Sedona SQL cells.
display(sedona.sql("select RS_Count(raster) from image_new_vw"))

In [0]:
# Split the 2021 raster with RS_TileExplode(raster, 100, 100) and rename the
# generated `x` / `y` columns to `tile_x` / `tile_y` (used as join keys later).
tiled_df_new = (
    df_image_new
    .selectExpr("RS_TileExplode(raster, 100, 100)")
    .withColumnRenamed("x", "tile_x")
    .withColumnRenamed("y", "tile_y")
)
tiled_df_new.show(3)

# Register the tiles for the SQL cells that compute the indices.
tiled_df_new.createOrReplaceTempView("tiled_df_new_vw")

# Last expression of the cell, so the tile count is shown as the cell output.
tiled_df_new.count()

In [0]:
# Preview a single tile row.
one_tile_preview = tiled_df_new.limit(1)
display(one_tile_preview)

In [0]:
# Take one tile row and register it as `first_tile_vw` so it can be rendered via SQL.
first_tile = tiled_df_new.limit(1)
first_tile.createOrReplaceTempView("first_tile_vw")

# Render that tile with RS_AsImage and show it inline.
tile_image_sql = "SELECT RS_AsImage(tile) FROM first_tile_vw"
htmlDf = sedona.sql(tile_image_sql)
SedonaUtils.display_image(htmlDf)

In [0]:
# Per-tile normalized difference of bands 4 and 1, stored as the `ndvi` array column.
# Runs on the `sedona` session created in this notebook rather than the ambient
# `spark` global, consistent with the other Sedona SQL cells.
# NOTE(review): Sedona's docs appear to describe RS_Subtract(a, b) as b - a; if so
# the numerator here is band 1 minus band 4. Confirm the operand order and the
# band layout of the imagery yield the intended NDVI sign.
ndvi_df_new = sedona.sql("select *, RS_Divide(RS_Subtract(RS_BandAsArray(tile, 4), RS_BandAsArray(tile, 1)), RS_Add(RS_BandAsArray(tile, 4), RS_BandAsArray(tile, 1))) AS ndvi from tiled_df_new_vw")

ndvi_df_new.createOrReplaceTempView("ndvi_df_new_vw")

display(ndvi_df_new.limit(1))

In [0]:
# Per-tile normalized difference of bands 4 and 2, stored as the `ndwi` array column.
# Runs on the `sedona` session created in this notebook rather than the ambient
# `spark` global, consistent with the other Sedona SQL cells.
# NOTE(review): Sedona's docs appear to describe RS_Subtract(a, b) as b - a; if so
# the numerator here is band 2 minus band 4. Confirm the operand order and the
# band layout of the imagery yield the intended NDWI sign.
ndwi_df_new = sedona.sql("select *, RS_Divide(RS_Subtract(RS_BandAsArray(tile, 4), RS_BandAsArray(tile, 2)), RS_Add(RS_BandAsArray(tile, 4), RS_BandAsArray(tile, 2))) AS ndwi from tiled_df_new_vw")

ndwi_df_new.createOrReplaceTempView("ndwi_df_new_vw")

display(ndwi_df_new.limit(1))

In [0]:
# Per-tile normalized difference of bands 2 and 1, stored as the `ndgi` array column.
# Runs on the `sedona` session created in this notebook rather than the ambient
# `spark` global, consistent with the other Sedona SQL cells.
# NOTE(review): Sedona's docs appear to describe RS_Subtract(a, b) as b - a; if so
# the numerator here is band 1 minus band 2. Confirm the operand order and the
# band layout of the imagery yield the intended NDGI sign.
ndgi_df_new = sedona.sql("select *, RS_Divide(RS_Subtract(RS_BandAsArray(tile, 2), RS_BandAsArray(tile, 1)), RS_Add(RS_BandAsArray(tile, 2), RS_BandAsArray(tile, 1))) AS ndgi from tiled_df_new_vw")
ndgi_df_new.createOrReplaceTempView("ndgi_df_new_vw")
display(ndgi_df_new.limit(1))

In [0]:
# Combine the three per-tile index views into one frame, matching rows on the
# (tile_x, tile_y) pair; the tile itself is taken from the NDVI view.
indices_join_sql = """SELECT a.tile_x, a.tile_y, a.tile, a.ndvi, b.ndwi, c.ndgi
FROM ndvi_df_new_vw a
JOIN ndwi_df_new_vw b
ON a.tile_x=b.tile_x AND a.tile_y=b.tile_y
JOIN ndgi_df_new_vw c
ON a.tile_x=c.tile_x AND a.tile_y=c.tile_y"""
indices_df_new = sedona.sql(indices_join_sql)

indices_preview = indices_df_new.limit(1)
display(indices_preview)

In [0]:
# Print the schema of the joined indices frame (tile keys, tile, ndvi, ndwi, ndgi).
indices_df_new.printSchema()

In [0]:
# Load the GeoTIFF at `file_url` through the binaryFile source and decode its
# `content` bytes into a `raster` column with RS_FromGeoTiff.
# Fix: the raster column was previously derived from `buffalo_df`, a name that is
# not defined anywhere in the visible notebook (NameError on a fresh run); it is
# now derived from the frame that was just loaded.
neon_df = (
    sedona.read.format("binaryFile")
    .load(file_url)
    .withColumn("raster", f.expr("RS_FromGeoTiff(content)"))
)

# Expose the frame to SQL cells under the name `neon`.
neon_df.createOrReplaceTempView("neon")

neon_df.printSchema()
neon_df.count()  # runs a Spark action; the value is not shown (not the last expression)
display(neon_df)

In [0]:
# Split the raster with RS_TileExplode(raster, 100, 100); rename `x` / `y` to
# `tile_x` / `tile_y` and the `tile` column to `raster`.
tiled_df = (
    neon_df
    .selectExpr("RS_TileExplode(raster, 100, 100)")
    .withColumnRenamed("x", "tile_x")
    .withColumnRenamed("y", "tile_y")
    .withColumnRenamed("tile", "raster")
)
tiled_df.show(3)

# Replaces the `neon` temp view registered from `neon_df`: from here on `neon`
# refers to the tiled frame.
tiled_df.createOrReplaceTempView("neon")

In [0]:
# Compute a normalized-difference raster per tile with RS_MapAlgebra: the script
# uses rast[1] and rast[0], and 'D' is passed as the second argument.
ndgi_map_algebra_sql = """
SELECT 
  RS_MapAlgebra(raster, 'D', 'out = ((rast[1] - rast[0]) / (rast[1] + rast[0]));') 
AS raster
FROM neon
"""
ndgi_df = sedona.sql(ndgi_map_algebra_sql)

# Register the result as `ndgi` for SQL access, then display it.
ndgi_df.createOrReplaceTempView("ndgi")
display(ndgi_df)


In [0]:
# Array-based variant: keep every column of `neon` and add an `ndgi` array column
# built from bands 2 and 1 of each tile. This rebinds `ndgi_df` and replaces the
# `ndgi` temp view produced by the RS_MapAlgebra cell.
# Runs on the `sedona` session created in this notebook rather than the ambient
# `spark` global, consistent with the other Sedona SQL cells.
# NOTE(review): Sedona's docs appear to describe RS_Subtract(a, b) as b - a; if so
# this numerator is band 1 minus band 2, the opposite sign of the RS_MapAlgebra
# expression (rast[1] - rast[0]). Confirm which sign is intended.
ndgi_df = sedona.sql("select *, RS_Divide(RS_Subtract(RS_BandAsArray(raster, 2), RS_BandAsArray(raster, 1)), RS_Add(RS_BandAsArray(raster, 2), RS_BandAsArray(raster, 1))) AS ndgi from neon")
ndgi_df.createOrReplaceTempView("ndgi")
display(ndgi_df.limit(3))

In [0]:
# Print the rows of the `ndgi` view without truncating column values.
ndgi_rows = sedona.sql("SELECT * FROM ndgi")
ndgi_rows.show(truncate=False)