In [None]:
!pip install apache-sedona

In [None]:
import os

import geopandas as gpd
from pyspark.sql import SparkSession

from sedona.spark import *

In [None]:
# Build (or reuse) the session, asking Spark to fetch the Sedona and
# GeoTools-wrapper jars listed under spark.jars.packages.
config = (
    SedonaContext.builder()
    .config(
        'spark.jars.packages',
        'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.1,'
        'org.datasyslab:geotools-wrapper:1.4.0-28.2',
    )
    .getOrCreate()
)

# `sedona` is the handle every later cell uses for reads and SQL queries.
sedona = SedonaContext.create(config)

# Carregando os dados de escolas de Goiás

In [None]:
# Column layout for the headerless schools CSV. `geom` is read as plain text
# here; a later cell converts it with ST_GeomFromText.
schema = "geom string,cod_munici string,municipio string,rede string,zona string,cod_esc string,unid_escol string,cep string,endereco string,nro string,compl string,bairro string,lat float,long float,localizaca string,metadad string"

escolas_df = (
    sedona.read.format("csv")
    .option("delimiter", ",")
    .option("header", "false")
    .schema(schema)
    .load("/data/escolas.csv")
)

In [None]:
# Print the column names and types of the schools frame as loaded from the CSV.
escolas_df.printSchema()

In [None]:
# Peek at a single school row without truncating long column values.
escolas_df.show(n=1, truncate=False)

In [None]:
# Import only `expr`: a star import from pyspark.sql.functions would shadow
# Python builtins such as sum, min, max and round for the rest of the notebook.
from pyspark.sql.functions import expr

# Replace the text `geom` column with the result of Sedona's ST_GeomFromText.
# NOTE: this rebinds escolas_df, so the cell is meant to run once per load of
# the CSV; re-running it would apply ST_GeomFromText to the converted column.
escolas_df = escolas_df.withColumn("geom", expr("ST_GeomFromText(geom)"))

In [None]:
# Re-print the schema to check the type of `geom` after the ST_GeomFromText conversion.
escolas_df.printSchema()

# Carregando os dados de municípios de Goiás

In [None]:
# Column layout for the headerless municipalities CSV (this rebinds `schema`,
# which previously described the schools file). `geom` is read as text.
schema = "geom string,cod_municipio string,nome_municipio string,sigla_uf string,area_km2 float"

municipios_df = (
    sedona.read.format("csv")
    .option("delimiter", ",")
    .option("header", "false")
    .schema(schema)
    .load("/data/munic_go.csv")
)

In [None]:
# Print the column names and types of the municipalities frame as loaded from the CSV.
municipios_df.printSchema()

In [None]:
# Replace the text `geom` column with the result of Sedona's ST_GeomFromText.
municipios_df = municipios_df.withColumn(
    "geom",
    expr("ST_GeomFromText(geom)"),
)

In [None]:
# Re-print the schema to check the type of `geom` after the ST_GeomFromText conversion.
municipios_df.printSchema()

In [None]:
# Show one untruncated municipality code as a sanity check on the load.
(
    municipios_df
    .select("cod_municipio")
    .show(n=1, truncate=False)
)

# Junção entre os dados de municípios e escolas de Goiás

In [None]:
# Expose the municipalities frame to SQL under the name `municipios_go`.
municipios_df.createOrReplaceTempView("municipios_go")

In [None]:
# Expose the schools frame to SQL under the name `escolas_go`.
escolas_df.createOrReplaceTempView("escolas_go")

In [None]:
# Spatial join: pair each school with the municipality whose geometry contains
# it, and print the municipality name from both tables for the first 10 matches.
(
    sedona.sql(
        "select m.nome_municipio,e.municipio from municipios_go as m, escolas_go as e where ST_CONTAINS(m.geom, e.geom) limit 10"
    )
    .show(n=10, truncate=False)
)

### ST_Point

In [None]:
# Read the headerless, comma-separated point file; without a schema the
# columns get Spark's default names (_c0, _c1, ...), which the next cell uses.
point_csv_df = (
    sedona.read.format("csv")
    .option("delimiter", ",")
    .option("header", "false")
    .load("file:///data/testpoint.csv")
)

In [None]:
# Register the raw CSV rows for SQL access.
point_csv_df.createOrReplaceTempView("pointtable")

# Build a point geometry from the first two columns, cast to Decimal(24,20).
point_df = sedona.sql(
    "select ST_Point(cast(pointtable._c0 as Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable"
)
point_df.show(n=5)

### ST_GeomFromText

In [None]:
# Read the tab-separated county file (no header row).
polygon_wkt_df = (
    sedona.read.format("csv")
    .option("delimiter", "\t")
    .option("header", "false")
    .load("file:///data/county_small.tsv")
)

# Column _c0 is passed to ST_GeomFromText; column _c6 is kept as the name.
polygon_wkt_df.createOrReplaceTempView("polygontable")
polygon_df = sedona.sql(
    "select polygontable._c6 as name, ST_GeomFromText(polygontable._c0) as countyshape from polygontable"
)
polygon_df.show(n=5)

### ST_GeomFromWKB

In [None]:
# Read the tab-separated WKB county file (no header row).
polygon_wkb_df = (
    sedona.read.format("csv")
    .option("delimiter", "\t")
    .option("header", "false")
    .load("file:///data/county_small_wkb.tsv")
)

# Re-registers the `polygontable` view (replacing any earlier one) and rebinds
# polygon_df; column _c0 is passed to ST_GeomFromWKB, _c6 is kept as the name.
polygon_wkb_df.createOrReplaceTempView("polygontable")
polygon_df = sedona.sql(
    "select polygontable._c6 as name, ST_GeomFromWKB(polygontable._c0) as countyshape from polygontable"
)
polygon_df.show(n=5)

### ST_GeomFromGeoJSON

In [None]:
# The JSON file is read through the CSV reader with a tab delimiter.
# NOTE(review): presumably each line holds one GeoJSON document with no tabs,
# so the whole line lands in _c0 — confirm against the data file.
polygon_json_df = (
    sedona.read.format("csv")
    .option("delimiter", "\t")
    .option("header", "false")
    .load("file:///data/testPolygon.json")
)

# Re-registers the `polygontable` view and rebinds polygon_df.
polygon_json_df.createOrReplaceTempView("polygontable")
polygon_df = sedona.sql(
    "select ST_GeomFromGeoJSON(polygontable._c0) as countyshape from polygontable"
)
polygon_df.show(n=5)

### Spatial Join - Distance Join

In [None]:
# Left side of the join: points from testpoint.csv, tagged with name1 = 'abc'.
point_csv_df_1 = (
    sedona.read.format("csv")
    .option("delimiter", ",")
    .option("header", "false")
    .load("file:///data/testpoint.csv")
)
point_csv_df_1.createOrReplaceTempView("pointtable")

point_df1 = sedona.sql(
    "SELECT ST_Point(cast(pointtable._c0 as Decimal(24,20)),cast(pointtable._c1 as Decimal(24,20))) as pointshape1, \'abc\' as name1 from pointtable"
)
point_df1.createOrReplaceTempView("pointdf1")

# Right side of the join: the same file read again, tagged with name2 = 'def'.
# The `pointtable` view is re-registered here on top of the one created above.
point_csv_df2 = (
    sedona.read.format("csv")
    .option("delimiter", ",")
    .option("header", "false")
    .load("file:///data/testpoint.csv")
)
point_csv_df2.createOrReplaceTempView("pointtable")

point_df2 = sedona.sql(
    "select ST_Point(cast(pointtable._c0 as Decimal(24,20)),cast(pointtable._c1 as Decimal(24,20))) as pointshape2, \'def\' as name2 from pointtable"
)
point_df2.createOrReplaceTempView("pointdf2")

# Keep every pair of points whose ST_Distance is below 2, print the query
# plan, then show the first five pairs.
distance_join_df = sedona.sql(
    "select * from pointdf1, pointdf2 where ST_Distance(pointdf1.pointshape1,pointdf2.pointshape2) < 2"
)
distance_join_df.explain()
distance_join_df.show(n=5)

In [None]:
import pandas as pd

# Load the shapefile with GeoPandas and replace missing values with empty
# strings before handing the frame to Sedona.
# NOTE(review): presumably the replacement avoids nulls during the conversion
# to a Spark DataFrame — confirm.
gdf = gpd.read_file("file:///data/gis_osm_pois_free_1.shp").replace(pd.NA, '')
osm_points = sedona.createDataFrame(gdf)

In [None]:
# Print the column names and types of the frame created from the GeoDataFrame.
osm_points.printSchema()

In [None]:
# Preview the first five rows.
osm_points.show(n=5)

In [None]:
# Expose the frame to SQL under the name `points`.
osm_points.createOrReplaceTempView("points")

In [None]:
# Select the attribute columns from the `points` view and reproject the
# geometry from EPSG:4326 to EPSG:2180 with ST_Transform.
# NOTE(review): Sedona releases before 1.5.0 document ST_Transform as using
# lat/lon axis order by default; if the shapefile coordinates are lon/lat,
# an ST_FlipCoordinates call may be needed — TODO confirm against the data.
transformed_df = sedona.sql(
    """
        SELECT osm_id,
               code,
               fclass,
               name,
               ST_Transform(geometry, 'epsg:4326', 'epsg:2180') as geom 
        FROM points
    """)

In [None]:
# Display the reprojected rows using show()'s default row limit and truncation.
transformed_df.show()