In [1]:
!pip install apache-sedona

[0m

In [2]:
import os

import geopandas as gpd
from pyspark.sql import SparkSession

from sedona.spark import *
from pyspark.sql.functions import *

In [3]:
# Build (or reuse) a session through Sedona's builder, asking Spark to resolve
# the Sedona and GeoTools wrapper jars listed in spark.jars.packages.
config = (
    SedonaContext.builder()
    .config(
        'spark.jars.packages',
        'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.1,'
        'org.datasyslab:geotools-wrapper:1.4.0-28.2',
    )
    .getOrCreate()
)

# Entry point used by the later cells for reading files and running SQL.
sedona = SedonaContext.create(config)

:: loading settings :: url = jar:file:/usr/lib/spark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
org.apache.sedona#sedona-spark-shaded-3.0_2.12 added as a dependency
org.datasyslab#geotools-wrapper added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-68a87485-68d2-4a86-8168-039a1a7e9fbe;1.0
	confs: [default]
	found org.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 in central
	found org.datasyslab#geotools-wrapper;1.4.0-28.2 in central
:: resolution report :: resolve 186ms :: artifacts dl 6ms
	:: modules in use:
	org.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 from central in [default]
	org.datasyslab#geotools-wrapper;1.4.0-28.2 from central in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	|      de

# Carregando os dados de escolas de Goiás

In [4]:
# Explicit DDL schema for the schools CSV: every column is read as a string
# except the lat/long pair, which is read as float.
schema = ",".join([
    "geom string",
    "cod_munici string",
    "municipio string",
    "rede string",
    "zona string",
    "cod_esc string",
    "unid_escol string",
    "cep string",
    "endereco string",
    "nro string",
    "compl string",
    "bairro string",
    "lat float",
    "long float",
    "localizaca string",
    "metadad string",
])

# Read the comma-delimited, header-less file using the schema above.
escolas_df = (
    sedona.read.format("csv")
    .option("delimiter", ",")
    .option("header", "false")
    .schema(schema)
    .load("/data/escolas.csv")
)

In [5]:
# Print the column names and types that resulted from the explicit CSV schema.
escolas_df.printSchema()

root
 |-- geom: string (nullable = true)
 |-- cod_munici: string (nullable = true)
 |-- municipio: string (nullable = true)
 |-- rede: string (nullable = true)
 |-- zona: string (nullable = true)
 |-- cod_esc: string (nullable = true)
 |-- unid_escol: string (nullable = true)
 |-- cep: string (nullable = true)
 |-- endereco: string (nullable = true)
 |-- nro: string (nullable = true)
 |-- compl: string (nullable = true)
 |-- bairro: string (nullable = true)
 |-- lat: float (nullable = true)
 |-- long: float (nullable = true)
 |-- localizaca: string (nullable = true)
 |-- metadad: string (nullable = true)



In [6]:
# Peek at a single school row without truncating long cell values.
escolas_df.show(n=1, truncate=False)

[Stage 3:>                                                          (0 + 1) / 1]

+-----------------------------+--------------+----------+---------+------+---------------+-------------------------------------+---------------+------------------------------------+----+-----+------------+----------+----------+----------+-----------------------------------------------------+
|geom                         |cod_munici    |municipio |rede     |zona  |cod_esc        |unid_escol                           |cep            |endereco                            |nro |compl|bairro      |lat       |long      |localizaca|metadad                                              |
+-----------------------------+--------------+----------+---------+------+---------------+-------------------------------------+---------------+------------------------------------+----+-----+------------+----------+----------+----------+-----------------------------------------------------+
|POINT (-50.285817 -13.769293)|5214051.000000|MUNDO NOVO|Municipal|Urbana|52000877.000000|ESCOLA MUNICIPAL MILCA MARTINS 

                                                                                

In [7]:
# Replace the text in `geom` with the result of Sedona's ST_GeomFromText,
# keeping the same column name.
# NOTE(review): this rebinds `escolas_df` from itself, so re-running the cell
# applies ST_GeomFromText to an already-converted column and will likely fail;
# re-run the load cell first. TODO confirm.
escolas_df = escolas_df.withColumn(
    colName="geom",
    col=expr("ST_GeomFromText(geom)"),
)

In [8]:
# Check the schema again: `geom` should now be reported as geometry, not string.
escolas_df.printSchema()

root
 |-- geom: geometry (nullable = true)
 |-- cod_munici: string (nullable = true)
 |-- municipio: string (nullable = true)
 |-- rede: string (nullable = true)
 |-- zona: string (nullable = true)
 |-- cod_esc: string (nullable = true)
 |-- unid_escol: string (nullable = true)
 |-- cep: string (nullable = true)
 |-- endereco: string (nullable = true)
 |-- nro: string (nullable = true)
 |-- compl: string (nullable = true)
 |-- bairro: string (nullable = true)
 |-- lat: float (nullable = true)
 |-- long: float (nullable = true)
 |-- localizaca: string (nullable = true)
 |-- metadad: string (nullable = true)



# Carregando os dados de municípios de Goiás

In [9]:
# Explicit DDL schema for the municipalities CSV; only area_km2 is numeric.
schema = ",".join([
    "geom string",
    "cod_municipio string",
    "nome_municipio string",
    "sigla_uf string",
    "area_km2 float",
])

# Read the comma-delimited, header-less file using the schema above.
municipios_df = (
    sedona.read.format("csv")
    .option("delimiter", ",")
    .option("header", "false")
    .schema(schema)
    .load("/data/munic_go.csv")
)

In [10]:
# Print the column names and types that resulted from the explicit CSV schema.
municipios_df.printSchema()

root
 |-- geom: string (nullable = true)
 |-- cod_municipio: string (nullable = true)
 |-- nome_municipio: string (nullable = true)
 |-- sigla_uf: string (nullable = true)
 |-- area_km2: float (nullable = true)



In [11]:
# Replace the text in `geom` with the result of Sedona's ST_GeomFromText,
# keeping the same column name.
# NOTE(review): this rebinds `municipios_df` from itself, so re-running the
# cell applies ST_GeomFromText to an already-converted column and will likely
# fail; re-run the load cell first. TODO confirm.
municipios_df = municipios_df.withColumn(
    colName="geom",
    col=expr("ST_GeomFromText(geom)"),
)

In [12]:
# Check the schema again: `geom` should now be reported as geometry, not string.
municipios_df.printSchema()

root
 |-- geom: geometry (nullable = true)
 |-- cod_municipio: string (nullable = true)
 |-- nome_municipio: string (nullable = true)
 |-- sigla_uf: string (nullable = true)
 |-- area_km2: float (nullable = true)



In [13]:
# Show one municipality code, untruncated, to inspect its format.
municipios_df.select("cod_municipio").show(n=1, truncate=False)

+-------------+
|cod_municipio|
+-------------+
|5200050      |
+-------------+
only showing top 1 row



# Junção entre os dados de municípios e escolas de Goiás

In [14]:
# Expose the municipalities DataFrame to SQL under the view name "municipios_go".
municipios_df.createOrReplaceTempView("municipios_go")

In [15]:
# Expose the schools DataFrame to SQL under the view name "escolas_go".
escolas_df.createOrReplaceTempView("escolas_go")

In [16]:
# Spatial join: pair each municipality with the schools whose geometry its
# geometry contains, and return both name columns side by side.
# The query text is split across adjacent string literals only for readability.
spatial_join_sql = (
    "select m.nome_municipio,e.municipio "
    "from municipios_go as m, escolas_go as e "
    "where ST_CONTAINS(m.geom, e.geom) "
    "limit 10"
)

sedona.sql(spatial_join_sql).show(n=10, truncate=False)

[Stage 6:>                                                          (0 + 1) / 1]

+---------------+---------------+
|nome_municipio |municipio      |
+---------------+---------------+
|Abadia de Goiás|ABADIA DE GOIAS|
|Abadia de Goiás|ABADIA DE GOIAS|
|Abadia de Goiás|ABADIA DE GOIAS|
|Abadia de Goiás|ABADIA DE GOIAS|
|Abadia de Goiás|ABADIA DE GOIAS|
|Abadia de Goiás|ABADIA DE GOIAS|
|Abadia de Goiás|ABADIA DE GOIAS|
|Abadiânia      |ABADIANIA      |
|Abadiânia      |ABADIANIA      |
|Abadiânia      |ABADIANIA      |
+---------------+---------------+



                                                                                