### Getting Started with Sedona Queries

In [1]:
from pyspark.sql import SparkSession
from sedona.register import SedonaRegistrator

In [2]:
#Build (or reuse, via getOrCreate) the Spark session used by every later cell
#spark.jars.packages is given the Maven coordinates of the Sedona Python adapter
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("Python")
    .config(
        "spark.jars.packages",
        "org.apache.sedona:sedona-python-adapter-2.4_2.11:1.0.1-incubating",
    )
    .getOrCreate()
)

In [3]:
#Register SedonaSQL functions in your Spark Session
#The value returned by this call is what the notebook echoes below the cell
SedonaRegistrator.registerAll(spark)

True

In [4]:
#Load the county file as a headerless CSV, splitting fields on "|"
#NOTE(review): the .tsv extension suggests tab-separated data, yet the delimiter is "|" - confirm this is intended
df = (
    spark.read
    .format("csv")
    .option("delimiter", "|")
    .option("header", "false")
    .load("county_small.tsv")
)
#Preview the first 5 rows
df.show(5)

+--------------------+
|                 _c0|
+--------------------+
|POLYGON ((-97.019...|
|POLYGON ((-123.43...|
|POLYGON ((-104.56...|
|POLYGON ((-96.910...|
|POLYGON ((-98.273...|
+--------------------+
only showing top 5 rows



In [5]:
#Expose df to Spark SQL as the temporary view "geo_table" so it can be queried by name
df.createOrReplaceTempView("geo_table")

In [6]:
#Print the schema of the raw DataFrame to check its column names and types before conversion
df.printSchema()

root
 |-- _c0: string (nullable = true)



In [7]:
#Parse the WKT text held in _c0 into a geometry column named shape
#The SQL literal (including its internal whitespace) is kept exactly as written
geo_query = spark.sql(
    '''SELECT ST_GeomFromWKT(_c0) AS shape
                         FROM geo_table'''
)

In [8]:
#Confirm that the shape column is of geometry type
geo_query.printSchema()

root
 |-- shape: geometry (nullable = false)



In [9]:
#Preview the first 5 rows of the converted data
geo_query.show(5)

+--------------------+
|               shape|
+--------------------+
|POLYGON ((-97.019...|
|POLYGON ((-123.43...|
|POLYGON ((-104.56...|
|POLYGON ((-96.910...|
|POLYGON ((-98.273...|
+--------------------+
only showing top 5 rows

