In [14]:
from sedona.spark import *
from pathlib import Path
from pyspark.sql import DataFrame
import json

from ipyleaflet import Map, basemaps, basemap_to_tiles, MarkerCluster, Marker, AwesomeIcon
from ipywidgets import Layout
import numpy as np

In [2]:
# get the project root dir
project_root_dir = Path.cwd().parent.parent
data_dir = f"{project_root_dir}/data"
win_root_dir = "C:/Users/PLIU/Documents/ubuntu_share/data_set"
fr_commune_file_path = f"{win_root_dir}/kaggle/geospatial/communes_fr_geoparquet"

In [3]:
# build a sedona session (sedona = 1.6.1)
jar_folder = Path(f"{project_root_dir}/jars/sedona-35-213-161")
jar_list = [str(jar) for jar in jar_folder.iterdir() if jar.is_file()]
jar_path = ",".join(jar_list)

# build a sedona session (sedona = 1.6.1) offline
config = SedonaContext.builder() \
    .master("local[*]") \
    .config('spark.jars', jar_path). \
    getOrCreate()

In [4]:
# create a sedona context
sedona = SedonaContext.create(config)
sc = sedona.sparkContext
spark = sedona.getActiveSession()


In [5]:
# this sets the encoding of shape files
sc.setSystemProperty("sedona.global.charset", "utf8")

In [6]:
fr_commune_df = sedona.read.format("geoparquet").load(fr_commune_file_path)

In [7]:
fr_commune_df.show(5)
fr_commune_df.printSchema()

+--------------------+--------------------+--------------------+------------+-----+
|            geometry|           wikipedia|             surf_ha|         nom|insee|
+--------------------+--------------------+--------------------+------------+-----+
|POLYGON ((9.32016...|fr:Pie-d'Orezza  ...|     573.00000000...|Pie-d'Orezza|2B222|
|POLYGON ((9.20010...|fr:Lano          ...|     824.00000000...|        Lano|2B137|
|POLYGON ((9.27757...|fr:Cambia        ...|     833.00000000...|      Cambia|2B051|
|POLYGON ((9.25119...|fr:Érone         ...|     393.00000000...|       Érone|2B106|
|POLYGON ((9.28339...|fr:Oletta        ...|    2674.00000000...|      Oletta|2B185|
+--------------------+--------------------+--------------------+------------+-----+
only showing top 5 rows

root
 |-- geometry: geometry (nullable = true)
 |-- wikipedia: string (nullable = true)
 |-- surf_ha: string (nullable = true)
 |-- nom: string (nullable = true)
 |-- insee: string (nullable = true)



In [8]:
fr_commune_df = fr_commune_df.select("geometry","nom","insee")

In [9]:
fr_commune_df.show(5)
fr_commune_df.printSchema()

+--------------------+------------+-----+
|            geometry|         nom|insee|
+--------------------+------------+-----+
|POLYGON ((9.32016...|Pie-d'Orezza|2B222|
|POLYGON ((9.20010...|        Lano|2B137|
|POLYGON ((9.27757...|      Cambia|2B051|
|POLYGON ((9.25119...|       Érone|2B106|
|POLYGON ((9.28339...|      Oletta|2B185|
+--------------------+------------+-----+
only showing top 5 rows

root
 |-- geometry: geometry (nullable = true)
 |-- nom: string (nullable = true)
 |-- insee: string (nullable = true)



In [10]:
fr_commune_df.show(truncate=False,vertical=True)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [12]:
temp_table_name="fr_commune"
fr_commune_df.createOrReplaceTempView(temp_table_name)

In [16]:
def get_nearest_commune(latitude:str, longitude:str, max_commune_number:int):
    nearest_commune_df = sedona.sql(f"""
     SELECT z.nom as commune_name, z.insee, ST_DistanceSphere(ST_PointFromText('{longitude},{latitude}', ','), z.geometry) AS distance FROM {temp_table_name} as z ORDER BY distance ASC LIMIT {max_commune_number}
     """)
    return nearest_commune_df

In [17]:
# the gps coordinates for kremlin-Bicetre is 48.8100° N, 2.3539° E

kb_latitude = "48.8100"
kb_longitude = "2.3539"

In [19]:
kb_nearest_shape_df = get_nearest_commune(kb_latitude,kb_longitude,10)

In [20]:
%%time

kb_nearest_shape_df.show()
kb_nearest_shape_df.count()

+------------------+-----+------------------+
|      commune_name|insee|          distance|
+------------------+-----+------------------+
|Le Kremlin-Bicêtre|94043|198.60307108585405|
|          Gentilly|94037| 798.3521490770968|
|           Arcueil|94003|1543.0937442695515|
|         Villejuif|94076| 2007.793912679607|
|    Ivry-sur-Seine|94041| 2489.634383841373|
|            Cachan|94016| 2590.828517555236|
|         Montrouge|92049| 2750.714176859015|
|           Bagneux|92007| 3462.091511432535|
|   Vitry-sur-Seine|94081|3845.1624363327196|
|   L'Haÿ-les-Roses|94038| 3942.190017739479|
+------------------+-----+------------------+

CPU times: total: 0 ns
Wall time: 3.3 s


10