In [1]:
import pyspark.sql.functions as F
from pyspark.sql.window import Window
from sparknlp.annotator import *
from sparknlp.base import *
import sparknlp
from sparknlp.pretrained import PretrainedPipeline

# Session handle used by the cells below (e.g. `spark.read`).
# NOTE(review): sparknlp.start() is presumably what provides a Spark session
# with the Spark NLP dependencies attached -- confirm against the installed version.
spark = sparknlp.start()

In [2]:
# Location of the input CSV; read further down with a header row and an
# "adresse" column. The path is relative, so it depends on the working directory.
path = "data/data_set_final.csv"

# Run pipeline

In [3]:
# Load the 'entity_recognizer_md_fr' pipeline; later cells read its "ner" and
# "entities" output columns.
# NOTE(review): from_disk presumably treats the argument as a filesystem path,
# so a directory with this name would have to exist relative to the working
# directory -- confirm.
pipeline = PretrainedPipeline.from_disk('entity_recognizer_md_fr')

In [4]:
# Read the CSV, taking column names from its first row.
df = spark.read.csv(path, header=True)

# Keep only the address column and expose it under the name "text",
# which is the single column handed to the pipeline.
data = df.select("adresse").withColumnRenamed("adresse", "text")

# Run every stage of the loaded pipeline over the addresses.
annotations = pipeline.transform(data)

In [5]:
# Each address side by side with the list of NER tags produced for it.
result = annotations.select(
    "text",
    "ner.result",
)
# Print the first 40 rows without truncating long cell values.
result.show(n=40, truncate=False)

+-------------------------------+----------------------------------+
|text                           |result                            |
+-------------------------------+----------------------------------+
|Route de Trevoux               |[I-MISC, I-MISC, I-MISC]          |
|Rue Sainte Marie               |[I-PER, I-PER, I-PER]             |
|1711 Route d’Hauteville        |[O, I-MISC, I-MISC]               |
|Route du Morbier               |[I-LOC, I-LOC, I-LOC]             |
|Rue des Bleuets                |[I-LOC, I-LOC, I-LOC]             |
|7 Rue Jean Monnet              |[O, I-LOC, I-PER, I-PER]          |
|193 Chemin de Chavagneux       |[O, I-LOC, I-LOC, I-LOC]          |
|55 Rue du Coteau               |[O, I-LOC, I-LOC, I-LOC]          |
|Rue de l’Eglise                |[I-LOC, I-LOC, I-LOC]             |
|89 Impasse des Chênes          |[O, I-LOC, I-LOC, I-LOC]          |
|Rue du Journans                |[I-LOC, I-LOC, I-LOC]             |
|Rue de la Rogeraie             |[

In [8]:
# Each address with the extracted entity chunks and their metadata
# (entity label, sentence index, chunk index).
result = annotations.select(
    "text",
    "entities.result",
    "entities.metadata",
)
# Print the first 40 rows without truncating long cell values.
result.show(n=40, truncate=False)

+-------------------------------+---------------------------------+-----------------------------------------------------------------------------------------+
|text                           |result                           |metadata                                                                                 |
+-------------------------------+---------------------------------+-----------------------------------------------------------------------------------------+
|Route de Trevoux               |[Route de Trevoux]               |[{entity -> MISC, sentence -> 0, chunk -> 0}]                                            |
|Rue Sainte Marie               |[Rue Sainte Marie]               |[{entity -> PER, sentence -> 0, chunk -> 0}]                                             |
|1711 Route d’Hauteville        |[Route d’Hauteville]             |[{entity -> MISC, sentence -> 0, chunk -> 0}]                                            |
|Route du Morbier               |[Route du Morbier] 