### IMPORTING

In [None]:
# Importing packages
from pyspark.ml import Pipeline,PipelineModel

from sparknlp.training import CoNLL

from sparknlp.annotator import *
from sparknlp.base import *

import sparknlp

In [None]:
# Start the Spark session through Spark NLP's `start()` helper.
# `ss` is the session handle that later cells use to read the CoNLL file and
# to build the prediction DataFrame.
ss=sparknlp.start()

In [None]:
# Create the CoNLL reader used to load the training data.
# NOTE(review): explodeSentences=False presumably keeps all sentences of a
# document in one row instead of one row per sentence — confirm against the
# Spark NLP CoNLL documentation.
conll = CoNLL(explodeSentences=False)

In [None]:
# Load the training dataset from a CoNLL-formatted file.
# Replace the placeholder string with the real path before running.
# The training stages in this notebook read the `document`, `sentence`,
# `token` and `label` columns of `data`.
data = conll.readDataset(ss,'''INSERT PATH TO CONLL FILE''')

In [None]:
# Preview the loaded training data as a quick sanity check.
data.show()

In [None]:
# Word-embeddings stage: consumes the `document` and `token` columns and
# writes its vectors to the `embeddings` column.
embeddings = (
    WordEmbeddings()
    .setInputCols(["document", "token"])
    .setOutputCol("embeddings")
)

In [None]:
# Attach the pretrained vectors to the embeddings stage.
# Replace the placeholder with the path to a plain-text embeddings file
# (it is read with ReadAs.TEXT).
# NOTE(review): the dimension (100) presumably has to match the vector size
# stored in that file — confirm for the embeddings you supply.
(
    embeddings
    .setStoragePath("INSERT PATH TO EMBEDDINGS FILE", ReadAs.TEXT)
    .setDimension(100)
    .setStorageRef("sample-conll")
)

In [None]:
# NER training stage: consumes `sentence`, `token` and `embeddings`, and
# writes the predicted tags to `ner`.
# Replace the placeholder with the folder that contains the graph file.
ner_dl = (
    NerDLApproach()
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
    .setGraphFolder("INSERT PATH TO FOLDER CONTAINING THE GRAPH")
)

In [None]:
# Training settings for the NER stage: a single epoch, a 0.25 validation
# split, output logs and extended evaluation logging switched on, confidence
# included in the output, and `label` as the column holding the gold tags.
(
    ner_dl
    .setMaxEpochs(1)
    .setValidationSplit(0.25)
    .setEnableOutputLogs(True)
    .setIncludeConfidence(True)
    .setEvaluationLogExtended(True)
    .setLabelColumn("label")
)

### TRAINING

In [None]:
# Training pipeline: embeddings first, then the NER approach that consumes
# them.
training_pipeline = Pipeline(stages=[embeddings, ner_dl])

In [None]:
# Fit the training pipeline on the CoNLL data.
# The fitted `trained_pipeline` is reused as a single stage of the prediction
# pipeline built further down.
trained_pipeline = training_pipeline.fit(data)

In [None]:
# Uncomment the next line to inspect the stages of the fitted pipeline.
#trained_pipeline.stages

In [None]:
# Document assembler: reads the raw `text` column and writes the `document`
# column that the downstream stages consume.
document = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

In [None]:
# Sentence detector: reads `document` and writes the detected sentences to
# `sentence`.
sentence = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

In [None]:
# Tokenizer: reads `sentence` and writes the resulting tokens to `token`.
token = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

### PREDICTION

In [None]:
# NER converter: reads `sentence`, `token` and the `ner` tags, and writes its
# result to `ner_chunk`.
ner_converter = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
)

In [None]:
# Prediction pipeline: raw text -> document -> sentences -> tokens ->
# fitted embeddings + NER pipeline -> NER chunks.
prediction_pipeline = Pipeline(
    stages=[
        document,
        sentence,
        token,
        trained_pipeline,
        ner_converter,
    ]
)

In [None]:
# Build a one-row DataFrame with a single `text` column to run the
# prediction pipeline on, and print it without truncating the sentence.
prediction_data = (
    ss.createDataFrame([["John was the Commissioner of Police, India."]])
    .toDF("text")
)
prediction_data.show(truncate=False)

In [None]:
# Fit the prediction pipeline on the sample DataFrame, then run the fitted
# model on the same DataFrame to obtain the annotated output `pred`.
prediction_model = prediction_pipeline.fit(prediction_data)
pred = prediction_model.transform(prediction_data)

# Optional: inspect the tokens next to their predicted tags.
# pred.select("token.result", "ner.result").show(truncate=False)

In [None]:
# Show the `ner_chunk` column alongside the per-token tags (`ner.result`),
# without truncating the cell contents.
pred.select("ner_chunk","ner.result").show(truncate = False)

### SAVING THE MODEL

In [None]:
# Save the fitted prediction model to ./prediction_dl_model, overwriting
# anything already stored at that path.
prediction_model.write().overwrite().save("./prediction_dl_model")

In [None]:
# To reuse the saved prediction model, uncomment the lines below.
# They are kept as `#` comments rather than a triple-quoted string so the
# notebook does not echo the snippet as cell output.
#
# from pyspark.ml import PipelineModel, Pipeline
#
# loaded_prediction_model = PipelineModel.read().load("./prediction_dl_model")
#
# loaded_prediction_model.transform(prediction_data).show(5)