In [1]:
import json

from google.colab import files

# Ask the user to upload the Spark NLP for Healthcare license JSON through the
# Colab file picker. The upload result is kept under its own name so that
# `license_keys` only ever refers to the parsed license dictionary.
uploaded_files = files.upload()

# Nothing uploaded (e.g. the dialog was dismissed): fail with an explicit
# message instead of an opaque IndexError on the first-key lookup.
if not uploaded_files:
    raise ValueError("No license file was uploaded; upload the license JSON and re-run this cell.")

# The first key of the upload result is used as the path of the saved file.
license_path = next(iter(uploaded_files))

# Decode the JSON as UTF-8 explicitly rather than with the platform default.
with open(license_path, encoding="utf-8") as license_file:
    license_keys = json.load(license_file)

Saving spark_nlp_for_healthcare.json to spark_nlp_for_healthcare.json


In [2]:
%%capture
# Export every key/value pair of the parsed license JSON as an environment
# variable, one %set_env call per entry.
# NOTE(review): the cell output is captured, presumably so that the values
# (license secrets) are not echoed into the saved notebook — confirm.
for k,v in license_keys.items(): 
    %set_env $k=$v

In [3]:
!wget https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-workshop/master/jsl_colab_setup.sh
!bash jsl_colab_setup.sh

! pip install spark-nlp-display

--2021-08-12 08:16:11--  https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-workshop/master/jsl_colab_setup.sh
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1872 (1.8K) [text/plain]
Saving to: ‘jsl_colab_setup.sh’


2021-08-12 08:16:11 (25.7 MB/s) - ‘jsl_colab_setup.sh’ saved [1872/1872]

setup Colab for PySpark 3.1.1 and Spark NLP 3.1.3
Get:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]
Ign:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Get:4 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ Packages [66.2 kB]
Ign:5 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu

In [4]:
import json
import os
from pyspark.ml import Pipeline
from pyspark.sql import SparkSession

from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
from sparknlp.base import *
import sparknlp_jsl
import sparknlp

import pandas as pd

# Spark configuration overrides handed to the licensed session builder.
params = {
    "spark.driver.memory": "16G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M",
}

# Start the Spark session using the SECRET entry of the uploaded license JSON.
spark = sparknlp_jsl.start(license_keys["SECRET"], params=params)

# Report the library versions that are actually loaded.
print("Spark NLP Version :", sparknlp.version())
print("Spark NLP_JSL Version :", sparknlp_jsl.version())

Spark NLP Version : 3.1.1
Spark NLP_JSL Version : 3.1.3


In [5]:
# Load the sample note from disk. The encoding is given explicitly so the text
# is decoded the same way regardless of the platform's default locale.
# NOTE(review): UTF-8 is assumed for this file — confirm.
with open('nlp_test1 (1).txt', 'r', encoding='utf-8') as sample_file:
    sample = sample_file.read()

# Wrap the whole file content as a single-row DataFrame with one "text" column.
sample_df = spark.createDataFrame([[sample]]).toDF("text")

In [6]:
# Pipeline stages up to NER chunking. Each chain is wrapped in parentheses
# instead of trailing backslashes; the original tokenizer chain ended with a
# dangling `\` before a blank line, which only parsed because of that blank line.

# Reads the raw "text" column and writes it out as "document".
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Default pretrained sentence detector: "document" -> "sentences".
sentenceDetector = (
    SentenceDetectorDLModel.pretrained()
    .setInputCols(["document"])
    .setOutputCol("sentences")
)

# Tokenizer: "sentences" -> "tokens".
tokenizer = (
    Tokenizer()
    .setInputCols(["sentences"])
    .setOutputCol("tokens")
)

# Pretrained clinical word embeddings over the sentence/token annotations.
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentences", "tokens"])
    .setOutputCol("embeddings")
)

# Pretrained clinical part-of-speech tagger; its "pos_tags" output is an input
# of the dependency parser below.
pos_tagger = (
    PerceptronModel.pretrained("pos_clinical", "en", "clinical/models")
    .setInputCols("sentences", "tokens")
    .setOutputCol("pos_tags")
)

# Pretrained dependency parser; its "dependencies" output is consumed by the
# relation-extraction chunk filter defined later in this cell.
dependency_parser = (
    DependencyParserModel.pretrained("dependency_conllu", "en")
    .setInputCols("sentences", "pos_tags", "tokens")
    .setOutputCol("dependencies")
)

# Pretrained clinical NER model writing its tags to "ner_tags".
clinical_ner_tagger = (
    MedicalNerModel.pretrained("jsl_ner_wip_greedy_clinical", "en", "clinical/models")
    .setInputCols("sentences", "tokens", "embeddings")
    .setOutputCol("ner_tags")
)

# Converts the NER tags into "ner_chunks".
ner_chunker = (
    NerConverter()
    .setInputCols("sentences", "tokens", "ner_tags")
    .setOutputCol("ner_chunks")
)

# Entity-type pairs passed to the chunk filter; each combination is listed in
# both orders.
relationPairs = [
    "direction-external_body_part_or_region",
    "external_body_part_or_region-direction",
    "direction-internal_organ_or_component",
    "internal_organ_or_component-direction",
]

# Filters NER chunks using the dependency annotations, configured with the
# pairs above and a maximum syntactic distance of 4; writes "re_ner_chunks".
re_ner_chunk_filter = (
    RENerChunksFilter()
    .setInputCols("ner_chunks", "dependencies")
    .setOutputCol("re_ner_chunks")
    .setMaxSyntacticDistance(4)
    .setRelationPairs(relationPairs)
)

# Pretrained body-part/direction relation model; reads the filtered chunks and
# the sentences and writes "relations".
re_model = (
    RelationExtractionDLModel.pretrained("redl_bodypart_direction_biobert", "en", "clinical/models")
    .setInputCols("re_ner_chunks", "sentences")
    .setOutputCol("relations")
)
 
# Assemble the stages into one Pipeline. The dependency parser is listed after
# the POS tagger (it reads "pos_tags") and before the chunk filter (which reads
# "dependencies").
trained_pipeline = Pipeline(stages=[
    documentAssembler,
    sentenceDetector,
    tokenizer,
    word_embeddings,
    pos_tagger,
    clinical_ner_tagger,
    ner_chunker,
    dependency_parser,
    re_ner_chunk_filter,
    re_model,
])

# Single-sentence example wrapped as a one-row DataFrame with a "text" column.
data = spark.createDataFrame(
    [["MRI demonstrated infarction in the upper brain stem , left cerebellum and  right basil ganglia"]]
).toDF("text")

# Fit the pipeline on the example and annotate that same example.
result = trained_pipeline.fit(data).transform(data)

sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]
embeddings_clinical download started this may take some time.
Approximate size to download 1.6 GB
[OK!]
pos_clinical download started this may take some time.
Approximate size to download 1.5 MB
[OK!]
dependency_conllu download started this may take some time.
Approximate size to download 16.7 MB
[OK!]
jsl_ner_wip_greedy_clinical download started this may take some time.
Approximate size to download 14.5 MB
[OK!]
redl_bodypart_direction_biobert download started this may take some time.
Approximate size to download 383.4 MB
[OK!]


In [None]:
# One row per extracted relation: explode the "relations" array, pull the two
# chunks and their entity labels out of the metadata, keep rows whose result
# is not 0, and print up to 20 rows without truncating the cell text.
(
    result
    .selectExpr("explode(relations) as relations")
    .select(
        "relations.metadata.chunk1",
        "relations.metadata.entity1",
        "relations.metadata.chunk2",
        "relations.metadata.entity2",
        "relations.result",
    )
    .where("result != 0")
    .show(n=20, truncate=False)
)

+------+---------+-------------+---------------------------+------+
|chunk1|entity1  |chunk2       |entity2                    |result|
+------+---------+-------------+---------------------------+------+
|upper |Direction|brain stem   |Internal_organ_or_component|1     |
|left  |Direction|cerebellum   |Internal_organ_or_component|1     |
|right |Direction|basil ganglia|Internal_organ_or_component|1     |
+------+---------+-------------+---------------------------+------+



In [7]:
# Fit the same pipeline on the sample document and annotate it.
result_sample = (
    trained_pipeline
    .fit(sample_df)
    .transform(sample_df)
)

In [8]:
# One row per relation found in the sample document: explode the "relations"
# array, pull the two chunks and their entity labels out of the metadata, keep
# rows whose result is not 0, and print up to 20 rows without truncation.
(
    result_sample
    .selectExpr("explode(relations) as relations")
    .select(
        "relations.metadata.chunk1",
        "relations.metadata.entity1",
        "relations.metadata.chunk2",
        "relations.metadata.entity2",
        "relations.result",
    )
    .where("result != 0")
    .show(n=20, truncate=False)
)

+----------------+---------------------------+-------------+----------------------------+------+
|chunk1          |entity1                    |chunk2       |entity2                     |result|
+----------------+---------------------------+-------------+----------------------------+------+
|lumbar spine    |Internal_organ_or_component|right        |Direction                   |1     |
|lumbar curvature|Internal_organ_or_component|right        |Direction                   |1     |
|left            |Direction                  |shoulder     |External_body_part_or_region|1     |
|bilateral       |Direction                  |legs         |External_body_part_or_region|1     |
|right           |Direction                  |leg          |External_body_part_or_region|1     |
|left            |Direction                  |leg          |External_body_part_or_region|1     |
|right           |Direction                  |knee         |External_body_part_or_region|1     |
|Bilateral deep  |Direction   