In [None]:
"""
Create a Forte pipeline.
"""
import forte
import os
import yaml

# The pipeline object that every later cell adds a reader / processors to.
nlp = forte.pipeline.Pipeline()

# An example configuration for the pipeline is provided in this repo.
# The path is relative, so the notebook must be run from the repository root.
config_file = os.path.join('examples', 'pipeline', 'inference', 'config.yml')

# Read the YAML inside a context manager so the file handle is closed
# deterministically instead of being left open for the garbage collector.
with open(config_file, "r") as config_stream:
    raw_config = yaml.safe_load(config_stream)

# Wrap the parsed YAML in a Forte Config; later cells read its sections with
# attribute access (config.reader, config.allennlp_query, config.indexer, ...).
config = forte.common.configuration.Config(raw_config, default_hparams=None)
print(config)

{
  "allennlp": {
    "allow_parallel_entries": true,
    "cuda_devices": [
      0,
      1
    ],
    "infer_batch_size": 50,
    "overwrite_entries": false,
    "processors": "tokenize, srl",
    "srl_url": "https://storage.googleapis.com/allennlp-public-models/openie-model.2020.03.26.tar.gz",
    "tag_formalism": "srl"
  },
  "allennlp_query": {
    "allow_parallel_entries": true,
    "cuda_devices": [
      0,
      1
    ],
    "infer_batch_size": 1,
    "overwrite_entries": false,
    "processors": "tokenize, pos, srl",
    "srl_url": "https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz",
    "tag_formalism": "srl"
  },
  "indexer": {
    "field": "content",
    "index_config": {
      "algorithm": "bm25",
      "hosts": "localhost:9200",
      "index_name": "elastic_index"
    },
    "indexed_text_only": false,
    "query_pack_name": "query",
    "response_pack_name_prefix": "passage"
  },
  "query_creator": {
    "field": "con

In [2]:
"""
Attach a reader to the pipeline.
"""
from forte.data.readers import MultiPackTerminalReader
# Register the terminal reader with its section of the config and initialize
# the pipeline so that it can be run right away.
nlp.set_reader(MultiPackTerminalReader(), config=config.reader).initialize()

# process_dataset() returns a Python iterator of data (MultiPack in this case)
# returned by `MultiPackTerminalReader`; pull the first item, which prompts for
# a query on the terminal.
first_multi_pack = next(nlp.process_dataset())
# The pack at index 0 holds the text that was typed in.
query_pack = first_multi_pack.get_pack_at(0)
print(query_pack.text)

Enter your query here:  What does covid-19 cause?


What does covid-19 cause?


In [3]:
"""
Add NLP processors to extract necessary language features such as
POS (Parts of Speech), Lemma and SRL (Semantic Role Labeling).
"""
from forte_wrapper.nltk.nltk_processors import NLTKLemmatizer, \
    NLTKSentenceSegmenter, NLTKWordTokenizer, NLTKPOSTagger
from forte_wrapper.allennlp.allennlp_processors import AllenNLPProcessor
from forte.data.selector import NameMatchSelector

from ft.onto.base_ontology import Token, Sentence, PredicateLink

# Scope the query-side processors to the pack named in the reader config
# (presumably the pack that holds the user's query -- confirm against config.yml).
selector = NameMatchSelector(select_name=config.reader.pack_name)

# Instantiate and register the NLTK processors one at a time, in pipeline order.
for processor_cls in (NLTKSentenceSegmenter, NLTKWordTokenizer, NLTKPOSTagger, NLTKLemmatizer):
    nlp.add(processor_cls(), selector=selector)
# AllenNLP runs last and takes its settings from the `allennlp_query` section.
nlp.add(AllenNLPProcessor(), config=config.allennlp_query, selector=selector)

# See what's in the result data pack
nlp.initialize()
data_pack = next(nlp.process_dataset()).get_pack_at(0)
for sent in data_pack.get(Sentence):
    print("Tokens created by NLTK:")
    # Only the tokens produced by the NLTK tokenizer within this sentence.
    nltk_tokens = data_pack.get(
        Token,
        sent,
        components=["forte_wrapper.nltk.nltk_processors.NLTKWordTokenizer"],
    )
    for token in nltk_tokens:
        print(f"    text: {data_pack.text[token.begin:token.end]}, pos: {token.pos}, lemma: {token.lemma}")

    print("Semantic role labels created by AllenNLP:")
    # Only the predicate links produced by the AllenNLP processor within this sentence.
    srl_links = data_pack.get(
        PredicateLink,
        sent,
        components=["forte_wrapper.allennlp.allennlp_processors.AllenNLPProcessor"],
    )
    for pred in srl_links:
        # Each link goes from the predicate (parent) to one of its arguments (child).
        verb, noun = pred.get_parent(), pred.get_child()
        print(f"    verb: {data_pack.text[verb.begin:verb.end]}, noun: {data_pack.text[noun.begin:noun.end]}, noun_type: {pred.arg_type}")

Enter your query here:  What does covid-19 cause?




Tokens created by NLTK:
    text: What, pos: WP, lemma: What
    text: does, pos: VBZ, lemma: do
    text: covid-19, pos: NN, lemma: covid-19
    text: cause, pos: NN, lemma: cause
    text: ?, pos: ., lemma: ?
Semantic role labels created by AllenNLP:
    verb: cause, noun: What, noun_type: ARG1
    verb: cause, noun: covid-19, noun_type: ARG0


In [4]:
"""
Use the language features extracted above to create an Elasticsearch (ES) query
and retrieve relevant documents from an ES index.
"""
from typing import Any, Dict, Tuple
from forte.data.data_pack import DataPack
from forte.data.multi_pack import MultiPack
from forte.processors.base import QueryProcessor
from forte_wrapper.elastic.elastic_search_processor import ElasticSearchProcessor
# from composable_source.processors.elasticsearch_query_creator import ElasticSearchQueryCreator
from composable_source.utils.utils import query_preprocess


class ElasticSearchQueryCreator(QueryProcessor):
    """
    Build an Elasticsearch ``match_phrase`` query from the language features
    extracted from the query pack.

    Complete implementation is available at composable_source/processors/elasticsearch_query_creator.py
    """

    @classmethod
    def default_configs(cls) -> Dict[str, Any]:
        """
        Extend the parent's default configuration with the options read by
        this processor.

        :return: the configuration dict, containing ``size`` (sent as the
            ``size`` of the ES request), ``field`` (the document field the
            phrase is matched against) and ``query_pack_name`` (the name of
            the pack, inside the incoming MultiPack, that holds the query).
        """
        config = super().default_configs()
        config.update({
            "size": 1000,
            "field": "content",
            "query_pack_name": "query"
        })
        return config

    def _process_query(self, input_pack: MultiPack) -> Tuple[DataPack, Dict[str, Any]]:
        """
        Look up the query pack in the MultiPack and build the ES query for it.

        :param input_pack: MultiPack containing a pack named
            ``configs.query_pack_name``.
        :return: a ``(query_pack, query)`` tuple, where ``query`` is the
            request body produced by :meth:`_build_query_nlp`.
        """
        query_pack = input_pack.get_pack(self.configs.query_pack_name)
        query_pack.pack_name = self.configs.query_pack_name
        query = self._build_query_nlp(query_pack)
        return query_pack, query

    def _build_query_nlp(self, input_pack: DataPack) -> Dict[str, Any]:
        """
        Turn the output of ``query_preprocess`` into an ES request body.

        ``query_preprocess`` is defined elsewhere; judging by the names, it
        returns the original query, the two SRL arguments, the verb and a flag
        telling which argument the question asks for -- confirm against
        composable_source/utils/utils.py.

        :param input_pack: the DataPack holding the processed query.
        :return: a dict with a ``match_phrase`` query on ``configs.field`` and
            the configured ``size``.
        """
        query, arg0, arg1, verb, _, is_answer_arg0 = query_preprocess(input_pack)
        if not arg0 or not arg1:
            # Without both arguments the "<arg> <verb>" templates below would
            # render an empty/None slot, so fall back to the original query.
            # This branch used to be followed by a separate `if`, whose
            # `else` always overwrote the fallback.
            # NOTE(review): `query` may be plain text or an annotation that
            # exposes `.text`; accept both -- confirm against query_preprocess.
            processed_query = getattr(query, "text", query)
        elif is_answer_arg0 is None:
            processed_query = f'{arg0} {verb} {arg1}'.lower()
        elif is_answer_arg0:
            # The question asks for arg0, so search with the other argument.
            processed_query = f'{arg1} {verb}'.lower()
        else:
            # The question asks for arg1, so search with arg0.
            processed_query = f'{arg0} {verb}'.lower()
        return {
            "query": {
                "match_phrase": {
                    self.configs.field: {
                        "query": processed_query,
                        "slop": 10  # how far we allow the terms to be
                    }
                }
            },
            "size": self.configs.size
        }

from ft.onto.base_ontology import Document

# Query creation runs first, then the search processor that sends the query to
# the index described in the `indexer` section of the config.
query_creator = ElasticSearchQueryCreator()
nlp.add(query_creator, config=config.query_creator)
search_processor = ElasticSearchProcessor()
nlp.add(search_processor, config=config.indexer)

# See what's in the result data pack
nlp.initialize()
retrieved_multi_pack = next(nlp.process_dataset())
# Index 0 is the query pack (see the earlier cells); index 1 is presumably the
# first retrieved passage.
data_pack = retrieved_multi_pack.get_pack_at(1)
data_pack.get_single(Document)

Enter your query here:  What does covid-19 cause?


INFO:elasticsearch:GET http://localhost:9200/elastic_index/_search [status:200 request:0.065s]


Document(document_class=[], sentiment={})

In [5]:
"""
Add NLP processors to extract necessary language features such as Entity links and SRL.
"""
from composable_source.processors.scispacy_processor import SciSpacyProcessor
from forte.data.selector import RegexNameMatchSelector
from onto.medical import MedicalEntityMention

# Pack names are matched against "<response_pack_name_prefix>_<digit>", so the
# processors below are presumably applied to the retrieved passages rather than
# to the query pack.
pattern = rf"{config.indexer.response_pack_name_prefix}_\d"
selector = RegexNameMatchSelector(select_name=pattern)

# Two SciSpacy passes, each with its own config section, followed by AllenNLP.
nlp.add(component=SciSpacyProcessor(), config=config.spacy1, selector=selector)
nlp.add(component=SciSpacyProcessor(), config=config.spacy2, selector=selector)
nlp.add(AllenNLPProcessor(), config=config.allennlp, selector=selector)
# The NLTK tagger and lemmatizer are registered again, this time under the
# passage selector.
for processor_cls in (NLTKPOSTagger, NLTKLemmatizer):
    nlp.add(processor_cls(), selector=selector)

# See what's in the result data pack
nlp.initialize()
data_pack = next(nlp.process_dataset()).get_pack_at(1)
sent = data_pack.get_single(Sentence)
print(f"Sentence: {sent.text}")

print("Entities created by SciSpacy:")
medical_mentions = data_pack.get(
    MedicalEntityMention,
    sent,
    components=["composable_source.processors.scispacy_processor.SciSpacyProcessor"],
)
for entity in medical_mentions:
    # One mention can carry several UMLS candidates; print each of them.
    for umls in entity.umls_entities:
        print(f"    entity: {umls.name}, cui: {umls.cui}")

print("Semantic role labels created by AllenNLP:")
srl_links = data_pack.get(
    PredicateLink,
    sent,
    components=["forte_wrapper.allennlp.allennlp_processors.AllenNLPProcessor"],
)
for pred in srl_links:
    # Each link goes from the predicate (parent) to one of its arguments (child).
    verb, noun = pred.get_parent(), pred.get_child()
    print(f"    verb: {data_pack.text[verb.begin:verb.end]}, noun: {data_pack.text[noun.begin:noun.end]}, noun_type: {pred.arg_type}")



Enter your query here:  What does covid-19 cause?


INFO:elasticsearch:GET http://localhost:9200/elastic_index/_search [status:200 request:0.057s]


Sentence: Journal Pre-proofs Does COVID-19 cause permanent damage to olfactory and gustatory func- tion?
Entities created by SciSpacy:
    entity: COVID-19, cui: C5203670
    entity: COVID-19, cui: C5203670
    entity: Smell Perception, cui: C0037361
    entity: Olfactory, cui: C0439826
    entity: Olfactory tract, cui: C0162435
    entity: Olfactory Cortex, cui: C0162434
    entity: Olfactory Nerve, cui: C0028938
Semantic role labels created by AllenNLP:
    verb: cause, noun: Journal Pre-proofs Does COVID-19, noun_type: ARG0
    verb: cause, noun: permanent damage to olfactory and gustatory func- tion, noun_type: ARG1


In [6]:
"""
Use a response creator to format the result data packs into human readable formats.
"""
from composable_source.processors.response_creator import ResponseCreator
# Final stage: format the processed packs into a readable answer.
response_creator = ResponseCreator()
nlp.add(response_creator, config=config.response)

# Execute the pipeline once on one query and see the results
# (nothing is printed by this cell itself, so the formatted output is
# presumably written by the pipeline while it runs).
nlp.initialize()
data_pack = next(nlp.process_dataset())

Enter your query here:  What does covid-19 cause?


INFO:elasticsearch:GET http://localhost:9200/elastic_index/_search [status:200 request:0.067s]


•Relation:
COVID-19	caused	(WHO 2020
•Source Sentence:
COVID-19 should be recorded on the medical certificate of cause of death for ALL decedents where the disease caused, or is assumed to have caused, or contributed to death (WHO 2020, 3).On the one hand, when COVID-19 is not part of the causal chain that leads directly to death, it should not be indicated as the underlying cause of death.(From Paper: , COVID-19 as the underlying cause of death: disentangling facts and values)
•UMLS Concepts:
 - covid-19
	Name: COVID-19	CUI: C5203670	Learn more at: https://www.ncbi.nlm.nih.gov/search/all/?term=C5203670
•Relation:
COVID-19	cause	musculoskeletal pain
•Source Sentence:
In addition to the classic mechanisms of myalgia known in viral infections, COVID-19 can cause musculoskeletal pain with completely different mechanisms.(From Paper: , Can COVID-19 cause myalgia with a completely different mechanism? A hypothesis)
•UMLS Concepts:
 - covid-19
	Name: COVID-19	CUI: C5203670	Learn more at: htt