# Import corpus

In [1]:
import pandas as pd
import pickle

In [2]:
# Load the filtered Simple Wikipedia sentence table (semicolon-separated CSV)
# and show which columns it provides.
simplewiki_medcon = pd.read_csv('../datasources/simplewiki/extracted/swiki_sentences_ngrams_filtered.csv', sep=';')
simplewiki_medcon.columns

Index(['source_page', 'sent_id', 'sentence', 'filtered_unigrams',
       'filtered_bigrams', 'filtered_trigrams', 'filtered_quandrigrams',
       'unigrams_grouped', 'bigrams_grouped', 'trigrams_grouped',
       'quandrigrams_grouped'],
      dtype='object')

In [3]:
# Extract the raw sentence strings as a plain Python list; this list is the
# corpus that ContextNeighborStorage embeds and indexes further down.
simplewiki_sentences = simplewiki_medcon.sentence.tolist()
print(len(simplewiki_sentences))
simplewiki_sentences[:10]

12694


['Boil might mean:   Boiling, heating a liquid to the point where it turns into gas   Boil, a type of Staphylococcal infection',
 'Bubonic plague is the best-known form of the disease plague, which is caused by the bacterium Yersinia pestis.',
 'The name bubonic plague is specific for this form of the disease, which enters through the skin, and travels through the lymphatic system.',
 'If the disease is left untreated, it kills about half its victims, in between three and seven days.',
 'The bubonic plague was the disease that caused the Black Death, which killed tens of millions of people in Europe, in the Middle Ages.',
 'Symptoms of this disease include coughing, fever, and black spots on the skin.',
 'There are different kinds of Bubonic plague.',
 'The most common form of the disease is spread by a certain kind of flea, that lives on rats.',
 'Then there is an incubation period which can last from a few hours to about seven days.',
 'Sepsis happens when the bacterium enters the bl

# Load BERT model from bert_embedding

Code from: https://gist.github.com/avidale/c6b19687d333655da483421880441950

In [4]:
import mxnet as mx
from bert_embedding import BertEmbedding
from tqdm import tqdm
import pickle

# OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [7]:
# MXNet device context: GPU 0.
ctx = mx.gpu(0)

# see https://gluon-nlp.mxnet.io/model_zoo/bert/index.html for model and dataset_name
# Active configuration: cased BERT-base, sequences capped at 256 tokens, batches of 8 sentences.
bert = BertEmbedding(ctx=ctx, max_seq_length=256, model='bert_12_768_12', dataset_name='book_corpus_wiki_en_cased', batch_size=8)
# Uncased alternative, kept for reference:
# bert = BertEmbedding(ctx=ctx, max_seq_length=256, model='bert_12_768_12', dataset_name='book_corpus_wiki_en_uncased', batch_size=8)


## Create a search index

In [1]:
from sklearn.neighbors import KDTree
import numpy as np


class ContextNeighborStorage:
    """Nearest-neighbour lookup over contextual token embeddings.

    Parameters
    ----------
    sentences : List[str]
        Corpus sentences to embed and index.
    model : callable
        Called with a list of sentences; must return one ``(tokens, embeddings)``
        pair per sentence, where ``embeddings`` holds one vector per token.

    Typical use is ``process_sentences()``, then ``build_search_index()``,
    then any number of ``query(...)`` calls.
    """

    def __init__(self, sentences, model):
        self.sentences = sentences
        self.model = model

    def process_sentences(self):
        """Embed every sentence and store unit-length token vectors.

        Fills ``sentence_ids``, ``token_ids`` and ``all_tokens`` (parallel lists,
        one entry per token) and ``normed_embeddings`` (one row per token).

        Raises
        ------
        ValueError
            If the model produced no token embeddings at all.
        """
        result = self.model(self.sentences)

        self.sentence_ids = []
        self.token_ids = []
        self.all_tokens = []
        all_embeddings = []
        for sent_idx, (toks, embs) in enumerate(tqdm(result)):
            for tok_idx, (tok, emb) in enumerate(zip(toks, embs)):
                self.sentence_ids.append(sent_idx)
                self.token_ids.append(tok_idx)
                self.all_tokens.append(tok)
                all_embeddings.append(emb)
        if not all_embeddings:
            # np.stack would fail with an unhelpful message on an empty list.
            raise ValueError('No token embeddings were produced; cannot index an empty corpus')
        all_embeddings = np.stack(all_embeddings)
        # Normalise each row to unit length so that Euclidean distance ranks
        # neighbours the same way cosine distance does.
        norms = (all_embeddings ** 2).sum(axis=1, keepdims=True) ** 0.5
        self.normed_embeddings = all_embeddings / norms

    def build_search_index(self):
        """Build the KD-tree over ``normed_embeddings`` (this takes some time)."""
        self.indexer = KDTree(self.normed_embeddings)

    def query(self, query_sent, query_word, k=10, filter_same_word=False):
        """Find the corpus tokens whose embeddings are closest to ``query_word``.

        Parameters
        ----------
        query_sent : str
            Sentence that provides the context for ``query_word``.
        query_word : str
            Word to look up; it must appear in the model's tokenisation of
            ``query_sent`` as a single token (as given, or lower-cased).
            The first matching token is used.
        k : int, default 10
            Maximum number of neighbours to return.
        filter_same_word : bool, default False
            If True, skip neighbours that contain ``query_word`` or are
            contained in it. At least 100 candidates are fetched in that case
            so that enough remain after filtering.

        Returns
        -------
        (distances, neighbors, contexts)
            Three parallel lists: index distance, neighbour token, and the
            corpus sentence the neighbour came from. May hold fewer than ``k``
            entries when the corpus is small or many candidates are filtered.

        Raises
        ------
        ValueError
            If ``query_word`` is not a single token of ``query_sent``.
        """
        toks, embs = self.model([query_sent])[0]

        accepted = (query_word, query_word.lower())
        for tok, emb in zip(toks, embs):
            if tok in accepted:
                break
        else:
            raise ValueError('The query word {} is not a single token in sentence {}'.format(query_word, toks))
        emb = emb / (emb ** 2).sum() ** 0.5

        initial_k = max(k, 100) if filter_same_word else k
        # The index rejects requests for more neighbours than it holds points,
        # so never ask for more than the number of indexed tokens.
        initial_k = min(initial_k, len(self.all_tokens))
        di, idx = self.indexer.query(emb.reshape(1, -1), k=initial_k)

        distances = []
        neighbors = []
        contexts = []
        for dist, index in zip(di.ravel(), idx.ravel()):
            token = self.all_tokens[index]
            if filter_same_word and (query_word in token or token in query_word):
                continue
            distances.append(dist)
            neighbors.append(token)
            contexts.append(self.sentences[self.sentence_ids[index]])
            if len(distances) == k:
                break
        return distances, neighbors, contexts

In [9]:
# Embed every corpus sentence with the MXNet BERT model and collect the
# normalised per-token vectors that the search index is built from.
storage = ContextNeighborStorage(sentences=simplewiki_sentences, model=bert)
storage.process_sentences()

100%|██████████| 12694/12694 [00:00<00:00, 160465.63it/s]


In [10]:
# Build the KD-tree over the normalised token embeddings (slow step).
storage.build_search_index()

In [11]:
# Store data (serialize)
# Persist the whole storage object (sentences, model, embeddings and KD-tree)
# so the index does not have to be rebuilt in later sessions.
with open('./simplewiki_KDTree_bertcased256.pickle', 'wb') as handle:
    pickle.dump(storage, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
# Load data (deserialize)
# Restore the storage object written by the serialisation cell.
# NOTE: pickle.load can execute arbitrary code; only load pickle files you created yourself.
# NOTE(review): unpickling presumably needs the ContextNeighborStorage class and the
# model's library to be importable in this kernel — confirm.
with open('./simplewiki_KDTree_bertcased256.pickle', 'rb') as handle:
    tree_copy = pickle.load(handle)

# Load ClinicalBERT via SBERT

In [2]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from sentence_transformers import models
from transformers import BertForMaskedLM, BertTokenizer
import torch
from tqdm import tqdm
import pickle

# OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

## Adjust bert_embedding to load clinicalbert

In [11]:
# Original code from: https://github.com/imgarylai/bert-embedding/blob/master/bert_embedding/bert.py

class BertEmbedding(object):
    """Word-level contextual embeddings from a BERT checkpoint loaded via sentence-transformers.

    Mirrors the calling convention of the ``bert_embedding`` package: calling
    the object with a list of sentences returns one ``(tokens, embeddings)``
    pair per sentence, with word pieces merged back into whole words.

    Parameters
    ----------
    model : str
        Model name or path handed to ``models.BERT``.
    max_seq_length : int, default 256
        Maximum sequence length handed to ``models.BERT``.
    do_lower_case : bool, default False
        Whether the tokenizer lower-cases its input.
    """

    def __init__(self, model, max_seq_length=256, do_lower_case=False):
        self.word_embedding_model = models.BERT(model, max_seq_length=max_seq_length, do_lower_case=do_lower_case)

    def __call__(self, sentences, oov_way='avg', filter_spec_tokens=True):
        """
        Get tokens, tokens embedding
        Parameters
        ----------
        sentences : List[str]
            sentences for encoding.
        oov_way : str, default avg.
            use **avg**, **sum** or **last** to get token embedding for those out of
            vocabulary words
        filter_spec_tokens : bool
            filter [CLS], [SEP] tokens.
        Returns
        -------
        List[(List[str], List[ndarray])]
            List of tokens, and tokens embedding
        """
        return self.embedding(sentences, oov_way, filter_spec_tokens)

    def embedding(self, sentences, oov_way='avg', filter_spec_tokens=True):
        """Encode ``sentences`` and merge word pieces into word-level embeddings.

        Takes the same parameters and returns the same structure as ``__call__``.
        """
        # A pooling module is required to assemble a SentenceTransformer; only
        # the per-token outputs are requested from it below.
        pooling_model = models.Pooling(self.word_embedding_model.get_word_embedding_dimension(),
                                       pooling_mode_mean_tokens=True,
                                       pooling_mode_cls_token=False,
                                       pooling_mode_max_tokens=False)

        bert = SentenceTransformer(modules=[self.word_embedding_model, pooling_model])

        sentence_embeddings = bert.encode(sentences, output_value='token_embeddings')

        # NOTE(review): this assumes bert.tokenize() yields ids that line up
        # one-to-one with the rows of the token embeddings (e.g. regarding
        # [CLS]/[SEP] positions) — confirm for the installed
        # sentence-transformers version.
        token_ids = [bert.tokenize(sent) for sent in sentences]
        batches = list(zip(token_ids, sentence_embeddings))

        # Forward the caller's choices instead of hard-coding the defaults.
        return self.oov(batches, oov_way=oov_way, filter_spec_tokens=filter_spec_tokens)

    def oov(self, batches, oov_way='avg', filter_spec_tokens=True):
        """
        Merge word pieces ("##..." continuations) into whole words and combine
        their embeddings. Also filter out [CLS], [SEP] tokens.
        Parameters
        ----------
        batches : List[(token_ids, sequence_outputs)]
            token_ids : sequence of vocabulary ids for one sentence,
            sequence_outputs : one embedding vector per token id
        oov_way : str
            use **avg**, **sum** or **last** to get token embedding for those out of
            vocabulary words; any other value behaves like **sum**
        filter_spec_tokens : bool
            filter [CLS], [SEP] tokens.
        Returns
        -------
        List[(List[str], List[ndarray])]
            List of tokens, and tokens embedding
        """
        tokenizer = self.word_embedding_model.tokenizer
        sentences = []
        for token_ids, sequence_outputs in batches:
            tokens = []
            tensors = []
            oov_len = 1  # number of pieces merged into the current word (avg mode only)
            for token_id, sequence_output in zip(token_ids, sequence_outputs):
                # NOTE(review): ids 1 ([PAD]) and 2/3 ([CLS]/[SEP]) were inherited
                # from the original bert_embedding code; presumably the
                # tokenizer used here assigns different ids — confirm.
                if token_id == 1:
                    # [PAD] token, sequence is finished.
                    break
                if (token_id in (2, 3)) and filter_spec_tokens:
                    # [CLS], [SEP]
                    continue
                token = tokenizer.convert_ids_to_tokens([token_id])[0]
                if token.startswith('##') and tokens:
                    # Continuation piece: glue it onto the previous word.
                    tokens[-1] += token[2:]
                    if oov_way == 'last':
                        tensors[-1] = sequence_output
                    else:
                        # Not in place, so the encoder's output array is left untouched.
                        tensors[-1] = tensors[-1] + sequence_output
                    if oov_way == 'avg':
                        oov_len += 1
                else:
                    # A new word starts (a leading "##" piece with nothing to
                    # attach to is kept as its own token). Finish the average
                    # of the previous multi-piece word first.
                    if oov_len > 1:
                        tensors[-1] = tensors[-1] / oov_len
                        oov_len = 1
                    tokens.append(token)
                    tensors.append(sequence_output)
            if oov_len > 1:  # the sentence ended on a multi-piece word
                tensors[-1] = tensors[-1] / oov_len
            sentences.append((tokens, tensors))
        return sentences


## Create a search index

In [12]:
# Original code from: https://gist.github.com/avidale/c6b19687d333655da483421880441950

from sklearn.neighbors import KDTree
import numpy as np


class ContextNeighborStorage:
    """Nearest-neighbour lookup over contextual token embeddings.

    Parameters
    ----------
    sentences : List[str]
        Corpus sentences to embed and index.
    model : callable
        Called with a list of sentences; must return one ``(tokens, embeddings)``
        pair per sentence, where ``embeddings`` holds one vector per token.

    Typical use is ``process_sentences()``, then ``build_search_index()``,
    then any number of ``query(...)`` calls.
    """

    def __init__(self, sentences, model):
        self.sentences = sentences
        self.model = model

    def process_sentences(self):
        """Embed every sentence and store unit-length token vectors.

        Fills ``sentence_ids``, ``token_ids`` and ``all_tokens`` (parallel lists,
        one entry per token) and ``normed_embeddings`` (one row per token),
        then prints the number of indexed tokens.

        Raises
        ------
        ValueError
            If the model produced no token embeddings at all.
        """
        result = self.model(self.sentences)
        self.sentence_ids = []
        self.token_ids = []
        self.all_tokens = []
        all_embeddings = []
        for sent_idx, (toks, embs) in enumerate(tqdm(result)):
            for tok_idx, (tok, emb) in enumerate(zip(toks, embs)):
                self.sentence_ids.append(sent_idx)
                self.token_ids.append(tok_idx)
                self.all_tokens.append(tok)
                all_embeddings.append(emb)
        if not all_embeddings:
            # np.stack would fail with an unhelpful message on an empty list.
            raise ValueError('No token embeddings were produced; cannot index an empty corpus')
        all_embeddings = np.stack(all_embeddings)
        # Normalise each row to unit length so that Euclidean distance ranks
        # neighbours the same way cosine distance does.
        norms = (all_embeddings ** 2).sum(axis=1, keepdims=True) ** 0.5
        self.normed_embeddings = all_embeddings / norms
        print("count self.normed_embeddings: ", len(self.normed_embeddings))

    def build_search_index(self):
        """Build the KD-tree over ``normed_embeddings`` (this takes some time)."""
        self.indexer = KDTree(self.normed_embeddings)

    def query(self, query_sent, query_word, k=10, filter_same_word=False):
        """Find the corpus tokens whose embeddings are closest to ``query_word``.

        Parameters
        ----------
        query_sent : str
            Sentence that provides the context for ``query_word``.
        query_word : str
            Word to look up; it must appear, with exactly this casing, as a
            single token in the model's tokenisation of ``query_sent``.
            The first matching token is used.
        k : int, default 10
            Maximum number of neighbours to return.
        filter_same_word : bool, default False
            If True, skip neighbours that contain ``query_word`` (as given or
            lower-cased) or are contained in it. At least 100 candidates are
            fetched in that case so that enough remain after filtering.

        Returns
        -------
        (distances, neighbors, contexts)
            Three parallel lists: index distance, neighbour token, and the
            corpus sentence the neighbour came from. May hold fewer than ``k``
            entries when the corpus is small or many candidates are filtered.

        Raises
        ------
        ValueError
            If ``query_word`` is not a single token of ``query_sent``.
        """
        toks, embs = self.model([query_sent])[0]
        for tok, emb in zip(toks, embs):
            if tok == query_word:
                break
        else:
            raise ValueError('The query word "{}" is not a single token in sentence {}'.format(query_word, toks))
        emb = emb / (emb ** 2).sum() ** 0.5

        initial_k = max(k, 100) if filter_same_word else k
        # The index rejects requests for more neighbours than it holds points,
        # so never ask for more than the number of indexed tokens.
        initial_k = min(initial_k, len(self.all_tokens))
        di, idx = self.indexer.query(emb.reshape(1, -1), k=initial_k)

        lowered = query_word.lower()
        distances = []
        neighbors = []
        contexts = []
        for dist, index in zip(di.ravel(), idx.ravel()):
            token = self.all_tokens[index]
            if filter_same_word and (query_word in token or token in query_word
                                     or lowered in token or token in lowered):
                continue
            distances.append(dist)
            neighbors.append(token)
            contexts.append(self.sentences[self.sentence_ids[index]])
            if len(distances) == k:
                break

        return distances, neighbors, contexts

In [13]:
# Reload the Simple Wikipedia sentence table and extract the sentence strings
# to be indexed with ClinicalBERT.
simplewiki_medcon = pd.read_csv('../datasources/simplewiki/extracted/swiki_sentences_ngrams_filtered.csv', sep=';')
simplewiki_sentences = simplewiki_medcon.sentence.tolist()

In [14]:
# Local directory holding the ClinicalBERT checkpoint (config.json, pytorch_model.bin).
path_to_clinicalbert = '../BERT_models/biobert_pretrain_output_disch_100000'
# Uses the BertEmbedding defaults: max_seq_length=256, do_lower_case=False.
clinicalbert = BertEmbedding(model=path_to_clinicalbert)

INFO:transformers.configuration_utils:loading configuration file ../BERT_models/biobert_pretrain_output_disch_100000/config.json
INFO:transformers.configuration_utils:Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pruned_heads": {},
  "torchscript": false,
  "type_vocab_size": 2,
  "use_bfloat16": false,
  "vocab_size": 28996
}

INFO:transformers.modeling_utils:loading weights file ../BERT_models/biobert_pretrain_output_disch_100000/pytorch_model.bin
INFO:transfor

In [15]:
# Embed every corpus sentence with ClinicalBERT and collect the normalised
# per-token vectors that the search index is built from.
storage = ContextNeighborStorage(sentences=simplewiki_sentences, model=clinicalbert)
storage.process_sentences()

INFO:root:Use pytorch device: cuda
Batches: 100%|██████████| 1587/1587 [00:34<00:00, 45.75it/s]
100%|██████████| 12694/12694 [00:00<00:00, 179225.42it/s]


count self.normed_embeddings:  238413


In [7]:
# Build the KD-tree over the normalised token embeddings (slow step).
storage.build_search_index()

In [8]:
# Store data (serialize)
# Persist the whole storage object (sentences, model, embeddings and KD-tree)
# so the index does not have to be rebuilt in later sessions.
with open('./simplewiki_KDTree_clinicalbert.pickle', 'wb') as handle:
    pickle.dump(storage, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [16]:
# Load data (deserialize)
# Restore the storage object written by the serialisation cell.
# NOTE: pickle.load can execute arbitrary code; only load pickle files you created yourself.
# NOTE(review): unpickling presumably needs the BertEmbedding and ContextNeighborStorage
# class definitions to have been executed in this kernel first — confirm.
with open('./simplewiki_KDTree_clinicalbert.pickle', 'rb') as handle:
    tree_copy = pickle.load(handle)

In [27]:
# tree_copy.indexer.get_arrays()

In [28]:
# tree_copy.indexer.get_tree_stats()

# Experiments

__Meningitis__ \
CHV_selection: ['meningitis', 'meningitis/encephalitis', 'pachymeningitis'] \
"Alexandra died in 1869 from meningitis." \
"It would if she developed meningitis." \
"This man has meningitis." \
"He doesn't even have meningitis." \
"There's no evidence of meningitis on that MRI." \
"Well, then she's dying, 'cause the meningitis treatment isn't helping her." \
"How about them cheap meningitis drugs they pawning off in Africa?" \
"No meningitis, no other infections." \
"She could be, but a meningitis patient will be without a CT scan." \
"The antibiotic we gave you in case you had meningitis can turn your tears reddish." \
"If it's meningitis, we have to lD the bug fast." \
"If it was meningitis, we'd all be sick." \
"Progression's too fast.More likely meningitis." \
"LP was negative for meningitis." \
"Just like everybody else we've had to give meningitis shots to." \
"They swap spit, virus travels to his brain, leads to meningitis, leads to increased intracranial pressure." \
"What if it was caused by nf2 cancer instead of his childhood meningitis?" \
"Run CSF tests, find out what's causing the meningitis."

__Gingivitis__ \
CHV_selection: ['gum inflammation'] \
"The leaves and bark are used for controlling blood pressure and gingivitis."

__Atrophy__ \
CHV_selection: ['atrophy', 'atrophic'] \
"Unable to function, the muscles weaken and exhibit atrophy." \
"If you don't exercise it, it'll atrophy, just like anything else."

__SMA__ \
CHV_selection: ['spinal muscular atrophy', 'kennedy syndrome'] \
"Infection stresses his already weakened system, makes his SMA worse."

__leukoencephalopathy__ \
CHV_selection: ['leukoencephalopathy'] \
"What about leukoencephalopathy in a 16-year-old?"

__lameness__ \
CHV_selection: ['claudication'] \
"One of his arms and both feet became enlarged and at some point during his childhood he fell and damaged his hip, resulting in permanent lameness."

__Myelofibrosis__ \
CHV_selection: ['agnogenic myeloid metaplasia', 'idiopathic myelofibrosis', 'myelosclerosis with myeloid metaplasia', 'primary myelofibrosis', 'myelofibrosis'] \
CHV_sub: "idiopathic myelofibrosis" \
"Myelofibrosis fits, but testing takes at least 48 hours."

__IBD__ \
CHV_selection: ['inflammatory bowel disease', 'irritable bowel syndrome (IBS)'] \
"Biopsy showed non-specific inflammation, which suggests IBD."

__hypoperfusion__ \
CHV_selection: ['hypoperfusion', 'respiratory distress syndrome'] \
"But SARS explains the cough, causes hypoperfusion, which explains the ischemic bowel." \
"There's a slight hypoperfusion in the anterior cortex." \
"The condition usually results from infection, injury (accident, surgery), hypoperfusion and hypermetabolism."

__melanoma__ \
CHV_selection: ['melanoma'] \
"Check for melanomas." \
"From considering melanoma as a diagnosis." \
"Unfortunately, it also prevents even those with advanced medical degrees from considering melanoma as a diagnosis." \
"The biopsy revealed it's melanoma." \
"The woman has melanoma." \
"The woman has melanoma, she could die if we don't..." \
"Since the baby has the melanoma, we kind of already knew the mom had it." \
"What's interesting is that mom's melanoma spread to the baby in utero, which means it's metastatic, which means it's late stage." \
"How many people with a late stage melanoma are as healthy as she is?" \
"Clearly, something's going on in mom's blood that's treating her melanoma as well as her baby's." \
"If her immune system was fighting the melanoma, it never would have spread." \
"Exactly, which means it's fighting something else, and the melanoma cells are getting caught in the crossfire." \
"Could his dad's melanoma be a factor here?" \
"If the kid's dad died young of melanoma, how come the overprotective mom never took him to a dermatologist?" \
"His family history of melanoma may have affected his cellular outlook." \
"If we had more data on the exact type of melanoma his father had..."

__Granuloma__ \
CHV_selection: ['granuloma', 'granulomatous lesion'] \
CHV_sub: "granulomatous lesion" \
"Or infections that cause granulomas," \
"Time to scan her for infections that could cause granulomas." \
"Lungs clear for granulomas." \
'The nodules in his spleen were granulomas.' \
"He could have had a granuloma in his sinuses that bled, which could have been caused by Wegener's." \
"Could be granulomas, could be plaques." \
"We can nuke them with a mild course of chemo and then remove the granuloma." \
"Looked like a granuloma on the MRI, but it's not." \
"He's got wegener's, which means he's got granulomas." \
"MRI showed a granuloma in his liver." \
"He had a granuloma in his liver." \
"They found granulomas in Lucy's pericardium."

__sarcoidosis__ \
CHV_selection: ['sarcoidosis'] \
"But the calcified pineal could be sarcoidosis." \
"The PET scan was negative for sarcoidosis." \
"Sarcoidosis is a diagnosis of exclusion, which means that the cowards who spend their time excluding what it isn't usually cause their patients to die from what it is." \
"Sarcoidosis could be inflamed by the treatment." \
"I'm fairly confident our patient's got sarcoidosis." \
"We ran some tests and the results point toward sarcoidosis." \
"And triggered a dormant sarcoidosis." \
"Sarcoidosis could be in his brain and lungs." \
"If sarcoidosis has two environmental causes, it has environmental causes." \
"Sarcoidosis isn't infectious or environmental." \
"Sarcoidosis explains almost all of their symptoms." \
"A.C.E. Levels are too low for sarcoidosis." \
"Maggie tested negative for sarcoidosis." \
"Sarcoidosis hits the spleen and the liver." \
"Sarcoidosis is progressive." \
"The heart could point to sarcoidosis."

__hemosiderosis__ \
CHV_selection: ['hemosiderosis'] \
"The donor had hemosiderosis."

__amyloidosis__ \
CHV_selection: ['amyloidosis'] \
"The largest risk factors for spontaneous bleeding are high blood pressure and amyloidosis." \
"Negative for amyloidosis." \
"Plus internal bleeding, equals amyloidosis." \
"He's bleeding way too much for amyloidosis." \
"Except that nothing in his medical history remotely indicates amyloidosis." \
"He needs a bone marrow transplant for the amyloidosis." \
"Only way to treat amyloidosis is to treat whatever's causing it." \
"Amyloidosis with systemic involvement." \
"If you had amyloidosis, there'd be speckling on your cardiac ultrasound." \
"We'll do a kidney biopsy to test for amyloidosis." \
"Biopsy showed deposits in the renal endothelium consistent with amyloidosis."

__laryngospasm__ \
CHV_selection: ['laryngospasm'] \
"Do a bronchoscopy, it'll set off a laryngospasm." \
"This is our chance to prove laryngospasm."

__porphyria__ \
CHV_selection: ['porphyria'] \
"If she had porphyria, you would have seen purple urine." \
"Pulmonary involvement rules out porphyria." \
"It's also a porphyria trigger." \
"I was wrong about the porphyria, but I wasn't wrong about the nervous system." \
"If you're wrong about porphyria, the treatment could box her kidneys." \
"it's porphyria and it's moving fast."

__nystagmus__ \
CHV_selection: ['nystagmus'] \
"Also, many blind people have nystagmus, which is one reason that some wear dark glasses." \
"There are two key forms of nystagmus: pathological and physiological, with variations within each type."

__bronchiectasis__ \
CHV_selection: ['bronchiectasis'] \
"People with bronchiectasis may have bad breath indicative of active infection." \
"Some people with bronchiectasis may produce frequent green/yellow sputum (up to 240ml (8 oz) daily)." \
"Bronchiectasis has both congenital and acquired causes, with the latter more frequent."

In [3]:
sentences = [
        ("The antibiotic we gave you in case you had meningitis can turn your tears reddish.",
         "meningitis"),
        ("They swap spit, virus travels to his brain, leads to meningitis, leads to increased intracranial pressure.",
         "meningitis"),
        ("The leaves and bark are used for controlling blood pressure and gingivitis.",
         "gingivitis"),
        ("Unable to function, the muscles weaken and exhibit atrophy.", "atrophy"),
        ("If you don't exercise it, it'll atrophy, just like anything else.",
         "atrophy"),
        ("Infection stresses his already weakened system, makes his SMA worse.",
         "SMA"),
        ("One of his arms and both feet became enlarged and at some point during his childhood he fell and damaged his hip, resulting in permanent lameness.",
         "lameness"),
        ("But SARS explains the cough, causes hypoperfusion, which explains the ischemic bowel.",
         "hypoperfusion"),
        ("There's a slight hypoperfusion in the anterior cortex.",
         "hypoperfusion"),
        ("The condition usually results from infection, injury (accident, surgery), hypoperfusion and hypermetabolism.",
         "hypoperfusion"),
        ("Myelofibrosis fits, but testing takes at least 48 hours.",
         "Myelofibrosis"),
        ("Biopsy showed non-specific inflammation, which suggests IBD.", "IBD"),
        ("The biopsy revealed it's melanoma.", "melanoma"),
        ("What's interesting is that mom's melanoma spread to the baby in utero, which means it's metastatic, which means it's late stage.",
         "melanoma"),
        ("How many people with a late stage melanoma are as healthy as she is?",
         "melanoma"),
        ("His family history of melanoma may have affected his cellular outlook.",
         "melanoma"),
        ("If the kid's dad died young of melanoma, how come the overprotective mom never took him to a dermatologist?",
         "melanoma"),
        ("He could have had a granuloma in his sinuses that bled, which could have been caused by Wegener's.",
         "granuloma"), ("MRI showed a granuloma in his liver.", "granuloma"),
        ("We can nuke them with a mild course of chemo and then remove the granuloma.",
         "granuloma"),
        ("Time to scan her for infections that could cause granulomas.",
         "granulomas"),
        ("But the calcified pineal could be sarcoidosis.", "sarcoidosis"),
        ("Sarcoidosis could be in his brain and lungs.", "Sarcoidosis"),
        ("The PET scan was negative for sarcoidosis.", "sarcoidosis"),
        ("Sarcoidosis isn't infectious or environmental.", "Sarcoidosis"),
        ("The donor had hemosiderosis.", "hemosiderosis"),
        ("The largest risk factors for spontaneous bleeding are high blood pressure and amyloidosis.",
         "amyloidosis"),
        ("If you had amyloidosis, there'd be speckling on your cardiac ultrasound.",
         "amyloidosis"),
        ("Biopsy showed deposits in the renal endothelium consistent with amyloidosis.",
         "amyloidosis"),
        ("He needs a bone marrow transplant for the amyloidosis.", "amyloidosis"),
        ("Do a bronchoscopy, it'll set off a laryngospasm.", "laryngospasm"),
        ("If she had porphyria, you would have seen purple urine.", "porphyria"),
        ("If you're wrong about porphyria, the treatment could box her kidneys.",
         "porphyria"),
        ("Also, many blind people have nystagmus, which is one reason that some wear dark glasses.",
         "nystagmus"),
        ("There are two key forms of nystagmus: pathological and physiological, with variations within each type.",
         "nystagmus"),
        ("People with bronchiectasis may have bad breath indicative of active infection.",
         "bronchiectasis"),
        ("Some people with bronchiectasis may produce frequent green/yellow sputum (up to 240ml (8 oz) daily).",
         "bronchiectasis"),
        ("Bronchiectasis has both congenital and acquired causes, with the latter more frequent.",
         "Bronchiectasis"),
        ("However, Satie's military career did not last very long; within a few months he was discharged after deliberately infecting himself with bronchitis.",
         "bronchitis"),
        ("In January 1876, his father died of bronchitis following a long period of depression.",
         "bronchitis"),
        ("She died in The Hague on 20 March 1934, of complications from bronchitis at the age of 75, and was buried in Delft.",
         "bronchitis"),
        ("She died in The Hague on 20 March 1934, of complications from bronchitis at the age of 75.",
         "bronchitis"),
        ("Menuhin died in Martin Luther Hospital, Berlin, Germany, from complications of bronchitis.",
         "bronchitis"),
        ("It is one of the rarest forms of synesthesia.", "synesthesia"),
        ("An outbreak of tularemia occurred in Kosovo in 1999-2000.", "tularemia"),
        ("Over the following summers, Martha's Vineyard was identified as the only place in the world where documented cases of tularemia resulted from lawn mowing.",
         "tularemia"),
        ("Tularemia is not spread directly from person to person.", "Tularemia"),
        ("There is currently no effective treatment or cure for akinetopsia.",
         "akinetopsia"),
        ("Patients with akinetopsia struggle with many issues in their day-to-day life, depending on the severity of their condition.",
         "akinetopsia"),
        ("Besides simple perception, akinetopsia also disturbs visuomotor tasks, such as reaching for objects and catching objects.",
         "akinetopsia"),
        ("Inconspicuous akinetopsia can be selectively and temporarily induced using transcranial magnetic stimulation (TMS) of area V5 of the visual cortex in healthy subjects.",
         "akinetopsia"),
        ("Only in a few cases and after many years does it cause demyelination.",
         "demyelination"),
    ("Ho was diagnosed with cardiomyopathy in 2005 and had a pacemaker implanted.",
     "cardiomyopathy"),
    ("He was diagnosed with cardiomyopathy in 2005 and had a pacemaker implanted.",
     "cardiomyopathy"),
    ("His echocardiogram was negative for cardiomyopathy.", "cardiomyopathy"),
    ("And a subsequent biopsy revealed irreversible cardiomyopathy.",
     "cardiomyopathy"),
    ("The kid has severe cardiomyopathy.", "cardiomyopathy"),
    ("Which means it could be a hematological problem plus cardiomyopathy",
     "cardiomyopathy"),
    ("An acute myocardial infarction, happens when a blood vessel in the heart suddenly becomes blocked.",
     "myocardial"),
    ("Patau syndrome is a syndrome caused by a chromosomal abnormality, in which some or all of the cells of the body contain extra genetic material from chromosome 13.",
     "Patau"),
    ("Most cases of Patau syndrome are not inherited, but occur as random events during the formation of reproductive cells (eggs and sperm).",
     "Patau"),
    ("But a baby with Patau syndrome has 3 copies of chromosome 13, instead of 2.",
     "Patau"),
    ("Nora is diagnosed with Trisomy 13.", "Trisomy"),
    ("The patient has trisomy 13.", "trisomy"),
    ("The patient has trisomy 21.", "trisomy"),
    ("Rarely very low blood pressure may be the only sign of anaphylaxis.",
     "anaphylaxis"),
    ("Worldwide, 0.05-2% of the population is estimated to have anaphylaxis at some point in life, and rates appear to be increasing.",
     "anaphylaxis"),
    ("On a mechanistic level, anaphylaxis is caused by the release of mediators from certain types of white blood cells triggered either by immunologic or non-immunologic mechanisms.",
     "anaphylaxis"),
    ("Previous systemic reactions, which are anything more than a local reaction around the site of the sting, are a risk factor for future anaphylaxis; however, half of fatalities have had no previous systemic reaction.",
     "anaphylaxis"),
    ('In a person who died from anaphylaxis, autopsy may show an "empty heart" attributed to reduced venous return from vasodilation and redistribution of intravascular volume from the central to the peripheral compartment.',
     "anaphylaxis"),
    ("Anaphylaxis can occur in response to almost any foreign substance.",
     "Anaphylaxis"),
    ("Many foods can trigger anaphylaxis; this may occur upon the first known ingestion.",
     "anaphylaxis"),
    ('People prone to anaphylaxis are advised to have an "allergy action plan."',
     "anaphylaxis"),
    ("The antitoxin could cause anaphylaxis.", "anaphylaxis"),
    ("A food allergy explains the anaphylaxis.", "anaphylaxis"),
    ("They die of dysentery.", "dysentery"),
    ("Henry II moved in support of Richard, and Henry the Young King died from dysentery at the end of the campaign.",
     "dysentery"),
    ("Once at Richmond, Madison began drafting the Report, though he was delayed by a weeklong battle with dysentery.",
     "dysentery"),
    ("Psoriasis varies in severity from small, localized patches to complete body coverage.",
     "Psoriasis"),
    ("Psoriasis is generally thought to be a genetic disease that is triggered by environmental factors.",
     "Psoriasis"),
    ("There are five main types of psoriasis: plaque, guttate, inverse, pustular, and erythrodermic.",
     "psoriasis"),
    ("If one twin has psoriasis, the other twin is three times more likely to be affected if the twins are identical than if they are non-identical.",
     "psoriasis"),
    ("High doses may lead to muscles contractions.", "contractions"),
    ("High doses may lead to convulsions.", "convulsions"),
    ("To reduce the encephalocele cyst.", "encephalocele"),
    ("Epistaxis in children is usually from Little's area, which is on the septal wall anteriorly.",
     "Epistaxis"),
    ("Once epistaxis occurs, the importance of the first treatment for the haemostasis should be emphasized.",
     "epistaxis")
]

In [4]:
import sys
sys.path.insert(1, '../LexSimp_BERT_MLM/')

from transformers import BertForMaskedLM, BertTokenizer
import torch
import MLM_LS
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity as cosine
import pandas as pd
from tqdm import tqdm
import pickle

In [5]:
def substitution_selection(source_word, closest_candidates, stopwords, ps, num_selection):
    """Filter nearest-neighbour candidates down to usable substitution words.

    Candidates are visited in the order given and a candidate is dropped when it
    is a WordPiece continuation (starts with ``##``), is identical to
    ``source_word``, consists only of digits, is a single letter, is a
    punctuation mark, is a stop word (case-insensitive), shares its stem with
    ``source_word`` (case-insensitive), or was already kept under a different
    casing. Selection stops as soon as ``num_selection`` candidates are kept.

    Args:
        source_word: The word that is to be substituted.
        closest_candidates: Sequence of ``(word, context)`` pairs.
        stopwords: Container of lower-cased stop words (membership test only).
        ps: Stemmer object exposing ``stem(word) -> str``.
        num_selection: Maximum number of candidates to keep.

    Returns:
        A list of ``(word, context)`` pairs. If every candidate was filtered
        out, the first ``num_selection + 1`` unfiltered candidates are returned
        instead.

    Raises:
        AssertionError: If ``num_selection`` exceeds the number of candidates,
            or if no candidate can be returned at all.
    """
    assert num_selection <= len(closest_candidates)

    source_stem = ps.stem(source_word).lower()

    punctuation_marks = {',', '.', ';', ':', '(', '-', ')', '{', '}', '[', ']', '\\', '/', '!', '?', '<', '>', '"', "'"}

    cur_tokens = []
    cur_contexts = []
    # Lower-cased forms of the tokens kept so far, for O(1) duplicate checks.
    seen_lower = set()

    for token, context in closest_candidates:
        if token[0:2] == "##":
            continue

        if token == source_word:
            continue

        if str(token).isdigit():
            continue

        if token.isalpha() and len(token) == 1:
            continue

        if token in punctuation_marks:
            continue

        lowered = token.lower()

        if lowered in stopwords:
            continue

        if ps.stem(token).lower() == source_stem:
            continue

        if lowered in seen_lower:
            continue

        seen_lower.add(lowered)
        cur_tokens.append(token)
        cur_contexts.append(context)

        if len(cur_tokens) == num_selection:
            break

    if not cur_tokens:
        # Nothing survived the filters: fall back to the raw head of the list.
        # Candidates are (word, context) pairs; the previous 3-way unpacking
        # raised ValueError here. The slice length is kept as it was.
        for word, context in closest_candidates[0:num_selection + 1]:
            cur_tokens.append(word)
            cur_contexts.append(context)

    assert len(cur_tokens) > 0
    assert len(cur_tokens) == len(cur_contexts)

    return list(zip(cur_tokens, cur_contexts))

In [6]:
def get_score(sentence, tokenizer, maskedLM):
    """Score a sentence with a masked language model (lower is better).

    Each token of the tokenized sentence is replaced by ``[MASK]`` in turn, the
    model is run with the unmasked token ids as labels, and the first element
    of the model output is accumulated as that position's loss. The result is
    ``exp(total_loss / number_of_tokens)``.

    Args:
        sentence: Text to score.
        tokenizer: Object exposing ``tokenize`` and ``convert_tokens_to_ids``.
        maskedLM: Callable model invoked as
            ``maskedLM(input_ids, masked_lm_labels=label_ids)`` whose first
            output is a scalar loss tensor.

    Returns:
        A numpy scalar holding the exponentiated mean loss.

    Raises:
        ZeroDivisionError: If ``sentence`` tokenizes to zero tokens.
    """
    # Imported locally so the function does not depend on a notebook-level
    # ``np`` binding (the import cell above does not import numpy).
    import numpy as np

    # Run on whatever device the model lives on instead of assuming CUDA;
    # fall back to the previous hard-coded 'cuda' if it cannot be determined.
    try:
        device = next(maskedLM.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device('cuda')

    tokens = tokenizer.tokenize(sentence)
    label_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)]).to(device)
    sentence_loss = 0

    for i, original_token in enumerate(tokens):
        # Mask one position at a time, then restore it afterwards.
        tokens[i] = '[MASK]'
        mask_input = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)]).to(device)
        with torch.no_grad():
            # NOTE(review): `masked_lm_labels` is the keyword of the transformers
            # release used here; newer releases call it `labels` - confirm on upgrade.
            word_loss = maskedLM(mask_input, masked_lm_labels=label_ids)[0].data.cpu().numpy()
        sentence_loss += word_loss
        tokens[i] = original_token

    return np.exp(sentence_loss / len(tokens))


def LM_score(source_word, source_context, substitution_selection, tokenizer, maskedLM):
    """Score the context sentence once per candidate substitution.

    The context words are joined with single spaces, every whole-word
    occurrence of ``source_word`` is replaced by the candidate, and the
    resulting sentence is scored with ``get_score``.

    Args:
        source_word: The word being substituted.
        source_context: Iterable of context words containing ``source_word``.
        substitution_selection: Iterable of ``(substitute, context)`` pairs;
            only the substitute is used here.
        tokenizer: Forwarded to ``get_score``.
        maskedLM: Forwarded to ``get_score``.

    Returns:
        A list of scores, one per candidate, in input order.
    """
    import re

    source_sentence = " ".join(source_context).strip()

    # Replace whole words only. A plain str.replace also rewrote the target
    # inside longer words (e.g. "ear" inside "heart"), corrupting the sentence
    # that gets scored. A boundary is enforced only on a side where the source
    # word itself begins/ends with a word character.
    left_boundary = r'(?<!\w)' if re.match(r'\w', source_word) else ''
    right_boundary = r'(?!\w)' if re.search(r'\w$', source_word) else ''
    word_pattern = re.compile(left_boundary + re.escape(source_word) + right_boundary)

    scores = []
    for substitute, _context in substitution_selection:
        # A callable replacement keeps backslashes in the substitute literal.
        sub_sentence = word_pattern.sub(lambda _match: substitute, source_sentence)
        scores.append(get_score(sub_sentence, tokenizer, maskedLM))

    return scores

def preprocess_SR(source_word, substitution_selection, fasttext_dico, fasttext_emb, word_count):
    """Collect the per-candidate features used for substitution ranking.

    A candidate is dropped when its lower-cased form has no entry in
    ``word_count``. When an embedding exists for ``source_word`` (exact form
    first, lower-cased form second), candidates without an embedding are
    dropped as well and the cosine similarity to the source is recorded for
    the remaining ones.

    Args:
        source_word: The word being substituted.
        substitution_selection: Iterable of ``(substitute, context)`` pairs.
        fasttext_dico: Sequence of vocabulary words supporting ``in``/``index``.
        fasttext_emb: Embedding rows aligned with ``fasttext_dico``.
        word_count: Mapping from lower-cased word to its frequency.

    Returns:
        ``(ss, sis_scores, count_scores)`` where ``ss`` is the list of kept
        ``(substitute, stripped_context)`` pairs, ``count_scores`` is aligned
        with ``ss``, and ``sis_scores`` is aligned with ``ss`` or empty when
        the source word has no embedding.
    """
    def _embedding_of(word):
        # Try the word as written, then lower-cased; None when neither is known.
        for key in (word, word.lower()):
            if key in fasttext_dico:
                return fasttext_emb[fasttext_dico.index(key)].reshape(1, -1)
        return None

    source_emb = _embedding_of(source_word)
    source_has_embedding = source_emb is not None

    kept = []
    sis_scores = []
    count_scores = []

    for sub, context in substitution_selection:
        lowered = sub.lower()
        if lowered not in word_count:
            continue
        frequency = word_count[lowered]

        if source_has_embedding:
            sub_emb = _embedding_of(sub)
            if sub_emb is None:
                continue
            sis_scores.append(cosine(source_emb, sub_emb)[0][0])

        kept.append((sub, context.strip()))
        count_scores.append(frequency)

    return kept, sis_scores, count_scores

def substitution_ranking(source_word, source_context, substitution_selection, fasttext_dico, fasttext_emb, word_count,
                         tokenizer, maskedLM):
    """Order candidate substitutions by the sum of several rank features.

    Features (rank 1 is best): position in the incoming candidate list,
    embedding similarity to the source word (descending, only when
    similarities are available), word frequency (descending) and
    language-model score (ascending). Candidates are returned sorted by the
    summed ranks; ties keep their incoming order.

    Returns:
        A list of ``(substitute, context)`` pairs, best first. NOTE: when no
        candidate survives ``preprocess_SR`` the bare ``source_word`` string is
        returned instead of a list, so callers must handle both types.
    """
    def _ranks(scores, descending):
        # Rank = 1-based position of the first equal value in the sorted list,
        # so tied scores share the same rank.
        ordered = sorted(scores, reverse=descending)
        return [ordered.index(score) + 1 for score in scores]

    ss, sis_scores, count_scores = preprocess_SR(source_word, substitution_selection, fasttext_dico, fasttext_emb,
                                                 word_count)

    if not ss:
        return source_word

    # First feature: the order in which the candidates arrived.
    rank_columns = [list(range(1, len(ss) + 1))]
    if sis_scores:
        rank_columns.append(_ranks(sis_scores, True))
    rank_columns.append(_ranks(count_scores, True))
    rank_columns.append(_ranks(LM_score(source_word, source_context, ss, tokenizer, maskedLM), False))

    all_ranks = [sum(per_candidate) for per_candidate in zip(*rank_columns)]

    by_total_rank = sorted(zip(ss, all_ranks), key=lambda pair: pair[1])
    return [candidate for candidate, _total in by_total_rank]


In [7]:
# Load pre-trained model tokenizer (vocabulary) from the local checkpoint directory.
# do_lower_case=False: the tokenizer is asked not to lower-case its input.
# The same directory is used further down to load the masked-LM weights.
tokenizer = BertTokenizer.from_pretrained('../BERT_models/biobert_pretrain_output_disch_100000', do_lower_case=False)

INFO:transformers.tokenization_utils:Model name '../BERT_models/biobert_pretrain_output_disch_100000' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc, bert-base-german-dbmdz-cased, bert-base-german-dbmdz-uncased, bert-base-finnish-cased-v1, bert-base-finnish-uncased-v1). Assuming '../BERT_models/biobert_pretrain_output_disch_100000' is a path or url to a directory containing tokenizer files.
INFO:transformers.tokenization_utils:Didn't find file ../BERT_models/biobert_pretrain_output_disch_100000/added_tokens.json. We won't load it.
INFO:transformers.tokenization_utils:Didn't find file ../BERT_models/biobert_

In [8]:
# Load fastText word embeddings.
# fasttext_dico is later searched with `in` / `.index(...)` and fasttext_emb is
# indexed with the resulting position (see preprocess_SR).
print("Loading embeddings ...")
wordVecPath = '../lex_simp_with_masked_language_model/word_embeddings/fastText/crawl-300d-2M-subword.vec'
fasttext_dico, fasttext_emb = MLM_LS.getWordmap(wordVecPath)

# Load word frequency (looked up by lower-cased word in preprocess_SR).
# SECURITY NOTE: pickle.load can execute arbitrary code; only load this file
# from a trusted source.
word_count_path = '../lex_simp_with_masked_language_model/word_frequency/counter_Tokens.p'
with open(word_count_path, 'rb') as f:
    word_count = pickle.load(f)

# Load BERT MLM from the same checkpoint directory as the tokenizer, move it to
# the GPU (hard-coded 'cuda') and switch it to evaluation mode.
# model.eval() is the last expression of the cell, so its repr is displayed.
model = BertForMaskedLM.from_pretrained('../BERT_models/biobert_pretrain_output_disch_100000') #, do_lower_case=False)
model.to('cuda')
model.eval()

Loading embeddings ...
2000000 300



INFO:transformers.configuration_utils:loading configuration file ../BERT_models/biobert_pretrain_output_disch_100000/config.json
INFO:transformers.configuration_utils:Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pruned_heads": {},
  "torchscript": false,
  "type_vocab_size": 2,
  "use_bfloat16": false,
  "vocab_size": 28996
}

INFO:transformers.modeling_utils:loading weights file ../BERT_models/biobert_pretrain_output_disch_100000/pytorch_model.bin
INFO:transfor

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

In [10]:
# Load stop words: NLTK's English list merged with a second list read from disk.
stopword_list1 = set(stopwords.words('english'))
# SECURITY NOTE(review): eval() executes whatever the file contains. If the
# file is a plain Python literal, ast.literal_eval would be a safer choice -
# confirm the file format before switching.
with open('../lex_simp_with_masked_language_model/MySQL_MyISAM_stopwords.txt', "r") as f:
    stopword_list2 = set(eval(f.read()))
stopword_list = stopword_list1.union(stopword_list2)

# Stemmer passed to substitution_selection to drop candidates sharing the source word's stem.
ps = PorterStemmer()

# Maximum number of candidates substitution_selection keeps per query word.
num_selection = 12

## ClinicalBERT

### Selection > ranking

In [23]:
def output_ss_sr(query_sent, query_word, k, window_context=11):
    """Run selection and ranking for one (sentence, word) pair and print the result.

    Relies on notebook-level globals: ``tokenizer``, ``model``, ``tree_copy``,
    ``stopword_list``, ``ps``, ``num_selection``, ``fasttext_dico``,
    ``fasttext_emb`` and ``word_count``. ``tree_copy`` is not defined in the
    cells shown here and must exist before this function is called.

    Args:
        query_sent: Sentence containing the word to substitute.
        query_word: The word to substitute; must appear in the word list
            produced by ``MLM_LS.convert_sentence_to_token``.
        k: Number of nearest neighbours requested from ``tree_copy.query``.
        window_context: Window size passed to ``MLM_LS.extract_context``
            (defaults to the previously hard-coded value 11).

    Raises:
        ValueError: If ``query_word`` is not among the sentence's words.
    """
    # Only the word list is needed; tokens and positions are discarded.
    _, words, _ = MLM_LS.convert_sentence_to_token(sentence=query_sent, tokenizer=tokenizer, seq_length=256)

    # Distances are not used downstream, only the neighbours and their contexts.
    _, neighbors, contexts = tree_copy.query(query_sent=query_sent, query_word=query_word, k=k,
                                             filter_same_word=True)
    closest_candidates = list(zip(neighbors, contexts))

    ss = substitution_selection(source_word=query_word,
                                closest_candidates=closest_candidates,
                                stopwords=stopword_list,
                                ps=ps,
                                num_selection=num_selection)

    mask_index = words.index(query_word)
    mask_context = MLM_LS.extract_context(words, mask_index, window_context)

    sr = substitution_ranking(source_word=query_word,
                              source_context=mask_context,
                              substitution_selection=ss,
                              fasttext_dico=fasttext_dico,
                              fasttext_emb=fasttext_emb,
                              word_count=word_count,
                              tokenizer=tokenizer,
                              maskedLM=model)

    # substitution_ranking returns the bare source word (a str) when no
    # candidate survives; wrap it so the loop below does not enumerate its
    # characters one by one.
    if isinstance(sr, str):
        sr = [sr]

    print(" ")
    print("query_sent: ", query_sent)
    print("query_word: ", query_word)
    print("Top substitutions:")
    for i, item in enumerate(sr):
        print(f'{i+1}. {item}')


In [24]:
# Run selection + ranking for every (sentence, target word) pair in `sentences`,
# requesting the 50 nearest neighbours per query; results are printed by output_ss_sr.
for query_sent, query_word in tqdm(sentences):
    output_ss_sr(query_sent, query_word, k=50)

  0%|          | 0/34 [00:00<?, ?it/s]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 44.14it/s]
  3%|▎         | 1/34 [00:02<01:07,  2.04s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 104.16it/s]

 
query_sent:  Ho was diagnosed with cardiomyopathy in 2005 and had a pacemaker implanted.
query_word:  cardiomyopathy
Top substitutions:
1. ('arrhythmia', 'Heart   Heart disease   Cardiac arrhythmia')
2. ('rhabdomyosarcoma', 'For example, adults with rhabdomyosarcoma usually have tumors that grow faster and are harder to treat.')
3. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
4. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not getting enough blood to pump out to the rest of the body.')
5. ('myocardial', 'redirect myocardial infarction')
6. ('leiomyosarcomas', 'It has been believed that leiomyosarcomas do not arise from leiomyomas.')
7. ('myotonic', 'There is no cure for myotonic dystrophy.')
8. ('ciliopathy', 'It is a ciliopathy.')
9. ('transplantation', 'Patients with this disease usually undergo heart transplantation.')
10. ('dysthymia', 'People with dysthymia have de


  6%|▌         | 2/34 [00:03<01:02,  1.94s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 103.28it/s]

 
query_sent:  His echocardiogram was negative for cardiomyopathy.
query_word:  cardiomyopathy
Top substitutions:
1. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not getting enough blood to pump out to the rest of the body.')
2. ('pathophysiology', 'This change has occurred because the older dichotomous classification did not reflect pathophysiology or outcome.')
3. ('tachycardia', 'A doctor can diagnose ventricular tachycardia by doing an electrocardiogram (also called an ECG or an EKG).')
4. ('myocardial', 'redirect myocardial infarction')
5. ('dystrophy', 'There is no cure for myotonic dystrophy.')
6. ('myotonic', 'There is no cure for myotonic dystrophy.')
7. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
8. ('leiomyosarcomas', 'It has been believed that leiomyosarcomas do not arise from leiomyomas.')
9. ('cardioversion', 'If a person still has a pulse, ventricular tach


  9%|▉         | 3/34 [00:05<00:59,  1.93s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 104.06it/s]

 
query_sent:  And a subsequent biopsy revealed irreversible cardiomyopathy.
query_word:  cardiomyopathy
Top substitutions:
1. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
2. ('myocardial', 'redirect myocardial infarction')
3. ('dystrophy', 'There is no cure for myotonic dystrophy.')
4. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not getting enough blood to pump out to the rest of the body.')
5. ('ventricular', 'This is called idiopathic ventricular tachycardia.')
6. ('myotonic', 'There is no cure for myotonic dystrophy.')
7. ('leiomyosarcomas', 'It has been believed that leiomyosarcomas do not arise from leiomyomas.')
8. ('rhabdomyosarcoma', 'This type of rhabdomyosarcoma usually grows faster than embryonal rhabdomyosarcoma, and needs more treatment than ERMS.')
9. ('cardioverter', '(This special defibrillator is called an " implantable cardioverter-defibrillator. "')
1


 12%|█▏        | 4/34 [00:06<00:52,  1.75s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 99.49it/s]

 
query_sent:  The kid has severe cardiomyopathy.
query_word:  cardiomyopathy
Top substitutions:
1. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not getting enough blood to pump out to the rest of the body.')
2. ('dystrophy', 'There is no cure for myotonic dystrophy.')
3. ('ciliopathy', 'It is a ciliopathy.')
4. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
5. ('myotonic', 'There is no cure for myotonic dystrophy.')
6. ('Endocarditis', 'Endocarditis most often affects the heart valves.')
7. ('Myocardial', 'Myocardial means relating to the heart muscle.')
8. ('leiomyosarcomas', 'It has been believed that leiomyosarcomas do not arise from leiomyomas.')
9. ('leukemia', 'Chronic leukemia grows slowly.')
10. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
11. ('transplantation', 'Patients with this disease usually undergo heart transplan


 15%|█▍        | 5/34 [00:09<00:53,  1.84s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 100.02it/s]

 
query_sent:  Which means it could be a hematological problem plus cardiomyopathy
query_word:  cardiomyopathy
Top substitutions:
1. ('ciliopathy', 'It is a ciliopathy.')
2. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not getting enough blood to pump out to the rest of the body.')
3. ('rhabdomyolysis', 'Use of fibrates and statins at the same time makes rhabdomyolysis happen much more often.')
4. ('Myocardial', 'Myocardial means relating to the heart muscle.')
5. ('cardiotoxicity', 'Large doses of quinine can cause:   skin rashes   deafness   sleepiness   blindness   anaphylactic shock   disturbances in abnormal heart rhythm   death from cardiotoxicity.')
6. ('myotonic', 'There is no cure for myotonic dystrophy.')
7. ('Endocarditis', 'Endocarditis most often affects the heart valves.')
8. ('leiomyosarcomas', 'It has been believed that leiomyosarcomas do not arise from leiomyomas.')
9. ('cardioversion', 'If a person still has a pulse, ventricular tach


 18%|█▊        | 6/34 [00:11<00:52,  1.89s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 100.36it/s]

 
query_sent:  An acute myocardial infarction, happens when a blood vessel in the heart suddenly becomes blocked.
query_word:  myocardial
Top substitutions:
1. ('infarction', 'An acute myocardial infarction, also called a heart attack, happens when a blood vessel in the heart suddenly becomes blocked.')
2. ('ischemic', 'The most common cause is ischemic heart disease (heart disease that causes the heart to not get enough oxygen).')
3. ('coronary', 'Cardiovascular disease includes coronary artery diseases (CAD) such as angina and myocardial infarction (commonly known as a heart attack).')
4. ('cardiomyopathy', 'Problems caused by this may include heart failure due to cardiomyopathy or cardiac arrest.')
5. ('ischemia', 'If the ischemia lasts long enough, the heart muscle that is not getting enough oxygen dies.')
6. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
7. ('myocardium', 'The most important thing is to


 21%|██        | 7/34 [00:13<00:52,  1.96s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 79.30it/s]

 
query_sent:  Patau syndrome is a syndrome caused by a chromosomal abnormality, in which some or all of the cells of the body contain extra genetic material from chromosome 13.
query_word:  Patau
Top substitutions:
1. ('Rett', 'Rett syndrome is an almost entirely female based disorder which affects girls.')
2. ('Spinal', 'Spinal muscular atrophy is inherited from parents who might or might not have the disease themselves but both of whom have a fault (mutation) in their genes that predisposes to the disease.')
3. ('Fragile', 'Fragile X syndrome occurs as a result of a mutation of the FMR1 gene on the X chromosome.')
4. ('Tourette', 'Tourette syndrome is an inherited neurological disorder.')
5. ('Chagas', 'Chagas disease is a disease caused by the protozoan parasite Trypanosoma cruzi.')
6. ('Malaria', 'Malaria is an infectious disease caused by a parasite: it is spread by the bite of an infected mosquito.')
7. ('Jacobsen', 'Jacobsen syndrome (also known as 11q deletion syndrome) is a g


 24%|██▎       | 8/34 [00:14<00:48,  1.86s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 101.23it/s]

 
query_sent:  Most cases of Patau syndrome are not inherited, but occur as random events during the formation of reproductive cells (eggs and sperm).
query_word:  Patau
Top substitutions:
1. ('SIDS', 'Scientists think that about 10 to 20% of SIDS cases are caused by inherited defects in the ion channels that help the heart squeeze out blood.')
2. ('Autism', 'There may be many different causes for the different types of Autism Spectrum Disorder.')
3. ('Stickler', 'There are six types of Stickler syndrome known, and each has its own gene.')
4. ('CIDP', 'Specially important are:   An asymmetrical variant of CIDP is known as Lewis-Sumner Syndrome.')
5. ('CUP', 'A diagnosis of CUP requires a clinical picture with metastatic disease and one or more biopsy results inconsistent with a primary tumor.')
6. ('gout', 'Metabolic syndrome happens along with nearly 75% of all cases of gout.')
7. ('sepsis', 'Today, the bacterial forms of sepsis can be treated with antibiotics.')
8. ('ear', 'Scientist


 26%|██▋       | 9/34 [00:16<00:44,  1.80s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 106.94it/s]

 
query_sent:  But a baby with Patau syndrome has 3 copies of chromosome 13, instead of 2.
query_word:  Patau
Top substitutions:
1. ('Rett', 'People with Rett syndrome also have scoliosis.')
2. ('schizophrenia', 'The risk is even higher if you have an identical twin with schizophrenia.')
3. ('trisomy', 'People with trisomy 18 have three copies of chromosome 18. "')
4. ('GDM', 'Some women with GDM are treated with drugs.')
5. ('DKA', 'People with DKA need to be treated in a hospital.')
6. ('Tourette', "Most people with Tourette's do not need treatment.")
7. ('POTS', 'People with POTS also have other symptoms of orthostatic intolerance (symptoms that get worse when a person is standing up, and get better when the person is lying down).')
8. ('Lassa', 'If a patient is diagnosed with Lassa fever, then the patient will be kept away from other people, to prevent the spread of the virus.')
9. ('BPD', 'Many people with BPD were abused when they were children.')
10. ('dwarfism', 'In the 19th ce


 29%|██▉       | 10/34 [00:17<00:39,  1.66s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 104.18it/s]

 
query_sent:  Nora is diagnosed with Trisomy 13.
query_word:  Trisomy
Top substitutions:
1. ('chromosomal', "It is also possible for chromosomal problems to happen because of a problem with a parent's genes.")
2. ('autosomal', 'It is the second most common autosomal trisomy, after Down Syndrome, that carries to term.')
3. ('Leukemia', 'Leukemia & Lymphoma Society website')
4. ('aneuploidy', 'Among those who survive birth, Down syndrome is the most common form of aneuploidy.')
5. ('Polyploidy', 'Trisomy   Polyploidy')
6. ('chromosomes', 'Patau Sydrome, also known as Trisomy 13 or Trisomy D is a problem with the chromosomes.')
7. ('Myelodysplastic', 'The Myelodysplastic syndrome is a group of diseases and conditions that affect how blood is made.')
8. ('thyroid', '2000: 28-year-old Colombian-American actress and model Sofía Vergara was diagnosed with thyroid cancer.')
9. ('recessive', 'The disorder is X-linked recessive.')
10. ('Parasomnias', '& db=mesh & list_uids=68020920 & dopt=Full 


 32%|███▏      | 11/34 [00:18<00:34,  1.52s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 104.17it/s]

 
query_sent:  The patient has trisomy 13.
query_word:  trisomy
Top substitutions:
1. ('bipolar', 'If one identical twin has bipolar disorder, there is a 40 to 70 percent chance that the other will develop bipolar disorder in their lifetime.')
2. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
3. ('diploid', 'Humans are diploid organisms.')
4. ('anemia', 'The infant has anemia   SIDS only happens at certain ages.')
5. ('sarcoma', 'As a result, there are many subtypes of sarcoma.')
6. ('cystic', 'The parent might not have cystic fibrosis but still might have the gene.')
7. ('bulimia', 'Most people who have bulimia are between the ages of 10 to 25.')
8. ('trichomoniasis', 'Most people with trichomoniasis do not have any symptoms.')
9. ('chromosomal', "It is also possible for chromosomal problems to happen because of a problem with a parent's genes.")
10. ('polyuria', 'This causes polyuria.')
11. ('pyromania', 'When children have pyromania, later in life they u


 35%|███▌      | 12/34 [00:20<00:31,  1.41s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 101.88it/s]

 
query_sent:  The patient has trisomy 21.
query_word:  trisomy
Top substitutions:
1. ('diploid', 'Humans are diploid organisms.')
2. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
3. ('bipolar', 'If one identical twin has bipolar disorder, there is a 40 to 70 percent chance that the other will develop bipolar disorder in their lifetime.')
4. ('bulimia', 'Most people who have bulimia are between the ages of 10 to 25.')
5. ('anemia', 'The infant has anemia   SIDS only happens at certain ages.')
6. ('sarcoma', 'As a result, there are many subtypes of sarcoma.')
7. ('cystic', 'The parent might not have cystic fibrosis but still might have the gene.')
8. ('chromosomal', "It is also possible for chromosomal problems to happen because of a problem with a parent's genes.")
9. ('polyuria', 'This causes polyuria.')
10. ('trichomoniasis', 'Most people with trichomoniasis do not have any symptoms.')
11. ('subtypes', 'There is a debate on the subtypes.')
12. ('castrate


 38%|███▊      | 13/34 [00:22<00:32,  1.55s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 99.36it/s]

 
query_sent:  Rarely very low blood pressure may be the only sign of anaphylaxis.
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'There is no reason why it should not be used if a person is having an anaphylactic reaction.')
2. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
3. ('angioedema', 'It is possible to have angioedema without hives.')
4. ('Epistaxis', 'REDIRECT Epistaxis')
5. ('anorexia', 'Four signs of anorexia are listed in the manual.')
6. ('phobias', 'Causes and risk factors phobias is very.')
7. ('narcolepsy', 'This is the most common symptom of narcolepsy.')
8. ('anthrax', 'If not treated, anthrax often leads to death.')
9. ('anorexics', 'To try to lose weight, anorexics do not eat enough.')
10. ('anosmia', 'In some cases, accupuncture was successfully used to treat anosmia.')
11. ('gout', 'X-rays are not useful for treating acute gout attacks.')
12. ('alexithymic', 


 41%|████      | 14/34 [00:23<00:32,  1.65s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 48.09it/s]

 
query_sent:  Worldwide, 0.05-2% of the population is estimated to have anaphylaxis at some point in life, and rates appear to be increasing.
query_word:  anaphylaxis
Top substitutions:
1. ('angioedema', 'It is possible to have angioedema without hives.')
2. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
3. ('anorexia', 'People do not have to show all four signs for the doctor to decide that they have anorexia.')
4. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
5. ('anaphylactic', 'However, half of the people who die of anaphylaxis have had no anaphylactic reaction before.')
6. ('Epistaxis', 'REDIRECT Epistaxis')
7. ('anaemia', 'People affected may develop anaemia due to loss of blood.')
8. ('autistic', 'Similar to the outcomes in the autistic parents studies, most children having autistic siblings/ twins were found out to be showing autistic signs.')
9. ('anorexics',


 44%|████▍     | 15/34 [00:25<00:33,  1.79s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 98.49it/s]

 
query_sent:  On a mechanistic level, anaphylaxis is caused by the release of mediators from certain types of white blood cells triggered either by immunologic or non-immunologic mechanisms.
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
2. ('angioedema', 'A form of angioedema happens in families and has different triggers, complications, and treatments.')
3. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
4. ('anorexia', 'Many people think that anorexia is caused by wanting to be thin like models in magazines.')
5. ('Epistaxis', 'REDIRECT Epistaxis')
6. ('ataxia', 'Instead, ataxia is caused by a neurological problem.')
7. ('anorexics', 'To try to lose weight, anorexics do not eat enough.')
8. ('anosmia', 'In general, anosmia which has its cause in the brain cannot be


 47%|████▋     | 16/34 [00:27<00:32,  1.79s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 98.06it/s]

 
query_sent:  Previous systemic reactions, which are anything more than a local reaction around the site of the sting, are a risk factor for future anaphylaxis; however, half of fatalities have had no previous systemic reaction.
query_word:  anaphylaxis
Top substitutions:
1. ('angioedema', 'It is possible to have angioedema without hives.')
2. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
3. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
4. ('anorexia', 'People do not have to show all four signs for the doctor to decide that they have anorexia.')
5. ('Epistaxis', 'REDIRECT Epistaxis')
6. ('migraine', 'The exact causes of menstrual migraine are not known for sure but there is a link between falling levels of the female hormone estrogen and the onset of a migraine attack.')
7. ('allergic', 'Anaphylaxis is a seriou


 50%|█████     | 17/34 [00:29<00:31,  1.83s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 103.20it/s]

 
query_sent:  In a person who died from anaphylaxis, autopsy may show an "empty heart" attributed to reduced venous return from vasodilation and redistribution of intravascular volume from the central to the peripheral compartment.
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
2. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
3. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
4. ('anorexia', 'People do not have to show all four signs for the doctor to decide that they have anorexia.')
5. ('narcolepsy', 'It is estimated that between 25 and 50 people, per 100,000 suffer from narcolepsy.')
6. ('Epistaxis', 'REDIRECT Epistaxis')
7. ('angioedema', 'It is possible to have angioedema without hives.')
8. ('cataplexy', 'People who have cataplexy w


 53%|█████▎    | 18/34 [00:31<00:29,  1.83s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 102.45it/s]

 
query_sent:  Anaphylaxis can occur in response to almost any foreign substance.
query_word:  Anaphylaxis
Top substitutions:
1. ('Asphyxia', 'Asphyxia can injure or kill people.')
2. ('Angioedema', 'Angioedema may be caused by an allergic reaction.')
3. ('Trichomoniasis', "Trichomoniasis is only spread through sex, or if two people's genitals touch.")
4. ('Cyanosis', 'Cyanosis is a medical problem.')
5. ('Anosmia', 'Anosmia is losing the the sense of smell.')
6. ('Paraplegia', 'Paraplegia is paralysis of the legs and lower half of the body.')
7. ('Prosopagnosia', 'Prosopagnosia is not curable or treatable.')
8. ('Synesthesia', 'Synesthesia was investigated a lot in the 19th and early 20th centuries, but in the middle of the 20th century, it was less studied.')
9. ('Catalepsy', 'Catalepsy is a nervous condition.')
10. ('Tetraplegia', 'Tetraplegia is the term in Europe.')
11. ('Prognathism', 'Prognathism describes when part or all of the face sticks out more than normal.')



 56%|█████▌    | 19/34 [00:33<00:27,  1.86s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 102.03it/s]

 
query_sent:  Many foods can trigger anaphylaxis; this may occur upon the first known ingestion.
query_word:  anaphylaxis
Top substitutions:
1. ('anorexia', 'People who develop anorexia tend to be perfectionistic.')
2. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
3. ('Epistaxis', 'REDIRECT Epistaxis')
4. ('angioedema', 'It is possible to have angioedema without hives.')
5. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
6. ('anosmia', 'In some cases, accupuncture was successfully used to treat anosmia.')
7. ('antihistamines', 'Doctors often give antihistamines (which destroy histamine) and steroids along with epinephrine.')
8. ('Anterograde', 'Being drunk can cause Anterograde amnesia.')
9. ('anaemia', 'People affected may develop anaemia due to loss of blood.')
10. ('alexithymic', "HFA's are often alexithymic (


 59%|█████▉    | 20/34 [00:35<00:27,  1.94s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 102.82it/s]

 
query_sent:  People prone to anaphylaxis are advised to have an "allergy action plan."
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
2. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
3. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
4. ('anorexia', 'Another symptom of autism spectrum disorder is OCD, which has also been linked to anorexia.')
5. ('narcolepsy', 'Cataplexy often affects people who have narcolepsy.')
6. ('Epistaxis', 'REDIRECT Epistaxis')
7. ('angioedema', 'It is possible to have angioedema without hives.')
8. ('cataplexy', 'People who have cataplexy will sometimes see that some of their muscles suddenly fail them.')
9. ('anosmia', 'In general, anosmia which has its cause in the brain cannot be treated.')
10. ('anthrax', 'I


 62%|██████▏   | 21/34 [00:37<00:23,  1.81s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 103.18it/s]

 
query_sent:  The antitoxin could cause anaphylaxis.
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
2. ('angioedema', 'It is possible to have angioedema without hives.')
3. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
4. ('antihistamines', 'Doctors often give antihistamines (which destroy histamine) and steroids along with epinephrine.')
5. ('Epistaxis', 'REDIRECT Epistaxis')
6. ('seizures', 'This can cause seizures.')
7. ('anorexia', 'People do not have to show all four signs for the doctor to decide that they have anorexia.')
8. ('anorexics', 'To try to lose weight, anorexics do not eat enough.')
9. ('anosmia', 'In some cases, accupuncture was successfully used to treat anosmia.')
10. ('Anterograde', 'Being drunk can cause Anterograde amnesia.')
11. ('anthrax', '


 65%|██████▍   | 22/34 [00:38<00:21,  1.77s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 101.63it/s]

 
query_sent:  A food allergy explains the anaphylaxis.
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
2. ('angioedema', 'It is possible to have angioedema without hives.')
3. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
4. ('Epistaxis', 'REDIRECT Epistaxis')
5. ('anorexia', 'There are various treatments for anorexia.')
6. ('ataxia', "Acquired means that something happened during the person's life to cause the ataxia.")
7. ('agranulocytosis', 'If the agranulocytosis is because of a lack of neutrophils (called neutropenia), it is especially bad.')
8. ('anorexics', 'To try to lose weight, anorexics do not eat enough.')
9. ('migraine', 'Abdominal migraine usually affects children starting at about age 7, but it may affect younger children and older children, and it may


 68%|██████▊   | 23/34 [00:39<00:17,  1.57s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 100.64it/s]

 
query_sent:  They die of dysentery.
query_word:  dysentery
Top substitutions:
1. ('diarrhea', 'Child deaths from diarrhea can be prevented in different ways.')
2. ('cholera', 'Between 1900 and 1920, in India, up to eight million people died of cholera.')
3. ('encephalitis', 'It is not usually considered life-threatening but in 1976 a girl of 5 years was prescribed with steroids at Great Ormond Street Hospital to lessen her acute arthritic pain and died at home on 30th December after a swift attack of encephalitis.')
4. ('Endocarditis', 'Endocarditis most often affects the heart valves.')
5. ('seizures', 'Many people have died from seizures.')
6. ('trichomoniasis', 'These problems do not happen to everyone with trichomoniasis.')
7. ('ciliopathy', 'It is a ciliopathy.')
8. ('polyphagia', 'This causes polyphagia.')
9. ('narcolepsy', 'It is estimated that between 25 and 50 people, per 100,000 suffer from narcolepsy.')
10. ('ketosis', 'Nevertheless, there are no studies directly monitorin


 71%|███████   | 24/34 [00:41<00:15,  1.58s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 104.60it/s]

 
query_sent:  Henry II moved in support of Richard, and Henry the Young King died from dysentery at the end of the campaign.
query_word:  dysentery
Top substitutions:
1. ('seizures', 'Many people have died from seizures.')
2. ('cholera', 'Between 1900 and 1920, in India, up to eight million people died of cholera.')
3. ('narcolepsy', 'It is estimated that between 25 and 50 people, per 100,000 suffer from narcolepsy.')
4. ('rhabdomyolysis', 'Use of fibrates and statins at the same time makes rhabdomyolysis happen much more often.')
5. ('pellagra', 'A person can die from pellagra if it is not treated.')
6. ('ketosis', 'Nevertheless, there are no studies directly monitoring the side effects of ketosis yet, hence it’s too early to conclude that the diet is completely safe for everyone.')
7. ('dyskinesia', 'But some people get tardive dyskinesia after taking these medicines for only 6 weeks.')
8. ('cystic', '65 roses " is how some children refer to their condition since cystic fibrosis is 


 74%|███████▎  | 25/34 [00:43<00:15,  1.69s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 102.18it/s]

 
query_sent:  Once at Richmond, Madison began drafting the Report, though he was delayed by a weeklong battle with dysentery.
query_word:  dysentery
Top substitutions:
1. ('encephalitis', 'In 2013, encephalitis killed about 77,000 people in the world.')
2. ('Chlamydia', 'REDIRECT Chlamydia infection')
3. ('trichomoniasis', 'These problems do not happen to everyone with trichomoniasis.')
4. ('dysthymia', 'However, even though both disorders have similar symptoms, these symptoms are usually less severe with dysthymia.')
5. ('rhabdomyolysis', 'Rhabdomyosarcoma should not be confused with rhabdomyolysis, which is often called " rhabdo " for short.')
6. ('Neurosyphilis', 'Neurosyphilis can also appear later, usually four to 25 years after a person first got syphilis.')
7. ('dyskinesia', 'But some people get tardive dyskinesia after taking these medicines for only 6 weeks.')
8. ('catatonia', 'Doctors can treat catatonia.')
9. ('picornaviruses', 'It shows promise against picornaviruses.')
10


 76%|███████▋  | 26/34 [00:45<00:14,  1.76s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 101.36it/s]

 
query_sent:  Psoriasis varies in severity from small, localized patches to complete body coverage.
query_word:  Psoriasis
Top substitutions:
1. ('Melanoma', 'Melanoma is prominent in New Zealand and Australia due to the hole in the ozone layer and the many beaches.')
2. ('Leprosy', 'Leprosy is an Contagious disease.')
3. ('Psychosis', 'Psychosis is not a disease but rather a name for a number of symptoms, that can be caused by different diseases and conditions.')
4. ('Syphilis', 'Syphilis that is not complicated can usually be treated and cured by antibiotic medications.')
5. ('Cataplexy', 'Cataplexy often affects people who have narcolepsy.')
6. ('Neurosyphilis', 'Neurosyphilis can also appear later, usually four to 25 years after a person first got syphilis.')
7. ('Leishmaniasis', 'Leishmaniasis can be partly prevented by sleeping under nets treated with insecticide.')
8. ('Impetigo', 'Impetigo is a very contagious bacterial skin infection.')
9. ('Catatonia', 'Catatonia can also be


 79%|███████▉  | 27/34 [00:47<00:12,  1.83s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 101.50it/s]

 
query_sent:  Psoriasis is generally thought to be a genetic disease that is triggered by environmental factors.
query_word:  Psoriasis
Top substitutions:
1. ('Psychosis', 'Psychosis is not a disease but rather a name for a number of symptoms, that can be caused by different diseases and conditions.')
2. ('Melanoma', 'Melanoma is prominent in New Zealand and Australia due to the hole in the ozone layer and the many beaches.')
3. ('Leprosy', 'Leprosy is an Contagious disease.')
4. ('Neurosyphilis', 'Neurosyphilis can also appear later, usually four to 25 years after a person first got syphilis.')
5. ('Nocardiosis', 'Nocardiosis is a disease caused by the bacterium Nocardia Asteroides.')
6. ('Syphilis', 'Syphilis that is not complicated can usually be treated and cured by antibiotic medications.')
7. ('Psittacosis', 'Psittacosis is a bacterial disease that can be caught from birds such as parrots, pigeons, and parakeets.')
8. ('Pyromania', 'Pyromania is an impulse control disorder invol


 82%|████████▏ | 28/34 [00:49<00:11,  1.84s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 100.77it/s]

 
query_sent:  There are five main types of psoriasis: plaque, guttate, inverse, pustular, and erythrodermic.
query_word:  psoriasis
Top substitutions:
1. ('leprosy', 'The symptoms of leprosy are irregular spots and patches on the skin.')
2. ('psoriatic', 'Other kinds of arthritis include psoriatic arthritis and septic arthritis (when an area is invaded by bacteria).')
3. ('sarcoidosis', 'In some cases, pulmonary fibrosis is a complication of sarcoidosis.')
4. ('cellulite', 'The appearance of cellulite has been linked to hormonal changes, changes in diet, stress, and poor blood circulation.')
5. ('rosacea', 'There are four types of rosacea, three involving human skin and the fourth affecting eyes.')
6. ('Melanoma', 'Melanoma is prominent in New Zealand and Australia due to the hole in the ozone layer and the many beaches.')
7. ('angioedema', 'In most cases, the cause of angioedema is never found.')
8. ('leishmaniasis', 'The differences in the type of tissue affected are responsible for


 85%|████████▌ | 29/34 [00:51<00:09,  1.83s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 107.64it/s]

 
query_sent:  If one twin has psoriasis, the other twin is three times more likely to be affected if the twins are identical than if they are non-identical.
query_word:  psoriasis
Top substitutions:
1. ('angioedema', 'It is possible to have angioedema without hives.')
2. ('xeroderma', 'People with xeroderma pigmentosum are about 1,000 times more likely to get skin cancer.')
3. ('psychosis', 'Also, if a person has psychosis that lasts longer than their mania, they may have schizoaffective disorder.')
4. ('psoriatic', 'Other kinds of arthritis include psoriatic arthritis and septic arthritis (when an area is invaded by bacteria).')
5. ('bulimia', 'Most people who have bulimia are between the ages of 10 to 25.')
6. ('phobia', 'Anxiety conditions include phobia, social anxiety disorder and generalized anxiety disorder.')
7. ('anorexia', 'Doctors use the Diagnostic and Statistical Manual of Mental Disorders to make a medical diagnosis and decide if a person has anorexia or not.')
8. ('tric


 88%|████████▊ | 30/34 [00:52<00:06,  1.65s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 103.45it/s]

 
query_sent:  High doses may lead to muscles contractions.
query_word:  contractions
Top substitutions:
1. ('spasms', 'The twitching is caused by the spasms.')
2. ('pain', 'This can cause weakness and muscle pain.')
3. ('paralysis', 'Studies suggest that many people get sleep paralysis at least once in their lives.')
4. ('twitches', 'Hypnic jerks are one form of involuntary muscle twitches called myoclonus.')
5. ('seizures', 'Coma, many seizures, or long delirium   If no treatment is given, death often occurs.')
6. ('jerks', 'Benzodiazepines can make muscle jerks happen less often.')
7. ('twitching', 'The twitching is caused by the spasms.')
8. ('movements', 'This medicine helps slow some of the restless movements of chorea.')
9. ('weakness', 'This results in muscular weakness. "')
10. ('work', 'Stress can also make the cremasteric reflex work.')
11. ('sores', 'Impetigo can also be called school sores.')
12. ('rhabdomyolysis', 'Fibrates can have rhabdomyolysis like the statins and als


 91%|█████████ | 31/34 [00:53<00:04,  1.57s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 103.81it/s]

 
query_sent:  High doses may lead to convulsions.
query_word:  convulsions
Top substitutions:
1. ('seizures', 'This can cause seizures.')
2. ('vertigo', 'BPVC causes vertigo.')
3. ('hallucinations', 'Certain drugs can cause hallucinations.')
4. ('anticonvulsant', 'For example, patients who have seizures may be given anticonvulsant medications.')
5. ('anticonvulsants', 'Other medicines that can help with psychiatric symptoms and movements are benzodiazepines and anticonvulsants usually taken for seizures.')
6. ('dyskinesia', 'But some other people can get tardive dyskinesia too.')
7. ('myoclonus', 'Hypnic jerks are one form of involuntary muscle twitches called myoclonus.')
8. ('cerebellar', 'For example, if a person has a brain tumor in their cerebellum, they may get cerebellar ataxia.')
9. ('neurological', 'Some forms of chorea are not linked to neurological conditions, though.')
10. ('contagious', 'All kinds of tonsillitis are contagious.')
11. ('tonsillitis', 'There are treatments 


 94%|█████████▍| 32/34 [00:55<00:03,  1.54s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 101.07it/s]

 
query_sent:  To reduce the encephalocele cyst.
query_word:  encephalocele
Top substitutions:
1. ('encephalitis', 'This is called " secondary encephalitis. "')
2. ('Encephalopathy', 'Encephalopathy means problem of the brain.')
3. ('encephalomyelitis', 'Neurological disorders: These are problems with the brain and the nerves, like Guillain–Barré syndrome and Post-dengue acute disseminated encephalomyelitis.')
4. ('ciliopathy', 'It is a ciliopathy.')
5. ('cephalic', 'It is a type of cephalic disorder.')
6. ('cranium', 'Also, mountain climbers at high altitude have been known to experience cerebral oedema primarily due to reduced air pressure acting on the cranium.')
7. ('ciliopathies', 'The underlying cause of the ciliopathies may be defects in the mechanism of cilia in the cell.')
8. ('tonsillitis', 'There are treatments for tonsillitis.')
9. ('cremasteric', 'Stress can also make the cremasteric reflex work.')



 97%|█████████▋| 33/34 [00:57<00:01,  1.72s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 101.41it/s]

 
query_sent:  Epistaxis in children is usually from Little's area, which is on the septal wall anteriorly.
query_word:  Epistaxis
Top substitutions:
1. ('Anaphylaxis', 'Anaphylaxis has many symptoms, such as an itchy rash, throat swelling, breathing problems, and low blood pressure.')
2. ('Anosmia', 'Anosmia is losing the the sense of smell.')
3. ('Prognathism', 'Prognathism: part of the face sticks out more than normal usually the lower jaw.')
4. ('Epidemic', 'Epidemic typhus')
5. ('Angioedema', 'Angioedema may be caused by an allergic reaction.')
6. ('Agoraphobia', 'Agoraphobia is a medical condition that causes anxiety.')
7. ('Anthrax', 'Anthrax, or splenic fever, is a disease.')
8. ('Catalepsy', 'Catalepsy is a nervous condition.')
9. ('Trichomoniasis', "Trichomoniasis is only spread through sex, or if two people's genitals touch.")
10. ('Scaphocephaly', 'Scaphocephaly is a medical disorder.')
11. ('Arachnophobia', 'Arachnophobia is a fear of spiders.')



100%|██████████| 34/34 [00:59<00:00,  1.74s/it]

 
query_sent:  Once epistaxis occurs, the importance of the first treatment for the haemostasis should be emphasized.
query_word:  epistaxis
Top substitutions:
1. ('anaphylaxis', 'Epinephrine is the best and first treatment used for anaphylaxis.')
2. ('angioedema', 'It is possible to have angioedema without hives.')
3. ('anosmia', 'In some cases, accupuncture was successfully used to treat anosmia.')
4. ('episodic', 'Some people who start off getting episodic migraines may start to get chronic migraines later.')
5. ('ectoparasite', 'The mite is an ectoparasite.')
6. ('phobias', 'Causes and risk factors phobias is very.')
7. ('Psittacosis', 'In rare cases, Psittacosis can cause endocarditis - swelling of joints, and swelling of the cornea.')
8. ('trichomoniasis', 'Treatment for trichomoniasis is usually metronidazole (Flagyl), which kills protozoa like Trichomonas vaginalis.')
9. ('anaphylactic', 'They usually have fewer anaphylactic episodes, which are less severe, as they get older.')




### Ranking > selection (candidates are ranked first, then the ranked list is passed to selection)

In [11]:
def output_sr_ss(query_sent, query_word, k, window_context=11):
    """Print the substitutions for one target word, ranking first and selecting second.

    Steps, in the order they are wired below:
      1. ``tree_copy.query`` is asked for neighbours of ``query_word`` in
         ``query_sent``; each neighbour is paired with its context sentence.
      2. ``substitution_ranking`` receives those (neighbour, context) pairs
         together with a context window around the target word.
      3. ``substitution_selection`` receives the output of the ranking step.
      4. The selected items are printed as a numbered list.

    Parameters
    ----------
    query_sent : str
        Sentence that contains the target word.
    query_word : str
        Target word. It must be an element of the word list produced by
        ``MLM_LS.convert_sentence_to_token`` for ``query_sent``.
    k : int
        Forwarded as ``k`` to ``tree_copy.query`` (presumably the number of
        neighbours to retrieve -- confirm against the tree implementation).
    window_context : int, optional
        Forwarded to ``MLM_LS.extract_context`` as its third argument.
        Defaults to 11, the value that used to be hard-coded here.

    Returns
    -------
    None
        Results are only printed.

    Notes
    -----
    Relies on names defined elsewhere in the notebook: ``MLM_LS``,
    ``tokenizer``, ``tree_copy``, ``substitution_ranking``,
    ``substitution_selection``, ``fasttext_dico``, ``fasttext_emb``,
    ``word_count``, ``model``, ``stopword_list``, ``ps`` and ``num_selection``.
    """
    # Only the word list is used below; the other two outputs are ignored.
    _tokens, words, _position = MLM_LS.convert_sentence_to_token(
        sentence=query_sent, tokenizer=tokenizer, seq_length=256
    )

    # The first output (named "distances" by the original code) is not used here.
    _distances, neighbors, contexts = tree_copy.query(
        query_sent=query_sent, query_word=query_word, k=k, filter_same_word=True
    )
    closest_candidates = list(zip(neighbors, contexts))

    # NOTE(review): assuming `words` is a list, .index raises ValueError when
    # query_word is not one of its elements (e.g. if it was split differently).
    mask_index = words.index(query_word)
    mask_context = MLM_LS.extract_context(words, mask_index, window_context)

    ranked_candidates = substitution_ranking(
        source_word=query_word,
        source_context=mask_context,
        substitution_selection=closest_candidates,
        fasttext_dico=fasttext_dico,
        fasttext_emb=fasttext_emb,
        word_count=word_count,
        tokenizer=tokenizer,
        maskedLM=model,
    )

    selected = substitution_selection(
        source_word=query_word,
        closest_candidates=ranked_candidates,
        stopwords=stopword_list,
        ps=ps,
        num_selection=num_selection,
    )

    print(" ")
    print("query_sent: ", query_sent)
    print("query_word: ", query_word)
    print("Top substitutions:")
    for rank, item in enumerate(selected, start=1):
        print(f'{rank}. {item}')


In [19]:
# Run the ranking-then-selection report for every (sentence, target word)
# pair in `sentences`; k=50 is forwarded to the neighbour query.
for query_sent, query_word in tqdm(sentences):
    output_sr_ss(query_sent=query_sent, query_word=query_word, k=50)


  0%|          | 0/29 [00:00<?, ?it/s][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 54.55it/s]

  3%|▎         | 1/29 [00:07<03:35,  7.71s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 103.73it/s]

 
query_sent:  Ho was diagnosed with cardiomyopathy in 2005 and had a pacemaker implanted.
query_word:  cardiomyopathy
Top substitutions:
1. ('arrhythmia', 'Heart   Heart disease   Cardiac arrhythmia')
2. ('tachycardia', 'Some people with ventricular tachycardia do not have any symptoms, especially if the V-tach lasts for only a few seconds.')
3. ('ventricular', 'This is called idiopathic ventricular tachycardia.')
4. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
5. ('rhabdomyosarcoma', 'For example, adults with rhabdomyosarcoma usually have tumors that grow faster and are harder to treat.')
6. ('dystrophy', 'There is no cure for myotonic dystrophy.')
7. ('myocardial', 'redirect myocardial infarction')
8. ('bradycardia', 'Also, if bradycardia gets very bad, the heart will be unable to pump enough blood and oxygen to the body.')
9. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart 



  7%|▋         | 2/29 [00:14<03:20,  7.43s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 101.20it/s]

 
query_sent:  His echocardiogram was negative for cardiomyopathy.
query_word:  cardiomyopathy
Top substitutions:
1. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
2. ('pathophysiology', 'This change has occurred because the older dichotomous classification did not reflect pathophysiology or outcome.')
3. ('dystrophy', 'There is no cure for myotonic dystrophy.')
4. ('tachycardia', 'A doctor can diagnose ventricular tachycardia by doing an electrocardiogram (also called an ECG or an EKG).')
5. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not getting enough blood to pump out to the rest of the body.')
6. ('arrhythmia', 'Heart   Heart disease   Cardiac arrhythmia')
7. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
8. ('myocardial', 'redirect myocardial infarction')
9. ('ventricular', 'This is called idiopathic ventricular tachycardia.'



 10%|█         | 3/29 [00:21<03:11,  7.38s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 99.03it/s]

 
query_sent:  And a subsequent biopsy revealed irreversible cardiomyopathy.
query_word:  cardiomyopathy
Top substitutions:
1. ('dystrophy', 'There is no cure for myotonic dystrophy.')
2. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
3. ('fibrosis', 'In some patients the cause of the disease can be diagnosed, but in others the cause is unknown, a condition called idiopathic pulmonary fibrosis.')
4. ('myocardial', 'redirect myocardial infarction')
5. ('rhabdomyolysis', 'Use of fibrates and statins at the same time makes rhabdomyolysis happen much more often.')
6. ('ventricular', 'This is called idiopathic ventricular tachycardia.')
7. ('rhabdomyosarcoma', 'This type of rhabdomyosarcoma usually grows faster than embryonal rhabdomyosarcoma, and needs more treatment than ERMS.')
8. ('ciliopathy', 'It is a ciliopathy.')
9. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not gettin



 14%|█▍        | 4/29 [00:26<02:44,  6.60s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 59.36it/s]

 
query_sent:  The kid has severe cardiomyopathy.
query_word:  cardiomyopathy
Top substitutions:
1. ('dystrophy', 'There is no cure for myotonic dystrophy.')
2. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
3. ('myocardial', 'redirect myocardial infarction')
4. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
5. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not getting enough blood to pump out to the rest of the body.')
6. ('ciliopathy', 'It is a ciliopathy.')
7. ('Endocarditis', 'Endocarditis most often affects the heart valves.')
8. ('bronchiectasis', 'People with bronchiectasis have swollen bronchi.')
9. ('tachycardia', 'Ventricular tachycardia causes most of the sudden cardiac deaths in the United States.')
10. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
11. ('leukemia', 'Chronic leukemia grows slow



 17%|█▋        | 5/29 [00:34<02:47,  6.96s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 94.63it/s]

 
query_sent:  Which means it could be a hematological problem plus cardiomyopathy
query_word:  cardiomyopathy
Top substitutions:
1. ('Myopathy', 'Myopathy " simply means muscle disease (Greek myo- " muscle " + patheia < -pathy " suffering " ).')
2. ('rhabdomyolysis', 'Use of fibrates and statins at the same time makes rhabdomyolysis happen much more often.')
3. ('ciliopathy', 'It is a ciliopathy.')
4. ('endocarditis', 'In rare cases, Psittacosis can cause endocarditis - swelling of joints, and swelling of the cornea.')
5. ('myocardial', 'redirect myocardial infarction')
6. ('cardiotoxicity', 'Large doses of quinine can cause:   skin rashes   deafness   sleepiness   blindness   anaphylactic shock   disturbances in abnormal heart rhythm   death from cardiotoxicity.')
7. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
8. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not getting



 21%|██        | 6/29 [00:41<02:44,  7.16s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 68.92it/s]

 
query_sent:  An acute myocardial infarction, happens when a blood vessel in the heart suddenly becomes blocked.
query_word:  myocardial
Top substitutions:
1. ('coronary', 'Cardiovascular disease includes coronary artery diseases (CAD) such as angina and myocardial infarction (commonly known as a heart attack).')
2. ('ischemic', 'The most common cause is ischemic heart disease (heart disease that causes the heart to not get enough oxygen).')
3. ('infarction', 'An acute myocardial infarction, also called a heart attack, happens when a blood vessel in the heart suddenly becomes blocked.')
4. ('ischemia', 'If the ischemia lasts long enough, the heart muscle that is not getting enough oxygen dies.')
5. ('cardiomyopathy', 'Problems caused by this may include heart failure due to cardiomyopathy or cardiac arrest.')
6. ('myocardium', 'The most important thing is to save as much myocardium (heart muscle) as possible and prevent more complications.')
7. ('Myocarditis', 'Myocarditis, also known



 24%|██▍       | 7/29 [00:49<02:41,  7.33s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 97.73it/s]

 
query_sent:  Patau syndrome is a syndrome caused by a chromosomal abnormality, in which some or all of the cells of the body contain extra genetic material from chromosome 13.
query_word:  Patau
Top substitutions:
1. ('Rett', 'Rett syndrome is an almost entirely female based disorder which affects girls.')
2. ('Spinal', 'Spinal muscular atrophy is inherited from parents who might or might not have the disease themselves but both of whom have a fault (mutation) in their genes that predisposes to the disease.')
3. ('Fragile', 'Fragile X syndrome occurs as a result of a mutation of the FMR1 gene on the X chromosome.')
4. ('Chagas', 'Chagas disease is a disease caused by the protozoan parasite Trypanosoma cruzi.')
5. ('Pellagra', 'Pellagra is a disease caused by a lack of niacin (vitamin B3).')
6. ('Turner', 'Turner syndrome, also known as 45 X, is a genetic disorder of human females.')
7. ('Tourette', 'Tourette syndrome is an inherited neurological disorder.')
8. ('ASD', 'ASD is a condi



 28%|██▊       | 8/29 [00:55<02:23,  6.84s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 98.08it/s]

 
query_sent:  Most cases of Patau syndrome are not inherited, but occur as random events during the formation of reproductive cells (eggs and sperm).
query_word:  Patau
Top substitutions:
1. ('SIDS', 'Scientists think that about 10 to 20% of SIDS cases are caused by inherited defects in the ion channels that help the heart squeeze out blood.')
2. ('anemia', 'Dimorphic anemia means two types of anemia at the same time.')
3. ('chorea', 'Some forms of chorea are not linked to neurological conditions, though.')
4. ('Parkinson', "Doctors are studying the exact causes of Parkinson's.")
5. ('Renal', 'Renal medullary carcinoma is a rare form of Renal cancer that occurs most often in patients with Sickle cell trait or Sickle cell disease.')
6. ('malnutrition', 'Marasmus is a form of malnutrition.')
7. ('CIDP', 'Specially important are:   An asymmetrical variant of CIDP is known as Lewis-Sumner Syndrome.')
8. ('Autism', 'There may be many different causes for the different types of Autism Spect



 31%|███       | 9/29 [01:01<02:12,  6.61s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 102.20it/s]

 
query_sent:  But a baby with Patau syndrome has 3 copies of chromosome 13, instead of 2.
query_word:  Patau
Top substitutions:
1. ('schizophrenia', 'The risk is even higher if you have an identical twin with schizophrenia.')
2. ('Rett', 'People with Rett syndrome also have scoliosis.')
3. ('POTS', 'People with POTS also have other symptoms of orthostatic intolerance (symptoms that get worse when a person is standing up, and get better when the person is lying down).')
4. ('trisomy', 'People with trisomy 18 have three copies of chromosome 18. "')
5. ('GDM', 'Some women with GDM are treated with drugs.')
6. ('DKA', 'People with DKA need to be treated in a hospital.')
7. ('chorea', 'People with chorea move without being able to control those movements.')
8. ('BPD', 'Many people with BPD were abused when they were children.')
9. ('Tourette', "Most people with Tourette's do not need treatment.")
10. ('catatonia', 'A person with catatonia may look to be in a stupor (being mentally numb and



 34%|███▍      | 10/29 [01:06<01:57,  6.20s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 103.27it/s]

 
query_sent:  Nora is diagnosed with Trisomy 13.
query_word:  Trisomy
Top substitutions:
1. ('Leukemia', 'Leukemia & Lymphoma Society website')
2. ('chromosomal', "It is also possible for chromosomal problems to happen because of a problem with a parent's genes.")
3. ('Leukodystrophy', 'Leukodystrophy is a gene defect.')
4. ('autosomal', 'It is the second most common autosomal trisomy, after Down Syndrome, that carries to term.')
5. ('aneuploidy', 'Among those who survive birth, Down syndrome is the most common form of aneuploidy.')
6. ('chromosomes', 'Patau Sydrome, also known as Trisomy 13 or Trisomy D is a problem with the chromosomes.')
7. ('Polyploidy', 'Trisomy   Polyploidy')
8. ('Myelodysplastic', 'The Myelodysplastic syndrome is a group of diseases and conditions that affect how blood is made.')
9. ('Diabetes', 'REDIRECT Diabetes mellitus')
10. ('Psychotic', 'Psychotic Disorders.')
11. ('recessive', 'The disorder is X-linked recessive.')
12. ('bronchiectasis', 'People can be b



 38%|███▊      | 11/29 [01:10<01:40,  5.59s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 99.68it/s]

 
query_sent:  The patient has trisomy 13.
query_word:  trisomy
Top substitutions:
1. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
2. ('bipolar', 'If one identical twin has bipolar disorder, there is a 40 to 70 percent chance that the other will develop bipolar disorder in their lifetime.')
3. ('anemia', 'The infant has anemia   SIDS only happens at certain ages.')
4. ('sarcoma', 'As a result, there are many subtypes of sarcoma.')
5. ('cystic', 'The parent might not have cystic fibrosis but still might have the gene.')
6. ('chromosomal', "It is also possible for chromosomal problems to happen because of a problem with a parent's genes.")
7. ('bulimia', 'Most people who have bulimia are between the ages of 10 to 25.')
8. ('subtypes', 'There is a debate on the subtypes.')
9. ('osteosarcoma', "Examples of bone tumors include osteosarcoma and Ewing's sarcoma.")
10. ('leukemia', 'Chronic leukemia grows slowly.')
11. ('diploid', 'Humans are diploid organisms.')



 41%|████▏     | 12/29 [01:15<01:27,  5.17s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 53.20it/s]

 
query_sent:  The patient has trisomy 21.
query_word:  trisomy
Top substitutions:
1. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
2. ('leukemia', 'Chronic leukemia grows slowly.')
3. ('bipolar', 'If one identical twin has bipolar disorder, there is a 40 to 70 percent chance that the other will develop bipolar disorder in their lifetime.')
4. ('bulimia', 'Most people who have bulimia are between the ages of 10 to 25.')
5. ('anemia', 'The infant has anemia   SIDS only happens at certain ages.')
6. ('sarcoma', 'As a result, there are many subtypes of sarcoma.')
7. ('diploid', 'Humans are diploid organisms.')
8. ('cystic', 'The parent might not have cystic fibrosis but still might have the gene.')
9. ('chromosomal', "It is also possible for chromosomal problems to happen because of a problem with a parent's genes.")
10. ('polyuria', 'This causes polyuria.')
11. ('subtypes', 'There is a debate on the subtypes.')
12. ('osteosarcoma', "Examples of bone tumors i



 45%|████▍     | 13/29 [01:22<01:31,  5.72s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 92.27it/s]

 
query_sent:  Rarely very low blood pressure may be the only sign of anaphylaxis.
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'There is no reason why it should not be used if a person is having an anaphylactic reaction.')
2. ('angioedema', 'It is possible to have angioedema without hives.')
3. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
4. ('anorexia', 'Four signs of anorexia are listed in the manual.')
5. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
6. ('ischemia', 'Mesenteric ischemia is a medical condition.')
7. ('narcolepsy', 'This is the most common symptom of narcolepsy.')
8. ('phobias', 'Causes and risk factors phobias is very.')
9. ('Epistaxis', 'REDIRECT Epistaxis')
10. ('anthrax', 'If not treated, anthrax often leads to death.')
11. ('neurosarcoidosis', 'If it does, it is called neurosarcoidosis.')
12. ('anorexics', 'To try to lose we



 48%|████▊     | 14/29 [01:28<01:31,  6.09s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 96.17it/s]

 
query_sent:  Worldwide, 0.05-2% of the population is estimated to have anaphylaxis at some point in life, and rates appear to be increasing.
query_word:  anaphylaxis
Top substitutions:
1. ('angioedema', 'It is possible to have angioedema without hives.')
2. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
3. ('anorexia', 'People do not have to show all four signs for the doctor to decide that they have anorexia.')
4. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
5. ('ketoacidosis', 'Although ketoacidosis – acidification of the blood due to pathological levels of ketones – was historically proposed as a side effect, nutritional ketosis simply cannot achieve the level of ketones required to induce this life-threatening state.')
6. ('anaphylactic', 'However, half of the people who die of anaphylaxis have had no anaphylactic reaction before.')
7. ('allergic', 'For example,



 52%|█████▏    | 15/29 [01:36<01:32,  6.61s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 96.39it/s]

 
query_sent:  On a mechanistic level, anaphylaxis is caused by the release of mediators from certain types of white blood cells triggered either by immunologic or non-immunologic mechanisms.
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
2. ('anorexia', 'Many people think that anorexia is caused by wanting to be thin like models in magazines.')
3. ('angioedema', 'A form of angioedema happens in families and has different triggers, complications, and treatments.')
4. ('ischemia', 'Mesenteric ischemia is a medical condition.')
5. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
6. ('antihistamines', 'Doctors often give antihistamines (which destroy histamine) and steroids along with epinephrine.')
7. ('ataxia', 'Instead, ataxia is caused by a neurological problem.')
8. (



 55%|█████▌    | 16/29 [01:43<01:27,  6.71s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 97.51it/s]

 
query_sent:  Previous systemic reactions, which are anything more than a local reaction around the site of the sting, are a risk factor for future anaphylaxis; however, half of fatalities have had no previous systemic reaction.
query_word:  anaphylaxis
Top substitutions:
1. ('angioedema', 'It is possible to have angioedema without hives.')
2. ('anorexia', 'People do not have to show all four signs for the doctor to decide that they have anorexia.')
3. ('migraine', 'The exact causes of menstrual migraine are not known for sure but there is a link between falling levels of the female hormone estrogen and the onset of a migraine attack.')
4. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
5. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
6. ('allergic', 'Anaphylaxis is a serious allergic reaction.')
7. ('Epistaxis',



 59%|█████▊    | 17/29 [01:50<01:21,  6.82s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 99.04it/s]

 
query_sent:  In a person who died from anaphylaxis, autopsy may show an "empty heart" attributed to reduced venous return from vasodilation and redistribution of intravascular volume from the central to the peripheral compartment.
query_word:  anaphylaxis
Top substitutions:
1. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
2. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
3. ('narcolepsy', 'It is estimated that between 25 and 50 people, per 100,000 suffer from narcolepsy.')
4. ('anorexia', 'People do not have to show all four signs for the doctor to decide that they have anorexia.')
5. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
6. ('episodic', 'Some people who start off getting episodic migraines may start to get chronic migraines later.')
7. ('angioedema', 'It is possible to hav



 62%|██████▏   | 18/29 [01:58<01:17,  7.03s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 96.47it/s]

 
query_sent:  Anaphylaxis can occur in response to almost any foreign substance.
query_word:  Anaphylaxis
Top substitutions:
1. ('Asphyxia', 'Asphyxia can injure or kill people.')
2. ('Narcolepsy', 'Narcolepsy affects the way the nerves work.')
3. ('Angioedema', 'Angioedema may be caused by an allergic reaction.')
4. ('Trichomoniasis', "Trichomoniasis is only spread through sex, or if two people's genitals touch.")
5. ('Cyanosis', 'Cyanosis is a medical problem.')
6. ('Syphilis', 'Syphilis that is not complicated can usually be treated and cured by antibiotic medications.')
7. ('Anosmia', 'Anosmia is losing the the sense of smell.')
8. ('Neurosyphilis', 'Neurosyphilis can also appear later, usually four to 25 years after a person first got syphilis.')
9. ('Anthrax', 'Anthrax, or splenic fever, is a disease.')
10. ('Cataplexy', 'Cataplexy often affects people who have narcolepsy.')
11. ('Hyperhidrosis', 'Hyperhidrosis: when a person sweats too much.')
12. ('Paraplegia', 'Paraplegia is 



 66%|██████▌   | 19/29 [02:05<01:10,  7.01s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 91.58it/s]

 
query_sent:  Many foods can trigger anaphylaxis; this may occur upon the first known ingestion.
query_word:  anaphylaxis
Top substitutions:
1. ('anorexia', 'People who develop anorexia tend to be perfectionistic.')
2. ('Anxiety', 'Anxiety is one of the most common symptoms and can lead the individual to believe that if a task is not completed, something bad may happen.')
3. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
4. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
5. ('seizures', 'This can cause seizures.')
6. ('angioedema', 'It is possible to have angioedema without hives.')
7. ('Epistaxis', 'REDIRECT Epistaxis')
8. ('anosmia', 'In some cases, accupuncture was successfully used to treat anosmia.')
9. ('antihistamines', 'Doctors often give antihistamines (which destroy histamine) and steroids along with epi



 69%|██████▉   | 20/29 [02:13<01:05,  7.25s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 56.45it/s]

 
query_sent:  People prone to anaphylaxis are advised to have an "allergy action plan."
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
2. ('amyloidosis', 'Michael York who has amyloidosis, speaks from 4:00-60:00')
3. ('anorexia', 'Another symptom of autism spectrum disorder is OCD, which has also been linked to anorexia.')
4. ('narcolepsy', 'Cataplexy often affects people who have narcolepsy.')
5. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
6. ('angioedema', 'It is possible to have angioedema without hives.')
7. ('Epistaxis', 'REDIRECT Epistaxis')
8. ('cataplexy', 'People who have cataplexy will sometimes see that some of their muscles suddenly fail them.')
9. ('allergic', 'Anaphylaxis is a serious allergic reaction.')
10. ('anosmia', 'In general, anosmia which ha



 72%|███████▏  | 21/29 [02:18<00:54,  6.76s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 82.30it/s]

 
query_sent:  The antitoxin could cause anaphylaxis.
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
2. ('angioedema', 'It is possible to have angioedema without hives.')
3. ('antihistamines', 'Doctors often give antihistamines (which destroy histamine) and steroids along with epinephrine.')
4. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
5. ('peritonitis', 'This last complication may cause peritonitis.')
6. ('anorexia', 'People do not have to show all four signs for the doctor to decide that they have anorexia.')
7. ('seizures', 'This can cause seizures.')
8. ('Epistaxis', 'REDIRECT Epistaxis')
9. ('hyperkalemia', 'Many medications can cause hyperkalemia, and it is much more likely to occur in people kidney problems.')
10. ('sepsis', 'This delivery system can cause



 76%|███████▌  | 22/29 [02:24<00:45,  6.54s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 67.78it/s]

 
query_sent:  A food allergy explains the anaphylaxis.
query_word:  anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
2. ('angioedema', 'It is possible to have angioedema without hives.')
3. ('anorexia', 'There are various treatments for anorexia.')
4. ('allergy', 'redirect allergy')
5. ('anaphylatoxins', 'It is caused by the release (degranulation) of substances from mast cells or basophils because of anaphylatoxins.')
6. ('ataxia', "Acquired means that something happened during the person's life to cause the ataxia.")
7. ('narcolepsy', 'Some scientists think that narcolepsy is caused by a lack of hypocretin.')
8. ('Epistaxis', 'REDIRECT Epistaxis')
9. ('agranulocytosis', 'If the agranulocytosis is because of a lack of neutrophils (called neutropenia), it is especially bad.')
10. ('migraine', 'Abdominal migraine usually affects children starting at about age 7, but it 



 79%|███████▉  | 23/29 [02:29<00:35,  5.88s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 86.39it/s]

 
query_sent:  They die of dysentery.
query_word:  dysentery
Top substitutions:
1. ('encephalitis', 'It is not usually considered life-threatening but in 1976 a girl of 5 years was prescribed with steroids at Great Ormond Street Hospital to lessen her acute arthritic pain and died at home on 30th December after a swift attack of encephalitis.')
2. ('diarrhea', 'Child deaths from diarrhea can be prevented in different ways.')
3. ('cholera', 'Between 1900 and 1920, in India, up to eight million people died of cholera.')
4. ('appendicitis', 'Most serious appendicitis happens to younger children and elder adults.')
5. ('Diverticulitis', 'Diverticulitis is a disease of the digestive system.')
6. ('endocarditis', 'In rare cases, Psittacosis can cause endocarditis - swelling of joints, and swelling of the cornea.')
7. ('seizures', 'Many people have died from seizures.')
8. ('trichomoniasis', 'These problems do not happen to everyone with trichomoniasis.')
9. ('narcolepsy', 'It is estimated th



 83%|████████▎ | 24/29 [02:36<00:31,  6.25s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 97.47it/s]

 
query_sent:  Henry II moved in support of Richard, and Henry the Young King died from dysentery at the end of the campaign.
query_word:  dysentery
Top substitutions:
1. ('seizures', 'Many people have died from seizures.')
2. ('cholera', 'Between 1900 and 1920, in India, up to eight million people died of cholera.')
3. ('rhabdomyolysis', 'Use of fibrates and statins at the same time makes rhabdomyolysis happen much more often.')
4. ('narcolepsy', 'It is estimated that between 25 and 50 people, per 100,000 suffer from narcolepsy.')
5. ('polydipsia', 'Because the person is urinating so much, they get dehydrated easily and get very thirsty (polydipsia).')
6. ('pellagra', 'A person can die from pellagra if it is not treated.')
7. ('rheumatic', 'Wolfgang Mozart, the famous composer famously died of rheumatic fever.')
8. ('endocarditis', 'In rare cases, Psittacosis can cause endocarditis - swelling of joints, and swelling of the cornea.')
9. ('diarrhea', 'Child deaths from diarrhea can be p



 86%|████████▌ | 25/29 [02:43<00:26,  6.66s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 96.30it/s]

 
query_sent:  Once at Richmond, Madison began drafting the Report, though he was delayed by a weeklong battle with dysentery.
query_word:  dysentery
Top substitutions:
1. ('encephalitis', 'In 2013, encephalitis killed about 77,000 people in the world.')
2. ('syphilis', 'However, syphilis is still very dangerous.')
3. ('Chlamydia', 'REDIRECT Chlamydia infection')
4. ('rhabdomyolysis', 'Rhabdomyosarcoma should not be confused with rhabdomyolysis, which is often called " rhabdo " for short.')
5. ('trichomoniasis', 'These problems do not happen to everyone with trichomoniasis.')
6. ('polydipsia', 'Because the person is urinating so much, they get dehydrated easily and get very thirsty (polydipsia).')
7. ('Neurosyphilis', 'Neurosyphilis can also appear later, usually four to 25 years after a person first got syphilis.')
8. ('dysthymia', 'However, even though both disorders have similar symptoms, these symptoms are usually less severe with dysthymia.')
9. ('Encephalopathy', 'Encephalopathy 



 90%|████████▉ | 26/29 [02:51<00:20,  6.81s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 95.46it/s]

 
query_sent:  Psoriasis varies in severity from small, localized patches to complete body coverage.
query_word:  Psoriasis
Top substitutions:
1. ('Melanoma', 'Melanoma is prominent in New Zealand and Australia due to the hole in the ozone layer and the many beaches.')
2. ('Psychosis', 'Psychosis is not a disease but rather a name for a number of symptoms, that can be caused by different diseases and conditions.')
3. ('Leprosy', 'Leprosy is an Contagious disease.')
4. ('Syphilis', 'Syphilis that is not complicated can usually be treated and cured by antibiotic medications.')
5. ('Schizophrenia', 'Schizophrenia usually appears earlier in men.')
6. ('Narcolepsy', 'Narcolepsy affects the way the nerves work.')
7. ('Impetigo', 'Impetigo is a very contagious bacterial skin infection.')
8. ('Neurosyphilis', 'Neurosyphilis can also appear later, usually four to 25 years after a person first got syphilis.')
9. ('Nocardiosis', 'Nocardiosis is a disease caused by the bacterium Nocardia Asteroide



 93%|█████████▎| 27/29 [02:58<00:13,  6.90s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 95.74it/s]

 
query_sent:  Psoriasis is generally thought to be a genetic disease that is triggered by environmental factors.
query_word:  Psoriasis
Top substitutions:
1. ('Psychosis', 'Psychosis is not a disease but rather a name for a number of symptoms, that can be caused by different diseases and conditions.')
2. ('Melanoma', 'Melanoma is prominent in New Zealand and Australia due to the hole in the ozone layer and the many beaches.')
3. ('Syphilis', 'Syphilis that is not complicated can usually be treated and cured by antibiotic medications.')
4. ('Schizophrenia', 'Schizophrenia usually appears earlier in men.')
5. ('Leprosy', 'Leprosy is an Contagious disease.')
6. ('Nocardiosis', 'Nocardiosis is a disease caused by the bacterium Nocardia Asteroides.')
7. ('Psychopathy', 'Psychopathy is a personality disorder or antisocial personality disorder.')
8. ('Neurosyphilis', 'Neurosyphilis can also appear later, usually four to 25 years after a person first got syphilis.')
9. ('Narcolepsy', 'Narcole



 97%|█████████▋| 28/29 [03:05<00:07,  7.08s/it][AINFO:root:Use pytorch device: cuda


Batches: 100%|██████████| 1/1 [00:00<00:00, 100.30it/s]

 
query_sent:  There are five main types of psoriasis: plaque, guttate, inverse, pustular, and erythrodermic.
query_word:  psoriasis
Top substitutions:
1. ('leprosy', 'The symptoms of leprosy are irregular spots and patches on the skin.')
2. ('sarcoidosis', 'In some cases, pulmonary fibrosis is a complication of sarcoidosis.')
3. ('acne', 'There are lots of acne treatment methods available, including natural treatments.')
4. ('cellulite', 'The appearance of cellulite has been linked to hormonal changes, changes in diet, stress, and poor blood circulation.')
5. ('psoriatic', 'Other kinds of arthritis include psoriatic arthritis and septic arthritis (when an area is invaded by bacteria).')
6. ('Melanoma', 'Melanoma is prominent in New Zealand and Australia due to the hole in the ozone layer and the many beaches.')
7. ('rosacea', 'There are four types of rosacea, three involving human skin and the fourth affecting eyes.')
8. ('angioedema', 'In most cases, the cause of angioedema is never 



100%|██████████| 29/29 [03:12<00:00,  6.63s/it][A

 
query_sent:  If one twin has psoriasis, the other twin is three times more likely to be affected if the twins are identical than if they are non-identical.
query_word:  psoriasis
Top substitutions:
1. ('schizophrenia', 'Prevention is difficult because there is no reliable way to find out in advance who will get schizophrenia.')
2. ('angioedema', 'It is possible to have angioedema without hives.')
3. ('psychosis', 'Also, if a person has psychosis that lasts longer than their mania, they may have schizoaffective disorder.')
4. ('xeroderma', 'People with xeroderma pigmentosum are about 1,000 times more likely to get skin cancer.')
5. ('acne', 'More females are getting acne than males (9.8% versus 9.0%).')
6. ('rosacea', 'Most people with rosacea have only mild redness and are never officially diagnosed or treated.')
7. ('bipolar', 'If one identical twin has bipolar disorder, there is a 40 to 70 percent chance that the other will develop bipolar disorder in their lifetime.')
8. ('psoriat




In [15]:
# Run the substitution lookup once per (sentence, target word) pair in
# `sentences`; tqdm wraps the iterable so a progress bar is shown.
# k=50 is passed straight through to output_sr_ss -- presumably the number of
# candidates to retrieve; TODO confirm against the function's definition.
# Note: `query_sent` / `query_word` are notebook globals and stay bound to the
# last pair after the loop finishes.
for query_sent, query_word in tqdm(sentences):
    output_sr_ss(query_sent, query_word, k=50)

  0%|          | 0/5 [00:00<?, ?it/s]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 11.87it/s]
 20%|██        | 1/5 [00:04<00:16,  4.13s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 104.21it/s]

 
query_sent:  High doses may lead to muscles contractions.
query_word:  contractions
Top substitutions:
1. ('spasms', 'The twitching is caused by the spasms.')
2. ('paralysis', 'Studies suggest that many people get sleep paralysis at least once in their lives.')
3. ('pain', 'This can cause weakness and muscle pain.')
4. ('movements', 'This medicine helps slow some of the restless movements of chorea.')
5. ('seizures', 'Coma, many seizures, or long delirium   If no treatment is given, death often occurs.')
6. ('twitches', 'Hypnic jerks are one form of involuntary muscle twitches called myoclonus.')
7. ('twitching', 'The twitching is caused by the spasms.')
8. ('muscle', 'The condition leads to general muscle stiffness and spasms in other parts of the body.')
9. ('weakness', 'This results in muscular weakness. "')
10. ('jerks', 'Benzodiazepines can make muscle jerks happen less often.')
11. ('work', 'Stress can also make the cremasteric reflex work.')
12. ('cells', 'Statins can cause da


 40%|████      | 2/5 [00:08<00:12,  4.31s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 104.02it/s]

 
query_sent:  High doses may lead to convulsions.
query_word:  convulsions
Top substitutions:
1. ('seizures', 'This can cause seizures.')
2. ('encephalitis', 'In 2013, encephalitis killed about 77,000 people in the world.')
3. ('hallucinations', 'Certain drugs can cause hallucinations.')
4. ('myoclonus', 'Hypnic jerks are one form of involuntary muscle twitches called myoclonus.')
5. ('vertigo', 'BPVC causes vertigo.')
6. ('dyskinesia', 'But some other people can get tardive dyskinesia too.')
7. ('neurological', 'Some forms of chorea are not linked to neurological conditions, though.')
8. ('anticonvulsant', 'For example, patients who have seizures may be given anticonvulsant medications.')
9. ('anticonvulsants', 'Other medicines that can help with psychiatric symptoms and movements are benzodiazepines and anticonvulsants usually taken for seizures.')
10. ('cerebellar', 'For example, if a person has a brain tumor in their cerebellum, they may get cerebellar ataxia.')
11. ('migraine', '


 60%|██████    | 3/5 [00:14<00:09,  4.71s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 39.04it/s]

 
query_sent:  To reduce the encephalocele cyst.
query_word:  encephalocele
Top substitutions:
1. ('encephalitis', 'This is called " secondary encephalitis. "')
2. ('encephalopathy', 'Transmissible spongiform encephalopathy')
3. ('encephalomyelitis', 'Neurological disorders: These are problems with the brain and the nerves, like Guillain–Barré syndrome and Post-dengue acute disseminated encephalomyelitis.')
4. ('cerebellum', 'Spinal cord injuries   Problems that damage parts of the cerebellum.')
5. ('ciliopathy', 'It is a ciliopathy.')
6. ('meningoencephalitis', 'For example:   Meningovascular syphilis, which causes seizures   General Paresis: In this brain disease, syphilis causes chronic meningoencephalitis - an infection of both the meninges and the brain which does not go away.')
7. ('ciliopathies', 'The underlying cause of the ciliopathies may be defects in the mechanism of cilia in the cell.')
8. ('prosencephalon', 'Cyclopia is characterized by a failure of the prosencephalon to 


 80%|████████  | 4/5 [00:22<00:05,  5.73s/it]INFO:root:Use pytorch device: cuda

Batches: 100%|██████████| 1/1 [00:00<00:00, 100.57it/s]

 
query_sent:  Epistaxis in children is usually from Little's area, which is on the septal wall anteriorly.
query_word:  Epistaxis
Top substitutions:
1. ('anaphylaxis', 'Epinephrine is the best treatment for anaphylaxis.')
2. ('Erythema', 'Erythema ab igne is a rash of the skin, which is usually caused by the long-term exposure to heat or infrared radiation, and which is not a burn.')
3. ('Epidemic', 'Epidemic typhus')
4. ('Anosmia', 'Anosmia is losing the the sense of smell.')
5. ('Prognathism', 'Prognathism: part of the face sticks out more than normal usually the lower jaw.')
6. ('Episodic', 'Episodic migraine (EM) is when a person has migraine symptoms for 14 days or less in one month, while chronic migraine (CM) is when a person has migraine symptoms for 15 or more days in one month.')
7. ('Narcolepsy', 'Narcolepsy affects the way the nerves work.')
8. ('Angioedema', 'Angioedema may be caused by an allergic reaction.')
9. ('Agoraphobia', 'Agoraphobia is a medical condition that ca


100%|██████████| 5/5 [00:29<00:00,  5.96s/it]

 
query_sent:  Once epistaxis occurs, the importance of the first treatment for the haemostasis should be emphasized.
query_word:  epistaxis
Top substitutions:
1. ('anaphylaxis', 'Epinephrine is the best and first treatment used for anaphylaxis.')
2. ('angioedema', 'It is possible to have angioedema without hives.')
3. ('episodic', 'Some people who start off getting episodic migraines may start to get chronic migraines later.')
4. ('phobias', 'Causes and risk factors phobias is very.')
5. ('ectoparasite', 'The mite is an ectoparasite.')
6. ('anosmia', 'In some cases, accupuncture was successfully used to treat anosmia.')
7. ('trichomoniasis', 'Treatment for trichomoniasis is usually metronidazole (Flagyl), which kills protozoa like Trichomonas vaginalis.')
8. ('Psittacosis', 'In rare cases, Psittacosis can cause endocarditis - swelling of joints, and swelling of the cornea.')
9. ('anaphylactic', 'They usually have fewer anaphylactic episodes, which are less severe, as they get older.')




In [19]:
# Ad-hoc single query: bind the sentence and the word to substitute in one
# step, then print the substitutions for that pair (same k=50 as the batch
# cells).
query_sent, query_word = (
    "He was diagnosed with cardiomyopathy in 2005 and had a pacemaker implanted.",
    "cardiomyopathy",
)
output_sr_ss(query_sent, query_word, k=50)

INFO:root:Use pytorch device: cuda
Batches: 100%|██████████| 1/1 [00:00<00:00, 97.54it/s]


 
query_sent:  He was diagnosed with cardiomyopathy in 2005 and had a pacemaker implanted.
query_word:  cardiomyopathy
Top substitutions:
1. ('arrhythmia', 'Heart   Heart disease   Cardiac arrhythmia')
2. ('ventricular', 'This is called idiopathic ventricular tachycardia.')
3. ('Myocarditis', 'Myocarditis, also known as inflammatory cardiomyopathy, is inflammation and infection of the heart muscle.')
4. ('tachycardia', 'Some people with ventricular tachycardia do not have any symptoms, especially if the V-tach lasts for only a few seconds.')
5. ('cardiogenic', 'It can also be caused by cardiogenic shock, where the heart is not getting enough blood to pump out to the rest of the body.')
6. ('dystrophy', 'There is no cure for myotonic dystrophy.')
7. ('myocardial', 'redirect myocardial infarction')
8. ('rhabdomyosarcoma', 'For example, adults with rhabdomyosarcoma usually have tumors that grow faster and are harder to treat.')
9. ('leukemia', 'Chronic leukemia grows slowly.')
10. ('brady

## Original BERT

### Ranking > selection

In [12]:
# Load data (deserialize): read the pickled object from disk in binary mode
# and bind it to the notebook-global `tree_copy`.
# Judging by the file name this is presumably a KD-tree built from cased-BERT
# embeddings of the Simple Wikipedia corpus -- TODO confirm where it is built.
# SECURITY: pickle.load can execute arbitrary code; only load files that were
# produced by this project, never an untrusted download.
with open('./simplewiki_KDTree_bertcased256.pickle', 'rb') as handle:
    tree_copy = pickle.load(handle)

In [13]:
# Run the substitution lookup once per (sentence, target word) pair in
# `sentences`, with a tqdm progress bar over the iterable.
# NOTE(review): presumably output_sr_ss reads the tree loaded into `tree_copy`
# by the preceding cell, so that cell must run first -- confirm in the
# function's definition.
# k=50 is forwarded unchanged to output_sr_ss; its exact meaning is defined
# there (likely the number of candidates retrieved -- TODO confirm).
for query_sent, query_word in tqdm(sentences):
    output_sr_ss(query_sent, query_word, k=50)

  1%|          | 1/87 [00:07<11:16,  7.87s/it]

 
query_sent:  The antibiotic we gave you in case you had meningitis can turn your tears reddish.
query_word:  meningitis
Top substitutions:
1. ('pharyngitis', 'Most of the time, it is caused by viral pharyngitis.')
2. ('meningococcal', 'Dengue can also have some of the same symptoms as other diseases, like malaria, leptospirosis, typhoid fever, and meningococcal disease.')
3. ('meningoencephalitis', 'A stiff neck is a sign that the person has either meningitis (inflammation of the meninges, which cover the brain) or meningoencephalitis (swelling of both the meninges and the brain).')
4. ('appendicitis', 'Therefore immediate treatment in hospital is needed for appendicitis.')
5. ('bronchitis', 'The medicine used to treat bronchitis may not be the same as those used to treat bronchiolitis.')
6. ('diphtheria', 'Infected people who do not know they have diphtheria are called carriers of diphtheria, because they can spread the infection without being sick themselves.')
7. ('meningioma', 'a

  2%|▏         | 2/87 [00:16<11:33,  8.15s/it]

 
query_sent:  They swap spit, virus travels to his brain, leads to meningitis, leads to increased intracranial pressure.
query_word:  meningitis
Top substitutions:
1. ('conjunctivitis', 'People who do have symptoms usually have a low fever, conjunctivitis, joint pain (mainly in the hands and feet), and a rash.')
2. ('bronchitis', 'They can be bronchitis or pneumonia.')
3. ('pharyngitis', 'Secondary bacterial infections may occur resulting in sinusitis, pharyngitis, or an ear infection.')
4. ('sinusitis', 'Brain tumor   Lyme disease   Influenza   Brain metastasis   Foodborne Intoxication   Sinusitis   Meningitis   Encephalitis   In many cases headaches can be relieved naturally.')
5. ('meningococcal', 'Dengue can also have some of the same symptoms as other diseases, like malaria, leptospirosis, typhoid fever, and meningococcal disease.')
6. ('hypoglycemia', 'Causes of pallor may include migraines, headache, hypoglycemia, anemia or scarlet fever.')
7. ('pancreatitis', 'Causes include p

  3%|▎         | 3/87 [00:23<10:55,  7.81s/it]

 
query_sent:  The leaves and bark are used for controlling blood pressure and gingivitis.
query_word:  gingivitis
Top substitutions:
1. ('endocarditis', 'Other CVDs are stroke, hypertensive heart disease, rheumatic heart disease, cardiomyopathy, atrial fibrillation, congenital heart disease, endocarditis, aortic aneurysms, and peripheral artery disease.')
2. ('glomerulonephritis', 'Chronic renal failure can be a sign of other diseases, like IgA nephritis, glomerulonephritis, chronic pyelonephritis, and urinary retention.')
3. ('conjunctivitis', 'People who do have symptoms usually have a low fever, conjunctivitis, joint pain (mainly in the hands and feet), and a rash.')
4. ('cirrhosis', 'Causes include perforation of the intestinal tract, pancreatitis, pelvic inflammatory disease, stomach ulcer, cirrhosis, or a ruptured appendix.')
5. ('pancreatitis', 'Causes include perforation of the intestinal tract, pancreatitis, pelvic inflammatory disease, stomach ulcer, cirrhosis, or a ruptured

  5%|▍         | 4/87 [00:30<10:24,  7.53s/it]

 
query_sent:  Unable to function, the muscles weaken and exhibit atrophy.
query_word:  atrophy
Top substitutions:
1. ('dystrophy', 'There is no cure for muscular dystrophy.')
2. ('osteoporosis', 'Osteoporosis is the weakening of bones in the body.')
3. ('hypertrophy', 'The exercise downregulates the pathways which induce muscle hypertrophy, or an increase in muscle size.')
4. ('myopathy', 'Muscle cramps, stiffness, and spasm can also be associated with myopathy.')
5. ('osteoarthritis', 'Osteoarthritis is when the cartilage in between two bones breaks down.')
6. ('ischemia', 'Other symptoms may result from decreased blood supply to other organs such as stroke or mesenteric ischemia.')
7. ('hemorrhagic', 'Most commonly this is a stroke or mini-stroke and sometimes can be a hemorrhagic stroke.')
8. ('atherosclerosis', 'Some people develop atherosclerosis faster than others.')
9. ('endocarditis', 'In rare cases, Psittacosis can cause endocarditis - swelling of joints, and swelling of the 

  6%|▌         | 5/87 [00:39<10:40,  7.81s/it]

 
query_sent:  If you don't exercise it, it'll atrophy, just like anything else.
query_word:  atrophy
Top substitutions:
1. ('dystrophy', 'There is no cure for muscular dystrophy.')
2. ('rupture', 'The abcess can rupture filling the abdomen with the infection.')
3. ('osteoarthritis', 'Osteoarthritis is when the cartilage in between two bones breaks down.')
4. ('hypermetabolism', 'The condition usually results from infection, injury (accident, surgery), hypoperfusion and hypermetabolism.')
5. ('osteoporosis', 'Elderly people are more likely to develop osteoporosis than younger people.')
6. ('atherosclerosis', 'Some people develop atherosclerosis faster than others.')
7. ('metastasize', 'Can it Metastasize (Spread)?')
8. ('intensify', 'Often, however, scratching can intensify itching and even cause further damage to the skin, dubbed the " itch-scratch-itch cycle " .')
9. ('diphtheria', 'A second type of diphtheria can affect the skin.')
10. ('hypertrophy', 'The exercise downregulates the

  7%|▋         | 6/87 [00:44<09:43,  7.20s/it]

 
query_sent:  Infection stresses his already weakened system, makes his SMA worse.
query_word:  SMA
Top substitutions:
1. ('cjd', 'The type of changes that show on the EEG will depend on the type of CJD the patient has, and how far along their disease is.')
2. ('bmi', ';Signs of anorexia   #People with anorexia do not want to keep a body weight and BMI that is normal for their age and height.')
3. ('bpd', 'Treating BPD is very difficult and takes a long time.')
4. ('pcos', 'PCOS is a complex and heterogeneous problem of uncertain etiology, although there is major evidence that the PCOS can be classified as a genetic health problem.')
5. ('smx', 'A three-day treatment with trimethoprim, TMP/SMX, or a fluoroquinolone is usually enough.')
6. ('avm', 'The most problems related to an AVM, are headaches and seizures, cranial nerve deficits , backaches, neckaches and eventual nausea.')
7. ('pah', 'Unfortunately, calcium channel blockers have been largely misused, being prescribed to many pat

  8%|▊         | 7/87 [00:50<08:55,  6.70s/it]

 
query_sent:  One of his arms and both feet became enlarged and at some point during his childhood he fell and damaged his hip, resulting in permanent lameness.
query_word:  lameness
Top substitutions:
1. ('arthritis', 'Extensive tophi may lead to chronic arthritis due to bone erosion.')
2. ('weakness', 'The symptom of muscle weakness usually begins around the age of four in boys and worsens quickly.')
3. ('stiffness', "In chikungunya's chronic phase, almost everyone with the virus (87% - 98%) gets very bad joint pain or stiffness.")
4. ('failure', 'It involves major shock and kidney failure because of the skeletal muscles being crushed.')
5. ('pain', 'It causes pain in the joints and muscles of the upper body.')
6. ('problems', 'Some causes are infections, such as encephalitis; autoimmune diseases; damage to the brain from strokes; and metabolic problems.')
7. ('spasticity', "Over time, the way to take care of cerebral palsy has changed from fixing a person's physical problems such a

  9%|▉         | 8/87 [00:58<09:17,  7.05s/it]

 
query_sent:  But SARS explains the cough, causes hypoperfusion, which explains the ischemic bowel.
query_word:  hypoperfusion
Top substitutions:
1. ('hypoglycemia', 'Very bad hypoglycemia can look exactly like a stroke.')
2. ('hypovolemic', 'In medicine, this is called hypovolemic shock.')
3. ('hypovolemia', 'Some of the known causes of POTS include:   Genetics   Recently being sick with a virus   Chronic diabetes   Hypovolemia (not having enough blood in the body).')
4. ('hemorrhagic', 'Most commonly this is a stroke or mini-stroke and sometimes can be a hemorrhagic stroke.')
5. ('hypoxia', 'It relieves pressure on the right side of the heart, but at the cost of lower oxygen levels in blood (hypoxia).')
6. ('hypoxemic', 'Stage 2 the patient is tachypneic, hypocapnic and hypoxemic; develops moderate liver dysfunction and possible hematologic abnormalities.')
7. ('hypotension', 'Doctors can treat hypotension by treating whatever is causing the low blood pressure.')
8. ('hypoxic', 'It 

 10%|█         | 9/87 [01:05<09:06,  7.00s/it]

 
query_sent:  There's a slight hypoperfusion in the anterior cortex.
query_word:  hypoperfusion
Top substitutions:
1. ('hemorrhage', 'In serious cases, the blood vessels rupture and there is bleeding within the brain (intracranial hemorrhage).')
2. ('hypoxia', 'Cerebral hypoxia (lack of oxygen to the brain) makes a person lose consciousness and stop breathing, and this makes the heart stop completely.')
3. ('hypothyroidism', 'Hypothyroidism, especially when the hypothyroidism is very bad.')
4. ('hemorrhages', 'The three most common types of bleeding problems in the brain are epidural hematomas, subdural hematomas, and subarachnoid hemorrhages.')
5. ('hypovolemic', 'In medicine, this is called hypovolemic shock.')
6. ('tachycardia', 'This is called postural tachycardia.')
7. ('nystagmus', 'When a child with BPVC is not having other symptoms, they may have positional nystagmus, which happens when the head is in a certain position.')
8. ('dyskinesia', 'But there are certain groups of peo

 11%|█▏        | 10/87 [01:14<09:47,  7.63s/it]

 
query_sent:  The condition usually results from infection, injury (accident, surgery), hypoperfusion and hypermetabolism.
query_word:  hypoperfusion
Top substitutions:
1. ('hypovolemia', 'Some of the known causes of POTS include:   Genetics   Recently being sick with a virus   Chronic diabetes   Hypovolemia (not having enough blood in the body).')
2. ('atherosclerosis', 'Causes of cerebrovascular disease can be divided into: atherosclerosis, embolism, aneurysms, low flow states, and other rare causes.')
3. ('infarction', 'Sometimes if a doctor thinks a person has atherosclerosis she may do tests to find atherosclerosis before it causes symptoms of ischemia or infarction.')
4. ('hypoxia', 'It relieves pressure on the right side of the heart, but at the cost of lower oxygen levels in blood (hypoxia).')
5. ('hypermetabolism', 'The condition usually results from infection, injury (accident, surgery), hypoperfusion and hypermetabolism.')
6. ('hemorrhagic', 'Most commonly this is a stroke 

 13%|█▎        | 11/87 [01:21<09:23,  7.42s/it]

 
query_sent:  Myelofibrosis fits, but testing takes at least 48 hours.
query_word:  Myelofibrosis
Top substitutions:
1. ('osteoporosis', 'There is no cure for osteoporosis.')
2. ('myeloma', 'Some symptoms of the multiple myeloma are bone pain, renal failure, recurring infections and anemia.')
3. ('fibromyalgia', 'Fibromyalgia affects mostly women, though men can also be affected.')
4. ('sarcoidosis', 'Sarcoidosis may involve the brain.')
5. ('endometriosis', 'The doctor may see endometriosis tissue during laparoscopy.')
6. ('hypoglycemia', 'Hypoglycemia is treated by returning the blood sugar levels back to normal.')
7. ('edema', 'Macular edema is treated with focal laser treatment.')
8. ('thrombosis', 'This might be due to thrombosis.')
9. ('cystic', 'There is no cure for cystic fibrosis.')
10. ('hemorrhoids', 'Hemorrhoids are diagnosed by a doctor.')
11. ('angioedema', 'This is called hereditary angioedema.')
12. ('myelodysplastic', 'Myelodysplastic syndromes affect the bone marrow 

 14%|█▍        | 12/87 [01:27<09:00,  7.21s/it]

 
query_sent:  Biopsy showed non-specific inflammation, which suggests IBD.
query_word:  IBD
Top substitutions:
1. ('cjd', 'Doctors may suspect CJD when a person has certain symptoms.')
2. ('copd', '3% of all disability is related to COPD.')
3. ('fmd', 'While the disease is widespread around the world, North America, Central America, Australia, New Zealand, Japan, Chile, and many countries in Europe are considered free of FMD.')
4. ('bpd', 'The cause for BPD is not known.')
5. ('avm', 'Nevertheless, in more than half of patients with AVM, hemorrhage is the first symptom.')
6. ('tbi', 'Research since the early 1990s has improved TBI survival.')
7. ('sids', 'The risk of SIDS is highest in infants from two to four months old.')
8. ('bdd', 'BDD was eliminated in 82% of cases at post treatment and 77% at follow-up.')
9. ('mrsa', 'Because penicillin and cephalosporin are the most important drugs that doctors use to treat Staphylococcus aureus infections, doctors may not be able to cure peopl

 15%|█▍        | 13/87 [01:33<08:18,  6.74s/it]

 
query_sent:  The biopsy revealed it's melanoma.
query_word:  melanoma
Top substitutions:
1. ('carcinoma', 'Merkel-cell carcinoma usually shows up on the head, neck, perianal and eyelid.')
2. ('adenocarcinoma', 'Other types of anal carcinoma are adenocarcinoma, lymphoma, sarcoma or melanoma.')
3. ('thrombosis', 'This might be due to thrombosis.')
4. ('hematoma', 'Possible metabolic causes are such as liver failure or kidney failure; and chronic subdural hematoma.')
5. ('sarcomas', 'Liposarcomas, like all sarcomas, are rare.')
6. ('sarcoma', 'Other types of anal carcinoma are adenocarcinoma, lymphoma, sarcoma or melanoma.')
7. ('tumours', 'The doctor may want to perform a rectal examination which will help detect internal hemorrhoids, tumours, polyps, or abscesses.')
8. ('granulomatous', 'Sarcoidosis is a granulomatous disorder that can affect multiple organs.')
9. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
10. ('carcinomas', 'The majority 

 16%|█▌        | 14/87 [01:42<08:50,  7.27s/it]

 
query_sent:  What's interesting is that mom's melanoma spread to the baby in utero, which means it's metastatic, which means it's late stage.
query_word:  melanoma
Top substitutions:
1. ('carcinoma', 'Merkel-cell carcinoma usually shows up on the head, neck, perianal and eyelid.')
2. ('rhabdomyosarcoma', 'Prognosis also depends on the type of rhabdomyosarcoma a person has, what kind of treatment they get, whether their cancer has spread, and many other things.')
3. ('hematoma', 'Possible metabolic causes are such as liver failure or kidney failure; and chronic subdural hematoma.')
4. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
5. ('endometriosis', 'Having endometriosis can make it hard to have a baby.')
6. ('granulomatous', 'Sarcoidosis is a granulomatous disorder that can affect multiple organs.')
7. ('myeloma', 'There are three types:   Leukemia   Lymphoma   Multiple myeloma   A doctor will do some tests to find out if a person has a blo

 17%|█▋        | 15/87 [01:48<08:35,  7.16s/it]

 
query_sent:  How many people with a late stage melanoma are as healthy as she is?
query_word:  melanoma
Top substitutions:
1. ('rhabdomyosarcoma', 'Prognosis also depends on the type of rhabdomyosarcoma a person has, what kind of treatment they get, whether their cancer has spread, and many other things.')
2. ('glaucoma', 'If a person who has glaucoma does not get help from a doctor, they will not be able to see.')
3. ('sarcomas', 'Liposarcomas, like all sarcomas, are rare.')
4. ('sarcoma', "People with Ewing's sarcoma usually feel pain, and may have a lump.")
5. ('myeloma', 'Some symptoms of the multiple myeloma are bone pain, renal failure, recurring infections and anemia.')
6. ('syphilis', 'Doctors might also try to get patients with syphilis to tell their sexual partners to get treatment.')
7. ('hypoglycemia', 'Very bad hypoglycemia can look exactly like a stroke.')
8. ('hematoma', 'Possible metabolic causes are such as liver failure or kidney failure; and chronic subdural hemato

 18%|█▊        | 16/87 [01:55<08:18,  7.03s/it]

 
query_sent:  His family history of melanoma may have affected his cellular outlook.
query_word:  melanoma
Top substitutions:
1. ('cirrhosis', 'Liver damage by cirrhosis cannot be repaired.')
2. ('carcinoma', 'Primary biliary cirrhosis, a disease of unknown etiology   Hepatocellular carcinoma')
3. ('cystic', 'The parent might not have cystic fibrosis but still might have the gene.')
4. ('granulomatous', 'Sarcoidosis is a granulomatous disorder that can affect multiple organs.')
5. ('myeloma', 'Some symptoms of the multiple myeloma are bone pain, renal failure, recurring infections and anemia.')
6. ('sarcomas', 'Liposarcomas, like all sarcomas, are rare.')
7. ('melanocytic', 'Names given to types of birthmarks are: stork bites, Mongolian blue spots, strawberry marks, café au lait spots, nevus congenital melanocytic nevi, and port-wine stains.')
8. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
9. ('thrombosis', 'This might be due to thrombosis.

 20%|█▉        | 17/87 [02:04<08:52,  7.61s/it]

 
query_sent:  If the kid's dad died young of melanoma, how come the overprotective mom never took him to a dermatologist?
query_word:  melanoma
Top substitutions:
1. ('rhabdomyosarcoma', 'Prognosis also depends on the type of rhabdomyosarcoma a person has, what kind of treatment they get, whether their cancer has spread, and many other things.')
2. ('scleroderma', 'Common causes of pulmonary arterial hypertension (PAH) include HIV, scleroderma and other autoimmune disorders, cirrhosis and portal hypertension, sickle cell disease, congenital heart disease, thyroid diseases, and others.')
3. ('melanocytic', 'Names given to types of birthmarks are: stork bites, Mongolian blue spots, strawberry marks, café au lait spots, nevus congenital melanocytic nevi, and port-wine stains.')
4. ('hemorrhagic', 'Having extra blood will help a person if their blood pressure is getting very low (like it does in dengue shock syndrome) or if they do not have enough red blood cells in their blood (because t

 21%|██        | 18/87 [02:12<08:40,  7.55s/it]

 
query_sent:  He could have had a granuloma in his sinuses that bled, which could have been caused by Wegener's.
query_word:  granuloma
Top substitutions:
1. ('hematoma', 'Possible metabolic causes are such as liver failure or kidney failure; and chronic subdural hematoma.')
2. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
3. ('carcinoma', 'Gouty tophi, especially when not in a joint, can be mistaken for basal cell carcinoma, or other cancers.')
4. ('sarcoma', "People with Ewing's sarcoma usually feel pain, and may have a lump.")
5. ('melanoma', 'In everyday use, people often talk about melanoma, but there are other forms of skin cancer, too.')
6. ('glomerulonephritis', 'Renal cell carcinoma   Cystitis   Bladder cancer   Pyelonephritis   Prostate cancer   Urethritis   Wilms tumor   Kidney stones   Glomerulonephritis   Possibly, allergies.')
7. ('granulocyte', 'These are lymphoid cells and young granulocyte cells (which are called myelocytes).

 22%|██▏       | 19/87 [02:17<07:49,  6.91s/it]

 
query_sent:  MRI showed a granuloma in his liver.
query_word:  granuloma
Top substitutions:
1. ('hematoma', 'Possible metabolic causes are such as liver failure or kidney failure; and chronic subdural hematoma.')
2. ('sarcoma', "Examples of bone tumors include osteosarcoma and Ewing's sarcoma.")
3. ('carcinoma', 'Merkel-cell carcinoma usually shows up on the head, neck, perianal and eyelid.')
4. ('glomerulonephritis', 'Renal cell carcinoma   Cystitis   Bladder cancer   Pyelonephritis   Prostate cancer   Urethritis   Wilms tumor   Kidney stones   Glomerulonephritis   Possibly, allergies.')
5. ('melanoma', 'Other types of anal carcinoma are adenocarcinoma, lymphoma, sarcoma or melanoma.')
6. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
7. ('granulocyte', 'These are lymphoid cells and young granulocyte cells (which are called myelocytes).')
8. ('tumours', 'The doctor may want to perform a rectal examination which will help detect internal hemo

 23%|██▎       | 20/87 [02:24<07:53,  7.07s/it]

 
query_sent:  We can nuke them with a mild course of chemo and then remove the granuloma.
query_word:  granuloma
Top substitutions:
1. ('hematoma', 'The picture at the top of this page shows an epidural hematoma.')
2. ('mycoplasma', 'Many common antibiotics such as penicillin that target cell walls do not affect mycoplasma.')
3. ('hematomas', 'In most cases, hematomas spontaneously revert.')
4. ('carcinoma', 'Merkel-cell carcinoma usually shows up on the head, neck, perianal and eyelid.')
5. ('plasmodium', 'Weeks or months later, the Plasmodium can leave the liver to the blood, and the person will get sick again.')
6. ('melanoma', 'Malignant melanoma   The common appearance is an asymmetrical area, with an irregular border, color variation, and often greater than 6 mm diameter.')
7. ('granulocytes', 'Agranulocytosis is a medical condition where there are very few granulocytes (a kind of white blood cell) in the blood.')
8. ('mucosal', 'The stapler will also excise mucosal tissue to bl

 24%|██▍       | 21/87 [02:31<07:39,  6.96s/it]

 
query_sent:  Time to scan her for infections that could cause granulomas.
query_word:  granulomas
Top substitutions:
1. ('granulomatous', 'Sarcoidosis is a granulomatous disorder that can affect multiple organs.')
2. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
3. ('glomerulonephritis', 'Renal cell carcinoma   Cystitis   Bladder cancer   Pyelonephritis   Prostate cancer   Urethritis   Wilms tumor   Kidney stones   Glomerulonephritis   Possibly, allergies.')
4. ('hematoma', 'Possible metabolic causes are such as liver failure or kidney failure; and chronic subdural hematoma.')
5. ('mycoplasma', 'Many common antibiotics such as penicillin that target cell walls do not affect mycoplasma.')
6. ('hematomas', 'Subdural hematomas can also be chronic.')
7. ('thrombosis', 'This delivery system can cause sepsis and thrombosis.')
8. ('carcinoma', 'Primary biliary cirrhosis, a disease of unknown etiology   Hepatocellular carcinoma')
9. ('tumours', 'The

 25%|██▌       | 22/87 [02:38<07:37,  7.05s/it]

 
query_sent:  But the calcified pineal could be sarcoidosis.
query_word:  sarcoidosis
Top substitutions:
1. ('pneumoconiosis', 'Pneumoconiosis   Silicosis')
2. ('subarachnoid', 'About half of all people with subarachnoid hemorrhages die from them.')
3. ('rhabdomyosarcoma', 'What type of rhabdomyosarcoma does the person have?')
4. ('sarcoma', 'It is a type of soft-tissue sarcoma.')
5. ('meningococcemia', 'Septicemia   Meningococcemia   Bubonic Plague   Pneumonic plague   Black death   Google Health - Plague    HowStuffWorks - " Septicemic Plague "')
6. ('candidiasis', 'REDIRECT Candidiasis')
7. ('prostatitis', 'A prostatitis diagnosis is assigned at 8% of all urologist and 1% of all primary care physician visits in the USA.')
8. ('cystocele', 'In some cases a doctor may use surgical mesh to fix the cystocele.')
9. ('multinodular', 'These most commonly caused by Graves disease, but they can also be caused by inflammation or a multinodular goitre.')
10. ('fibrosis', 'In some cases, pulmo

 26%|██▋       | 23/87 [02:44<07:11,  6.73s/it]

 
query_sent:  Sarcoidosis could be in his brain and lungs.
query_word:  Sarcoidosis
Top substitutions:
1. ('ischemia', 'If the ischemia lasts long enough, the heart muscle that is not getting enough oxygen dies.')
2. ('pneumothorax', 'Complex pneumothorax will need medical attention.')
3. ('osteoporosis', 'There is no cure for osteoporosis.')
4. ('pneumoconiosis', 'Pneumoconiosis   Silicosis')
5. ('fibrosis', 'In some cases, pulmonary fibrosis is a complication of sarcoidosis.')
6. ('rhabdomyosarcoma', 'Prognosis also depends on the type of rhabdomyosarcoma a person has, what kind of treatment they get, whether their cancer has spread, and many other things.')
7. ('hemorrhage', 'A subarachnoid hemorrhage is bleeding into the subarachnoid space (the space between the arachnoid and the pia mater).')
8. ('neurosarcoidosis', 'If it does, it is called neurosarcoidosis.')
9. ('bronchiectasis', 'People with bronchiectasis may also cough up blood.')
10. ('glomerulonephritis', 'Renal cell carc

 28%|██▊       | 24/87 [02:50<06:45,  6.44s/it]

 
query_sent:  The PET scan was negative for sarcoidosis.
query_word:  sarcoidosis
Top substitutions:
1. ('fibrosis', 'In some cases, pulmonary fibrosis is a complication of sarcoidosis.')
2. ('glomerulonephritis', 'Renal cell carcinoma   Cystitis   Bladder cancer   Pyelonephritis   Prostate cancer   Urethritis   Wilms tumor   Kidney stones   Glomerulonephritis   Possibly, allergies.')
3. ('bronchiectasis', 'Antibiotics are used to treat bronchiectasis.')
4. ('thrombosis', 'This delivery system can cause sepsis and thrombosis.')
5. ('osteoporosis', 'There is no cure for osteoporosis.')
6. ('prostatitis', 'Antibiotics usually resolve acute prostatitis infections in a very short period of time.')
7. ('tumours', 'The doctor may want to perform a rectal examination which will help detect internal hemorrhoids, tumours, polyps, or abscesses.')
8. ('endometriosis', 'The doctor may see endometriosis tissue during laparoscopy.')
9. ('sarcoma', 'Leiomyosarcoma is one of the more common types of 

 29%|██▊       | 25/87 [02:56<06:22,  6.17s/it]

 
query_sent:  Sarcoidosis isn't infectious or environmental.
query_word:  Sarcoidosis
Top substitutions:
1. ('encephalitis', 'Encephalitis can happen when a person gets a bacterial infection, like bacterial meningitis.')
2. ('mesothelioma', 'Mesothelioma is a deadly form of cancer.')
3. ('mononucleosis', 'There is however no specific treatment for infectious mononucleosis.')
4. ('candidiasis', 'A weakened, unhealthy, or young immune system may allow candidiasis to develop.')
5. ('sepsis', 'Normally the immune system of the body is able to fight the germs and overcome the infection, but in sepsis something goes wrong.')
6. ('cysticercosis', 'Cysticercosis is a tissue infection caused by the young form of the pork tapeworm.')
7. ('neurosarcoidosis', 'Neurosarcoidosis is sarcoidosis in the brain.')
8. ('nephritis', 'Nephritis is often caused by infections, and toxins, but is most commonly caused by autoimmune disorders that affect the major organs like kidneys.')
9. ('rhabdomyosarcoma', 

 30%|██▉       | 26/87 [03:00<05:40,  5.59s/it]

 
query_sent:  The donor had hemosiderosis.
query_word:  hemosiderosis
Top substitutions:
1. ('thrombosis', 'This delivery system can cause sepsis and thrombosis.')
2. ('glomerulonephritis', 'Renal cell carcinoma   Cystitis   Bladder cancer   Pyelonephritis   Prostate cancer   Urethritis   Wilms tumor   Kidney stones   Glomerulonephritis   Possibly, allergies.')
3. ('encephalitis', 'Two examples are autoimmune encephalitis and acute disseminated encephalitis.')
4. ('cirrhosis', 'Liver damage by cirrhosis cannot be repaired.')
5. ('mononucleosis', 'There is however no specific treatment for infectious mononucleosis.')
6. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
7. ('hemorrhagic', 'When a person has bleeding, leaking plasma, and not enough platelets, they have dengue hemorrhagic fever.')
8. ('granulomatous', 'Sarcoidosis is a granulomatous disorder that can affect multiple organs.')
9. ('arteriosclerosis', 'The effects of arteriosclerosis c

 31%|███       | 27/87 [03:07<05:57,  5.96s/it]

 
query_sent:  The largest risk factors for spontaneous bleeding are high blood pressure and amyloidosis.
query_word:  amyloidosis
Top substitutions:
1. ('granulomatous', 'Sarcoidosis is a granulomatous disorder that can affect multiple organs.')
2. ('cirrhosis', 'Many years of infection may cause cirrhosis.')
3. ('thrombosis', 'This delivery system can cause sepsis and thrombosis.')
4. ('myelogenous', 'Acute myelogenous leukemia (AML) is seen more commonly in adults than in children.')
5. ('carcinoma', 'Primary biliary cirrhosis, a disease of unknown etiology   Hepatocellular carcinoma')
6. ('carcinomas', 'Squamous cell carcinomas most often start in the tongue.')
7. ('glomerulonephritis', 'Streptococcal infections might also lead to swelling of the kidneys (acute glomerulonephritis).')
8. ('neuropathy', 'Its symptoms are also similar to progressive inflammatory neuropathy.')
9. ('encephalitis', 'Two examples are autoimmune encephalitis and acute disseminated encephalitis.')
10. ('end

 32%|███▏      | 28/87 [03:15<06:38,  6.75s/it]

 
query_sent:  If you had amyloidosis, there'd be speckling on your cardiac ultrasound.
query_word:  amyloidosis
Top substitutions:
1. ('hypothyroidism', 'If a person has hypothyroidism, their thyroid gland does not make enough hormones.')
2. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
3. ('cirrhosis', 'Liver damage by cirrhosis cannot be repaired.')
4. ('carcinoma', 'Merkel-cell carcinoma usually shows up on the head, neck, perianal and eyelid.')
5. ('endometriosis', 'Having endometriosis can make it hard to have a baby.')
6. ('hemophilia', 'To treat this, an affected person can get a blood donation from someone without hemophilia.')
7. ('myeloma', 'Some symptoms of the multiple myeloma are bone pain, renal failure, recurring infections and anemia.')
8. ('rhabdomyosarcoma', 'Prognosis also depends on the type of rhabdomyosarcoma a person has, what kind of treatment they get, whether their cancer has spread, and many other things.')
9. ('car

 33%|███▎      | 29/87 [03:25<07:18,  7.56s/it]

 
query_sent:  Biopsy showed deposits in the renal endothelium consistent with amyloidosis.
query_word:  amyloidosis
Top substitutions:
1. ('cirrhosis', 'Primary biliary cirrhosis, a disease of unknown etiology   Hepatocellular carcinoma')
2. ('carcinoma', 'Merkel-cell carcinoma usually shows up on the head, neck, perianal and eyelid.')
3. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
4. ('granulomatous', 'Sarcoidosis is a granulomatous disorder that can affect multiple organs.')
5. ('glomerulonephritis', 'Renal cell carcinoma   Cystitis   Bladder cancer   Pyelonephritis   Prostate cancer   Urethritis   Wilms tumor   Kidney stones   Glomerulonephritis   Possibly, allergies.')
6. ('myeloma', 'Some symptoms of the multiple myeloma are bone pain, renal failure, recurring infections and anemia.')
7. ('endometriosis', 'The doctor may see endometriosis tissue during laparoscopy.')
8. ('hematoma', 'Possible metabolic causes are such as liver failure 

 34%|███▍      | 30/87 [03:31<06:53,  7.25s/it]

 
query_sent:  He needs a bone marrow transplant for the amyloidosis.
query_word:  amyloidosis
Top substitutions:
1. ('fibrosis', 'The parent might not have cystic fibrosis but still might have the gene.')
2. ('cirrhosis', 'Liver damage by cirrhosis cannot be repaired.')
3. ('carcinoma', 'Merkel-cell carcinoma usually shows up on the head, neck, perianal and eyelid.')
4. ('myeloma', 'Some symptoms of the multiple myeloma are bone pain, renal failure, recurring infections and anemia.')
5. ('mononucleosis', 'There is however no specific treatment for infectious mononucleosis.')
6. ('endometriosis', 'The doctor doing the surgery can look inside the body to find endometriosis tissue.')
7. ('carcinomas', 'The majority of these are basal cell carcinomas.')
8. ('sclerosis', 'Multiple sclerosis -Citizendium')
9. ('haemophilia', 'Haemophilia A happens in about 1 in 5,000–10,000 male births.')
10. ('hematomas', 'About 15% to 20% of people with epidural hematomas die from them.')
11. ('ciliopathi

 36%|███▌      | 31/87 [03:41<07:21,  7.89s/it]

 
query_sent:  Do a bronchoscopy, it'll set off a laryngospasm.
query_word:  laryngospasm
Top substitutions:
1. ('hypoglycemia', 'Very bad hypoglycemia can look exactly like a stroke.')
2. ('glaucoma', 'If a person who has glaucoma does not get help from a doctor, they will not be able to see.')
3. ('bronchiectasis', 'People with bronchiectasis have pain in their chests.')
4. ('esophageal', 'One sign of esophageal candidiasis is painful swallowing.')
5. ('hemorrhagic', 'Most commonly this is a stroke or mini-stroke and sometimes can be a hemorrhagic stroke.')
6. ('pneumothorax', 'Complex pneumothorax will need medical attention.')
7. ('dyskinesia', 'But some other people can get tardive dyskinesia too.')
8. ('thrombosis', 'This might be due to thrombosis.')
9. ('laryngeal', 'Croup also can be laryngeal diphtheria, bacterial tracheitis, laryngotracheobronchitis, and laryngotracheobronchopneumonitis.')
10. ('osteoarthritis', 'The three most common kinds are osteoarthritis, rheumatoid art

 37%|███▋      | 32/87 [03:47<06:51,  7.48s/it]

 
query_sent:  If she had porphyria, you would have seen purple urine.
query_word:  porphyria
Top substitutions:
1. ('syphilis', 'Antibiotic medication breaks open syphilis bacteria to kill them.')
2. ('porphyrins', 'They have disorders of certain enzymes which normally work in the production of porphyrins and heme.')
3. ('anaphylaxis', 'Later, the name was changed to " anaphylaxis " because it sounded nicer.')
4. ('polydipsia', 'Three of the early signs of hyperglycemia are " the 3 polys " : polydipsia (feeling very thirsty), polyphagia (feeling very hungry), and polyuria (urinating a lot).')
5. ('pyelonephritis', 'In the upper urinary tract, it is known as pyelonephritis (a kidney infection).')
6. ('cystocele', 'Some women can have repeated urine infections if they have a cystocele.')
7. ('psoriasis', 'Washing with Psoriasis removes the dry skin, but then the skin underneath is raw.')
8. ('metacyclic', 'The sandflies inject the infective stage, metacyclic promastigotes, during blood 

 38%|███▊      | 33/87 [03:55<06:48,  7.56s/it]

 
query_sent:  If you're wrong about porphyria, the treatment could box her kidneys.
query_word:  porphyria
Top substitutions:
1. ('anaphylaxis', 'Doctors can prescribe EpiPens to people who know they have allergies, so they can treat themselves for anaphylaxis.')
2. ('hypoglycemia', 'In the worst cases, when the blood sugar is very low, hypoglycemia is treated by giving sugar water intravenously (through a needle placed into a vein).')
3. ('polycythemia', 'Gout often makes these other problems worse or harder to treat: polycythemia, lead poisoning, renal failure, hemolytic anemia, psoriasis, and solid organ transplants.')
4. ('syphilis', 'Antibiotic medication breaks open syphilis bacteria to kill them.')
5. ('hemophilia', 'To treat this, an affected person can get a blood donation from someone without hemophilia.')
6. ('trichomoniasis', "Without treatment, trichomoniasis can stay in a woman's body for months to years.")
7. ('psoriasis', 'Gout often makes these other problems worse or

 39%|███▉      | 34/87 [04:02<06:30,  7.37s/it]

 
query_sent:  Also, many blind people have nystagmus, which is one reason that some wear dark glasses.
query_word:  nystagmus
Top substitutions:
1. ('glaucoma', 'When sight is lost from glaucoma, it cannot be recovered.')
2. ('dyskinesia', 'The person with tardive dyskinesia cannot stop or control these movements.')
3. ('amblyopia', 'Amblyopia, more commonly called lazy eye, is a condition that affects the eye.')
4. ('epilepsy', 'Many people with epilepsy have an aura before having a seizure.')
5. ('astigmatism', 'Astigmatism does not affect the health of the eye, and eyes with astigmatism can be perfectly healthy.')
6. ('hemiplegia', 'Symptoms might include: hemiplegia (an inability to move one or more limbs on one side of the body), aphasia (inability to understand or use language), or an inability to see one side of the visual field.')
7. ('dysgraphia', 'People with dysgraphia can move their hand enough, and are also capable of understanding how to write.')
8. ('hyperacusis', 'Loud

 40%|████      | 35/87 [04:10<06:32,  7.54s/it]

 
query_sent:  There are two key forms of nystagmus: pathological and physiological, with variations within each type.
query_word:  nystagmus
Top substitutions:
1. ('pneumothorax', 'A Pneumothorax can occur for no obvious reason (spontaneous).')
2. ('rhabdomyosarcoma', 'The choice of how to treat rhabdomyosarcoma depends on many things.')
3. ('glaucoma', 'There are two kinds of glaucoma, the kind that happens very fast, and the kind that happens slowly, over a long time.')
4. ('astigmatism', 'Astigmatism does not affect the health of the eye, and eyes with astigmatism can be perfectly healthy.')
5. ('neurosarcoidosis', 'If it does, it is called neurosarcoidosis.')
6. ('pheochromocytomas', 'Most adrenal pheochromocytomas and all adrenocortical adenomas are benign tumors, which do not metastasize or destroy nearby tissues, but may cause significant health problems by unbalancing hormones.')
7. ('myositis', 'Some conditions, such as myositis, can be considered both neuromuscular and muscu

 41%|████▏     | 36/87 [04:17<06:24,  7.54s/it]

 
query_sent:  People with bronchiectasis may have bad breath indicative of active infection.
query_word:  bronchiectasis
Top substitutions:
1. ('bronchiolitis', 'Many things can cause bronchiolitis.')
2. ('bronchitis', 'The medicine used to treat bronchitis may not be the same as those used to treat bronchiolitis.')
3. ('hypoglycemia', 'Very bad hypoglycemia can look exactly like a stroke.')
4. ('diphtheria', 'People with cutaneous diphtheria may get ulcers, covered by a gray membrane, on their skin.')
5. ('thrombosis', 'This delivery system can cause sepsis and thrombosis.')
6. ('rheumatoid', 'People who have rheumatoid arthiritis and smoke are ten times more likely to have the disease.')
7. ('hypertension', 'People with pulmonary hypertension have difficulty breathing.')
8. ('tachycardia', 'Ventricular tachycardia can also cause low blood pressure.')
9. ('glaucoma', 'A family history of glaucoma is a risk factor.')
10. ('pancreatitis', 'Causes may include sepsis, pancreatitis, traum

 43%|████▎     | 37/87 [04:25<06:24,  7.69s/it]

 
query_sent:  Some people with bronchiectasis may produce frequent green/yellow sputum (up to 240ml (8 oz) daily).
query_word:  bronchiectasis
Top substitutions:
1. ('bronchiolitis', 'Usually treatment for bronchiolitis is to make the person as comfortable as possible.')
2. ('bronchitis', 'The medicine used to treat bronchitis may not be the same as those used to treat bronchiolitis.')
3. ('diphtheria', 'People with cutaneous diphtheria may get ulcers, covered by a gray membrane, on their skin.')
4. ('glaucoma', 'A family history of glaucoma is a risk factor.')
5. ('bronchospasms', 'Some of the things that can cause bronchospasms are: consuming foods, taking medicines, allergic responses to insects, and hormone levels, particularly in women.')
6. ('hypoglycemia', 'Very bad hypoglycemia can look exactly like a stroke.')
7. ('rhabdomyosarcoma', 'For example, adults with rhabdomyosarcoma usually have tumors that grow faster and are harder to treat.')
8. ('anaphylaxis', 'More than 90% of 

 44%|████▎     | 38/87 [04:34<06:26,  7.89s/it]

 
query_sent:  Bronchiectasis has both congenital and acquired causes, with the latter more frequent.
query_word:  Bronchiectasis
Top substitutions:
1. ('endocarditis', 'Endocarditis most often affects the heart valves.')
2. ('nephritis', 'Nephritis can produce glomerular injury, by disturbing the glomerular structure with inflammatory cell proliferation.')
3. ('myelodysplastic', 'Myelodysplastic syndromes affect the bone marrow stem cells.')
4. ('sarcoidosis', 'Sarcoidosis may involve the brain.')
5. ('encephalitis', 'Encephalitis can cause serious symptoms, like seizures and strokes, and can be fatal.')
6. ('bronchospasms', 'Bronchospasms can be caused by asthma, chronic bronchitis and anaphylaxis.')
7. ('hypoglycemia', 'Hypoglycemia can cause many symptoms.')
8. ('bronchiolitis', 'The disease Bronchiolitis is not the same as Bronchitis.')
9. ('glaucoma', 'Glaucoma, particularly primary open-angle glaucoma, is associated with mutations in several different genes   Also, the likelihoo

 45%|████▍     | 39/87 [04:41<06:16,  7.85s/it]

 
query_sent:  However, Satie's military career did not last very long; within a few months he was discharged after deliberately infecting himself with bronchitis.
query_word:  bronchitis
Top substitutions:
1. ('bronchiolitis', 'The medicine used to treat bronchitis may not be the same as those used to treat bronchiolitis.')
2. ('bronchiectasis', 'Some people with bronchiectasis have sinusitis that does not go away.')
3. ('meningitis', 'For this reason, in certain countries, like Germany, doctors have to tell the authorities that someone might have meningitis.')
4. ('pharyngitis', 'The common cold primarily affects the nose, pharyngitis primarily affects the throat, and bronchitis primarily affects the lungs.')
5. ('diarrhea', 'Because digestion has suddenly stopped, the person may have nausea, diarrhea, or other gastrointestinal problems.')
6. ('hypoglycemia', 'Causes of pallor may include migraines, headache, hypoglycemia, anemia or scarlet fever.')
7. ('syphilis', 'Although some pat

 46%|████▌     | 40/87 [04:48<05:55,  7.57s/it]

 
query_sent:  In January 1876, his father died of bronchitis following a long period of depression.
query_word:  bronchitis
Top substitutions:
1. ('bronchiolitis', 'The medicine used to treat bronchitis may not be the same as those used to treat bronchiolitis.')
2. ('meningitis', 'For this reason, in certain countries, like Germany, doctors have to tell the authorities that someone might have meningitis.')
3. ('tracheitis', 'Other uncommon complications include bacterial tracheitis (infection of the trachea), pneumonia (lung infection), and pulmonary edema (fluid in the lungs).')
4. ('pharyngitis', 'The common cold primarily affects the nose, pharyngitis primarily affects the throat, and bronchitis primarily affects the lungs.')
5. ('syphilis', 'Although some patients died from malaria, this was preferable to the almost-certain death from syphilis.')
6. ('bronchial', 'The lower respiratory tract consists of the trachea (wind pipe), bronchial tubes, the bronchioles, and the lungs.')
7.

 47%|████▋     | 41/87 [04:55<05:37,  7.35s/it]

 
query_sent:  She died in The Hague on 20 March 1934, of complications from bronchitis at the age of 75, and was buried in Delft.
query_word:  bronchitis
Top substitutions:
1. ('bronchiolitis', 'The medicine used to treat bronchitis may not be the same as those used to treat bronchiolitis.')
2. ('bronchiectasis', 'Some people with bronchiectasis have sinusitis that does not go away.')
3. ('pharyngitis', 'The common cold primarily affects the nose, pharyngitis primarily affects the throat, and bronchitis primarily affects the lungs.')
4. ('meningitis', 'For this reason, in certain countries, like Germany, doctors have to tell the authorities that someone might have meningitis.')
5. ('conjunctivitis', 'People who do have symptoms usually have a low fever, conjunctivitis, joint pain (mainly in the hands and feet), and a rash.')
6. ('fibrosis', 'This sometimes happens when the lung is already injured somehow, like from diseases such as cancer or cystic fibrosis.')
7. ('hypoglycemia', 'Cau

 48%|████▊     | 42/87 [05:02<05:24,  7.21s/it]

 
query_sent:  She died in The Hague on 20 March 1934, of complications from bronchitis at the age of 75.
query_word:  bronchitis
Top substitutions:
1. ('bronchiolitis', 'The medicine used to treat bronchitis may not be the same as those used to treat bronchiolitis.')
2. ('bronchiectasis', 'Some people with bronchiectasis have sinusitis that does not go away.')
3. ('pharyngitis', 'The common cold primarily affects the nose, pharyngitis primarily affects the throat, and bronchitis primarily affects the lungs.')
4. ('meningitis', 'For this reason, in certain countries, like Germany, doctors have to tell the authorities that someone might have meningitis.')
5. ('conjunctivitis', 'People who do have symptoms usually have a low fever, conjunctivitis, joint pain (mainly in the hands and feet), and a rash.')
6. ('fibrosis', 'It is often found in people with cystic fibrosis.')
7. ('hypoglycemia', 'Causes of pallor may include migraines, headache, hypoglycemia, anemia or scarlet fever.')
8. ('t

 49%|████▉     | 43/87 [05:09<05:13,  7.12s/it]

 
query_sent:  Menuhin died in Martin Luther Hospital, Berlin, Germany, from complications of bronchitis.
query_word:  bronchitis
Top substitutions:
1. ('bronchiolitis', 'The medicine used to treat bronchitis may not be the same as those used to treat bronchiolitis.')
2. ('bronchiectasis', 'Some people with bronchiectasis have sinusitis that does not go away.')
3. ('meningitis', 'For this reason, in certain countries, like Germany, doctors have to tell the authorities that someone might have meningitis.')
4. ('tracheitis', 'Other uncommon complications include bacterial tracheitis (infection of the trachea), pneumonia (lung infection), and pulmonary edema (fluid in the lungs).')
5. ('bronchial', 'The lower respiratory tract consists of the trachea (wind pipe), bronchial tubes, the bronchioles, and the lungs.')
6. ('conjunctivitis', 'People who do have symptoms usually have a low fever, conjunctivitis, joint pain (mainly in the hands and feet), and a rash.')
7. ('pharyngitis', 'Most of 

 51%|█████     | 44/87 [05:16<05:00,  6.98s/it]

 
query_sent:  It is one of the rarest forms of synesthesia.
query_word:  synesthesia
Top substitutions:
1. ('synesthetes', 'People who have synesthesia are called synesthetes.')
2. ('psychosis', 'They may also experience psychosis, where they cannot tell what is real and what is not.')
3. ('neuropathic', 'This is called neuropathic pain.')
4. ('glaucoma', 'There are two kinds of glaucoma, the kind that happens very fast, and the kind that happens slowly, over a long time.')
5. ('epilepsy', 'There are many different forms of seizures, and there are also many different forms of epilepsy.')
6. ('paresthesia', 'The tingling feeling you get when you move is paresthesia.')
7. ('hyperactivity', 'This can cause seizures and hyperactivity.')
8. ('tachycardia', 'This is called postural tachycardia.')
9. ('hallucinations', 'There can be other types of auditory hallucinations besides these three.')
10. ('paresthesias', "According to DSM-5, a person is having a panic attack if they suddenly start 

 52%|█████▏    | 45/87 [05:23<04:57,  7.08s/it]

 
query_sent:  An outbreak of tularemia occurred in Kosovo in 1999-2000.
query_word:  tularemia
Top substitutions:
1. ('syphilis', 'In sub-Saharan Africa, syphilis causes up to 20% of perinatal deaths (deaths that happen soon after a baby is born).')
2. ('encephalitis', 'In 2013, encephalitis killed about 77,000 people in the world.')
3. ('staphylococcus', 'When it is caused by Staphylococcus aureus, the symptoms are generally high fever, low blood pressure, confusion, and malaise.')
4. ('mononucleosis', 'Infectious mononucleosis can cause swollen lymph nodes in the neck and a sore throat, fever, and it can make the tonsils get bigger.')
5. ('hemorrhagic', 'However, a small number of people get dengue hemorrhagic fever or dengue shock syndrome.')
6. ('septicemic', 'Like the others, the septicemic plague spread from the East through trade routes on the Black Sea and down to the Mediterranean Sea.')
7. ('anemia', 'Viruses that are believed to cause leukemia include:   Human immunodeficie

 53%|█████▎    | 46/87 [05:30<04:52,  7.14s/it]

 
query_sent:  Over the following summers, Martha's Vineyard was identified as the only place in the world where documented cases of tularemia resulted from lawn mowing.
query_word:  tularemia
Top substitutions:
1. ('encephalitis', 'There are also vaccines for rabies, bacterial meningitis, Japanese encephalitis, Human Papillomavirus (HPV), and some other diseases that can cause encephalitis.')
2. ('syphilis', 'In sub-Saharan Africa, syphilis causes up to 20% of perinatal deaths (deaths that happen soon after a baby is born).')
3. ('diphtheria', 'Infected people who do not know they have diphtheria are called carriers of diphtheria, because they can spread the infection without being sick themselves.')
4. ('hemorrhagic', 'However, a small number of people get dengue hemorrhagic fever or dengue shock syndrome.')
5. ('staphylococcus', 'If the disease is not treated, someone with toxic shock syndrome caused by Staphylococcus aureus can go into a coma.')
6. ('meningococcal', 'Dengue can als

 54%|█████▍    | 47/87 [05:36<04:34,  6.85s/it]

 
query_sent:  Tularemia is not spread directly from person to person.
query_word:  Tularemia
Top substitutions:
1. ('hypoglycemia', 'Hypoglycemia is treated by returning the blood sugar levels back to normal.')
2. ('anemia', 'Aplastic Anemia can be treated by a medical professional.')
3. ('sarcoidosis', 'Sarcoidosis may involve the brain.')
4. ('acromegaly', 'Acromegaly is hard to diagnose when it is just beginning.')
5. ('psittacosis', 'Psittacosis can be easily treated with antibiotics, especially with Tetracycline.')
6. ('gonorrhea', 'Gonorrhea is a disease.')
7. ('angioedema', 'Angioedema may be caused by an allergic reaction.')
8. ('chorea', 'Chorea can develop as a complication of pregnancy.')
9. ('myelodysplastic', 'Myelodysplastic syndromes affect the bone marrow stem cells.')
10. ('endometriosis', 'Endometriosis can be treated by a doctor.')
11. ('otitis', 'Otitis interna affects the inner ear.')
12. ('tonsillitis', 'Tonsillitis is spread by human contact.')


 55%|█████▌    | 48/87 [05:43<04:25,  6.80s/it]

 
query_sent:  There is currently no effective treatment or cure for akinetopsia.
query_word:  akinetopsia
Top substitutions:
1. ('pneumothorax', 'The most common symptoms of Pneumothorax are chest pains.')
2. ('anaphylaxis', 'About 0.7% to 20% of people with anaphylaxis die from it.')
3. ('haemophilia', 'Haemophilia B - not as severe, but much less common.')
4. ('dyskinesia', 'The United States Food and Drug Administration (FDA) has not approved any medicines to treat the symptoms of tardive dyskinesia.')
5. ('diphtheria', 'Croup due to diphtheria has become nearly unknown since most people are immunized.')
6. ('subarachnoid', 'About half of all people with subarachnoid hemorrhages die from them.')
7. ('trichomoniasis', 'Most people with trichomoniasis do not have any symptoms.')
8. ('hypercholesterolemia', 'There are other kinds of medicines to treat hypercholesterolemia.')
9. ('polydipsia', 'Three of the early signs of hyperglycemia are " the 3 polys " : polydipsia (feeling very thi

 56%|█████▋    | 49/87 [05:52<04:42,  7.44s/it]

 
query_sent:  Patients with akinetopsia struggle with many issues in their day-to-day life, depending on the severity of their condition.
query_word:  akinetopsia
Top substitutions:
1. ('pneumothorax', 'The most common symptoms of Pneumothorax are chest pains.')
2. ('haemophilia', 'A person with haemophilia is called a haemophiliac.')
3. ('polydipsia', 'Three of the early signs of hyperglycemia are " the 3 polys " : polydipsia (feeling very thirsty), polyphagia (feeling very hungry), and polyuria (urinating a lot).')
4. ('dyskinesia', 'Most people who get tardive dyskinesia are people who have schizophrenia, schizoaffective disorder, or bipolar disorder, and have been on antipsychotic medicines for a long time.')
5. ('anaphylaxis', 'One study of children with anaphylaxis found that 60% had a history of previous atopic diseases.')
6. ('atherosclerosis', 'People with atherosclerosis must also be watched by doctors to make sure they do not get ischemia or infarction.')
7. ('diphtheria', 

 57%|█████▋    | 50/87 [06:01<04:53,  7.94s/it]

 
query_sent:  Besides simple perception, akinetopsia also disturbs visuomotor tasks, such as reaching for objects and catching objects.
query_word:  akinetopsia
Top substitutions:
1. ('pneumothorax', 'Some other symptoms of Pneumothorax are shortness of breath, rapid breathing, and coughing.')
2. ('dyskinesia', 'People that have this very high risk include:   People with fetal alcohol syndrome   People with other developmental disabilities   People with brain diseases or injuries   The symptoms of tardive dyskinesia are movements of the face, lips, tongue, torso, arms, and legs.')
3. ('dyslexia', 'One thing that causes dyslexia is a problem with the central hearing nerve.')
4. ('aphasia', 'As with other types of aphasia, the symptoms that accompany PPA depend on what parts of the left hemisphere are significantly damaged.')
5. ('epilepsy', 'In most cases, epilepsy is caused by scars in the brain.')
6. ('prosopagnosia', 'People with this type of prosopagnosia never develop the ability 

 59%|█████▊    | 51/87 [06:11<05:10,  8.64s/it]

 
query_sent:  Inconspicuous akinetopsia can be selectively and temporarily induced using transcranial magnetic stimulation (TMS) of area V5 of the visual cortex in healthy subjects.
query_word:  akinetopsia
Top substitutions:
1. ('dyskinesia', 'Tardive dyskinesia is usually caused by taking antipsychotic medicines in high doses, or for a long time.')
2. ('epilepsy', 'In most cases, epilepsy is caused by scars in the brain.')
3. ('encephalitis', "Viral encephalitis can also be caused by a latent virus - a virus that hides from the body's immune system the brain's nerve cells.")
4. ('pneumothorax', 'Some other symptoms of Pneumothorax are shortness of breath, rapid breathing, and coughing.')
5. ('aphasia', 'As with other types of aphasia, the symptoms that accompany PPA depend on what parts of the left hemisphere are significantly damaged.')
6. ('prosopagnosia', 'Prosopagnosia can be caused by lesions (broken parts of body tissue) in the lower part of the brain.')
7. ('pheochromocytomas

 60%|█████▉    | 52/87 [06:18<04:41,  8.04s/it]

 
query_sent:  Only in a few cases and after many years does it cause demyelination.
query_word:  demyelination
Top substitutions:
1. ('hyperglycemia', 'The goal is to try to dilute the sugar in the blood, and decrease the high levels of hyperglycemia.')
2. ('dehydration', 'Sometimes, low blood pressure is caused by dehydration.')
3. ('neutrophils', 'If the agranulocytosis is because of a lack of neutrophils (called neutropenia), it is especially bad.')
4. ('methemoglobinemia', 'It can cause methemoglobinemia, which means there is higher than normal level of methemoglobin in the blood.')
5. ('ketoacidosis', 'These tests will show high blood sugar, which does not happen with other forms of ketoacidosis.')
6. ('chloroquine', 'The most important resistance is chloroquine-resistance.')
7. ('glycogen', "Glycogen storage disease type II (Pompe's Disease) is a disease that causes glycogen to accumulate in lysosomes.")
8. ('anaphylatoxins', 'It is caused by the release (degranulation) of subst

 61%|██████    | 53/87 [06:25<04:25,  7.82s/it]

 
query_sent:  Ho was diagnosed with cardiomyopathy in 2005 and had a pacemaker implanted.
query_word:  cardiomyopathy
Top substitutions:
1. ('tachycardia', 'If a person has pulseless ventricular tachycardia, they need to be shocked with a defibrillator.')
2. ('cardiotoxicity', 'Large doses of quinine can cause:   skin rashes   deafness   sleepiness   blindness   anaphylactic shock   disturbances in abnormal heart rhythm   death from cardiotoxicity.')
3. ('cardiopulmonary', 'Then - like in any situation where a person suddenly becomes unconscious and has no pulse - cardiopulmonary resuscitation (CPR) should be started right away.')
4. ('bradycardia', 'Also, if bradycardia gets very bad, the heart will be unable to pump enough blood and oxygen to the body.')
5. ('idiopathic', 'The chance of sudden death is highest for people who:   Have ischemic cardiomyopathy (their heart muscle was damaged by not getting enough oxygen); and   Have ventricular tachycardia that comes and goes   The chan

 62%|██████▏   | 54/87 [06:33<04:12,  7.64s/it]

 
query_sent:  He was diagnosed with cardiomyopathy in 2005 and had a pacemaker implanted.
query_word:  cardiomyopathy
Top substitutions:
1. ('tachycardia', 'If a person has pulseless ventricular tachycardia, they need to be shocked with a defibrillator.')
2. ('cardiotoxicity', 'Large doses of quinine can cause:   skin rashes   deafness   sleepiness   blindness   anaphylactic shock   disturbances in abnormal heart rhythm   death from cardiotoxicity.')
3. ('idiopathic', 'The chance of sudden death is highest for people who:   Have ischemic cardiomyopathy (their heart muscle was damaged by not getting enough oxygen); and   Have ventricular tachycardia that comes and goes   The chance of sudden death is lowest for people with idiopathic ventricular tachycardia.')
4. ('bradycardia', 'Also, if bradycardia gets very bad, the heart will be unable to pump enough blood and oxygen to the body.')
5. ('cardiopulmonary', 'Emergency treatment of respiratory failure is very similar to that of cardiop

 63%|██████▎   | 55/87 [06:39<03:50,  7.20s/it]

 
query_sent:  His echocardiogram was negative for cardiomyopathy.
query_word:  cardiomyopathy
Top substitutions:
1. ('tachycardia', 'Postural Orthostatic Tachycardia Syndrome - from Dysautonomia International (includes detailed information about POTS, research, and finding doctors and support across the world)')
2. ('idiopathic', 'The chance of sudden death is highest for people who:   Have ischemic cardiomyopathy (their heart muscle was damaged by not getting enough oxygen); and   Have ventricular tachycardia that comes and goes   The chance of sudden death is lowest for people with idiopathic ventricular tachycardia.')
3. ('cardiotoxicity', 'Large doses of quinine can cause:   skin rashes   deafness   sleepiness   blindness   anaphylactic shock   disturbances in abnormal heart rhythm   death from cardiotoxicity.')
4. ('dystrophy', 'There is no cure for myotonic dystrophy.')
5. ('neuropathy', 'Diabetic neuropathy, damage to blood vessels near nerves can result in reduced blood flow.'

 64%|██████▍   | 56/87 [06:45<03:37,  7.00s/it]

 
query_sent:  And a subsequent biopsy revealed irreversible cardiomyopathy.
query_word:  cardiomyopathy
Top substitutions:
1. ('tachycardia', 'Postural Orthostatic Tachycardia Syndrome - from Dysautonomia International (includes detailed information about POTS, research, and finding doctors and support across the world)')
2. ('polyneuropathy', 'The four patterns of this condition are polyneuropathy, mononeuropathy, nononeuritis mp and autonomic nerve damage.')
3. ('cardiopulmonary', 'Emergency treatment of respiratory failure is very similar to that of cardiopulmonary resuscitation.')
4. ('neuropathy', 'Its symptoms are also similar to progressive inflammatory neuropathy.')
5. ('scoliosis', 'Other risk factors are scoliosis, injury and several types of surgery.')
6. ('idiopathic', 'This is called idiopathic ventricular tachycardia.')
7. ('thrombosis', 'This might be due to thrombosis.')
8. ('endocarditis', 'In rare cases, Psittacosis can cause endocarditis - swelling of joints, and sw

 66%|██████▌   | 57/87 [06:50<03:09,  6.31s/it]

 
query_sent:  The kid has severe cardiomyopathy.
query_word:  cardiomyopathy
Top substitutions:
1. ('tachycardia', 'The chance of sudden death is highest for people who:   Have ischemic cardiomyopathy (their heart muscle was damaged by not getting enough oxygen); and   Have ventricular tachycardia that comes and goes   The chance of sudden death is lowest for people with idiopathic ventricular tachycardia.')
2. ('bronchiectasis', 'People with bronchiectasis have pain in their chests.')
3. ('idiopathic', 'The chance of sudden death is highest for people who:   Have ischemic cardiomyopathy (their heart muscle was damaged by not getting enough oxygen); and   Have ventricular tachycardia that comes and goes   The chance of sudden death is lowest for people with idiopathic ventricular tachycardia.')
4. ('glomerulonephritis', 'Chronic renal failure can be a sign of other diseases, like IgA nephritis, glomerulonephritis, chronic pyelonephritis, and urinary retention.')
5. ('osteoarthritis', 

 67%|██████▋   | 58/87 [06:57<03:09,  6.53s/it]

 
query_sent:  Which means it could be a hematological problem plus cardiomyopathy
query_word:  cardiomyopathy
Top substitutions:
1. ('tachycardia', 'Postural Orthostatic Tachycardia Syndrome - from Dysautonomia International (includes detailed information about POTS, research, and finding doctors and support across the world)')
2. ('lymphoma', 'In lymphoma, these lymphocytes start to multiply in an abnormal way.')
3. ('neuropathy', 'Diabetic neuropathy, damage to blood vessels near nerves can result in reduced blood flow.')
4. ('bradycardia', 'Also, if bradycardia gets very bad, the heart will be unable to pump enough blood and oxygen to the body.')
5. ('polyneuropathy', 'The four patterns of this condition are polyneuropathy, mononeuropathy, nononeuritis mp and autonomic nerve damage.')
6. ('encephalopathy', ',other forms like encephalopathy or delirium may develop relatively slowly, over a number of years.')
7. ('eclampsia', 'Where this is unavailable, eclampsia, obstetric fistula, 

 68%|██████▊   | 59/87 [07:04<03:09,  6.76s/it]

 
query_sent:  An acute myocardial infarction, happens when a blood vessel in the heart suddenly becomes blocked.
query_word:  myocardial
Top substitutions:
1. ('ventricular', 'If ventricular fibrillation continues for long enough, the blood will stop circulating around the body.')
2. ('coronary', 'This can cause the heart’s blood vessels to get narrower suddenly (this is called " coronary artery spasm " ).')
3. ('aortic', 'Since pulmonary venous hypertension is synonymous with congestive heart failure, the treatment is to optimize left ventricular function by the use of diuretics, beta blockers, ACE inhibitors, etc., or to repair/replace the mitral valve or aortic valve.')
4. ('myocardium', 'The most important thing is to save as much myocardium (heart muscle) as possible and prevent more complications.')
5. ('cardiomyopathy', 'Problems caused by this may include heart failure due to cardiomyopathy or cardiac arrest.')
6. ('infarction', 'This can cause a myocardial infarction (a heart

 69%|██████▉   | 60/87 [07:10<02:53,  6.42s/it]

 
query_sent:  Patau syndrome is a syndrome caused by a chromosomal abnormality, in which some or all of the cells of the body contain extra genetic material from chromosome 13.
query_word:  Patau
Top substitutions:
1. ('joubert', 'Joubert syndrome is a genetic birth defect.')
2. ('rett', 'Rett syndrome is an almost entirely female based disorder which affects girls.')
3. ('tourette', "Tourette's is normally inherited, and starts in childhood.")
4. ('pompe', "Glycogen storage disease type II (Pompe's Disease) is a disease that causes glycogen to accumulate in lysosomes.")
5. ('marfan', 'Aortic dissection is more common in those with a history of high blood pressure, a number of connective tissue diseases that affect blood vessel wall strength such as Marfan syndrome, a bicuspid aortic valve, and previous heart surgery.')
6. ('lassa', 'Lassa fever or Lassa hemorrhagic fever (LHF) is a fever caused by the Lassa virus.')
7. ('chagas', 'Chagas disease is a disease caused by the protozoan p

 70%|███████   | 61/87 [07:15<02:35,  5.98s/it]

 
query_sent:  Most cases of Patau syndrome are not inherited, but occur as random events during the formation of reproductive cells (eggs and sperm).
query_word:  Patau
Top substitutions:
1. ('rett', 'People with Rett syndrome also have scoliosis.')
2. ('marfan', 'Aortic dissection is more common in those with a history of high blood pressure, a number of connective tissue diseases that affect blood vessel wall strength such as Marfan syndrome, a bicuspid aortic valve, and previous heart surgery.')
3. ('joubert', 'Mutations at 21 different loci have been found to cause Joubert syndrome.')
4. ('pompe', "Glycogen storage disease type II (Pompe's Disease) is a disease that causes glycogen to accumulate in lysosomes.")
5. ('tourette', "Fewer than 15% of people with Tourette's have coprolalia.")
6. ('lassa', 'If a patient is diagnosed with Lassa fever, then the patient will be kept away from other people, to prevent the spread of the virus.')
7. ('pica', 'A person with pica may also have a

 71%|███████▏  | 62/87 [07:20<02:19,  5.59s/it]

 
query_sent:  But a baby with Patau syndrome has 3 copies of chromosome 13, instead of 2.
query_word:  Patau
Top substitutions:
1. ('marfan', 'Aortic dissection is more common in those with a history of high blood pressure, a number of connective tissue diseases that affect blood vessel wall strength such as Marfan syndrome, a bicuspid aortic valve, and previous heart surgery.')
2. ('tourette', "Fewer than 15% of people with Tourette's have coprolalia.")
3. ('joubert', 'Mutations at 21 different loci have been found to cause Joubert syndrome.')
4. ('pompe', "Glycogen storage disease type II (Pompe's Disease) is a disease that causes glycogen to accumulate in lysosomes.")
5. ('rett', 'People with Rett syndrome also have scoliosis.')
6. ('raynaud', ':Asthma   :Raynaud’s disease: is a circulatory disorder in which the smaller arteries that supply blood to the extremities - most often the hands, but it may also affect the, toes, the tip of the nose and the ears - become narrower reducing 

 72%|███████▏  | 63/87 [07:24<02:06,  5.26s/it]

 
query_sent:  Nora is diagnosed with Trisomy 13.
query_word:  Trisomy
Top substitutions:
1. ('autosomal', 'It is the second most common autosomal trisomy, after Down Syndrome, that carries to term.')
2. ('recessive', 'The disorder is X-linked recessive.')
3. ('chromosomal', "The most common chromosomal disorder is Down syndrome ('Mongolism').")
4. ('cystic', 'The parent might not have cystic fibrosis but still might have the gene.')
5. ('gestational', 'The doctor or nurse will ask certain questions to check if the mother is at high risk of getting gestational diabetes.')
6. ('aneuploidy', 'Among those who survive birth, Down syndrome is the most common form of aneuploidy.')
7. ('polyploidy', 'Trisomy   Polyploidy')
8. ('allele', 'It is caused by a dominant allele.')
9. ('anomalies', 'All other physical anomalies together occur in 6 per 1000 live births.')
10. ('hormonal', 'This can mean many things because you can inherit your body type, shape, and hormonal function from your parents.

 74%|███████▎  | 64/87 [07:28<01:52,  4.89s/it]

 
query_sent:  The patient has trisomy 13.
query_word:  trisomy
Top substitutions:
1. ('autosomal', 'It is the second most common autosomal trisomy, after Down Syndrome, that carries to term.')
2. ('chromosomal', "The most common chromosomal disorder is Down syndrome ('Mongolism').")
3. ('genotype', 'About 65% of people with genotype 4 have a good, stable response with 48 weeks of treatment.')
4. ('recessive', 'The disorder is X-linked recessive.')
5. ('aneuploidy', 'Among those who survive birth, Down syndrome is the most common form of aneuploidy.')
6. ('polyploidy', 'Trisomy   Polyploidy')
7. ('genotypes', 'In people with genotypes 2 and 3, about 70-80% have a good, stable response with 24 weeks of treatment.')
8. ('allele', 'It is caused by a dominant allele.')
9. ('hypersomnia', 'Narcolepsy is an example of a primary hypersomnia.')
10. ('cystic', 'The parent might not have cystic fibrosis but still might have the gene.')
11. ('gestational', 'It is still unclear what causes gestati

 75%|███████▍  | 65/87 [07:32<01:41,  4.63s/it]

 
query_sent:  The patient has trisomy 21.
query_word:  trisomy
Top substitutions:
1. ('autosomal', 'It is the second most common autosomal trisomy, after Down Syndrome, that carries to term.')
2. ('chromosomal', "The most common chromosomal disorder is Down syndrome ('Mongolism').")
3. ('genotype', 'About 65% of people with genotype 4 have a good, stable response with 48 weeks of treatment.')
4. ('recessive', 'The disorder is X-linked recessive.')
5. ('aneuploidy', 'Among those who survive birth, Down syndrome is the most common form of aneuploidy.')
6. ('polyploidy', 'Trisomy   Polyploidy')
7. ('genotypes', 'In people with genotypes 2 and 3, about 70-80% have a good, stable response with 24 weeks of treatment.')
8. ('hypersomnia', 'Narcolepsy is an example of a primary hypersomnia.')
9. ('allele', 'It is caused by a dominant allele.')
10. ('gestational', 'It is still unclear what causes gestational diabetes.')
11. ('anomalies', 'All other physical anomalies together occur in 6 per 10

 76%|███████▌  | 66/87 [07:38<01:44,  4.99s/it]

 
query_sent:  Rarely very low blood pressure may be the only sign of anaphylaxis.
query_word:  anaphylaxis
Top substitutions:
1. ('hypoglycemia', 'Very bad hypoglycemia can look exactly like a stroke.')
2. ('vasculitis', 'Lymphangitis is sometimes considered a type of vasculitis.')
3. ('pneumothorax', 'Some other symptoms of Pneumothorax are shortness of breath, rapid breathing, and coughing.')
4. ('bronchiectasis', 'People with bronchiectasis may also cough up blood.')
5. ('hypotension', 'Doctors can treat hypotension by treating whatever is causing the low blood pressure.')
6. ('encephalopathy', ',other forms like encephalopathy or delirium may develop relatively slowly, over a number of years.')
7. ('anaphylactic', 'There is no reason why it should not be used if a person is having an anaphylactic reaction.')
8. ('encephalitis', 'Usually, adults with encephalitis have a fever that starts suddenly, a headache, confusion, and sometimes seizures.')
9. ('tachycardia', 'Ventricular tach

 77%|███████▋  | 67/87 [07:44<01:43,  5.17s/it]

 
query_sent:  Worldwide, 0.05-2% of the population is estimated to have anaphylaxis at some point in life, and rates appear to be increasing.
query_word:  anaphylaxis
Top substitutions:
1. ('hypoglycemia', 'Because of this, severe (very bad) hypoglycemia can cause serious medical problems (like seizures) or even death if it is not treated quickly enough.')
2. ('encephalitis', 'In 2013, encephalitis killed about 77,000 people in the world.')
3. ('narcolepsy', 'It is estimated that between 25 and 50 people, per 100,000 suffer from narcolepsy.')
4. ('trichomoniasis', 'Every year, about 248 million more people get trichomoniasis.')
5. ('anaphylactic', 'However, half of the people who die of anaphylaxis have had no anaphylactic reaction before.')
6. ('porphyria', 'When someone is affected by porphyria they will start to lose their hair about two weeks after having an attack.')
7. ('diphtheria', 'Infected people who do not know they have diphtheria are called carriers of diphtheria, because

 78%|███████▊  | 68/87 [07:50<01:46,  5.58s/it]

 
query_sent:  On a mechanistic level, anaphylaxis is caused by the release of mediators from certain types of white blood cells triggered either by immunologic or non-immunologic mechanisms.
query_word:  anaphylaxis
Top substitutions:
1. ('encephalitis', "Viral encephalitis can also be caused by a latent virus - a virus that hides from the body's immune system the brain's nerve cells.")
2. ('anaphylactic', 'However, half of the people who die of anaphylaxis have had no anaphylactic reaction before.')
3. ('hypoglycemia', 'Hypoglycemia can happen to anyone, at any age, but it usually happens in people who are diabetic.')
4. ('neuropathy', 'Peripheral neuropathy happens when the nerves of the peripheral nervous system suffer damage due to disease, trauma to the nerves or through side effects of a certain illness.')
5. ('eosinophilic', 'Eosinophilic pneumonia often occurs in response to infection with a parasite or after exposure to certain types of environmental factors.')
6. ('bronchosp

 79%|███████▉  | 69/87 [07:56<01:40,  5.57s/it]

 
query_sent:  Previous systemic reactions, which are anything more than a local reaction around the site of the sting, are a risk factor for future anaphylaxis; however, half of fatalities have had no previous systemic reaction.
query_word:  anaphylaxis
Top substitutions:
1. ('hypoglycemia', 'Because of this, severe (very bad) hypoglycemia can cause serious medical problems (like seizures) or even death if it is not treated quickly enough.')
2. ('encephalitis', 'In 2013, encephalitis killed about 77,000 people in the world.')
3. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
4. ('anemia', 'Gout often makes these other problems worse or harder to treat: polycythemia, lead poisoning, renal failure, hemolytic anemia, psoriasis, and solid organ transplants.')
5. ('arrhythmias', 'Large exposures can result in loss of consciousness, arrhythmias, seizures, or death.')
6. ('metastasis', 'However, oral cancer

 80%|████████  | 70/87 [08:02<01:38,  5.81s/it]

 
query_sent:  In a person who died from anaphylaxis, autopsy may show an "empty heart" attributed to reduced venous return from vasodilation and redistribution of intravascular volume from the central to the peripheral compartment.
query_word:  anaphylaxis
Top substitutions:
1. ('encephalitis', 'Usually, adults with encephalitis have a fever that starts suddenly, a headache, confusion, and sometimes seizures.')
2. ('hypoglycemia', 'Very bad hypoglycemia can look exactly like a stroke.')
3. ('encephalopathy', ',other forms like encephalopathy or delirium may develop relatively slowly, over a number of years.')
4. ('hypotension', 'Doctors can treat hypotension by treating whatever is causing the low blood pressure.')
5. ('anaphylactic', 'They usually have fewer anaphylactic episodes, which are less severe, as they get older.')
6. ('anorexia', 'The person may also have anorexia nervosa.')
7. ('tachycardia', 'Some people with ventricular tachycardia do not have any symptoms, especially if

 82%|████████▏ | 71/87 [08:09<01:39,  6.20s/it]

 
query_sent:  Anaphylaxis can occur in response to almost any foreign substance.
query_word:  Anaphylaxis
Top substitutions:
1. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')


 83%|████████▎ | 72/87 [08:15<01:31,  6.10s/it]

 
query_sent:  Many foods can trigger anaphylaxis; this may occur upon the first known ingestion.
query_word:  anaphylaxis
Top substitutions:
1. ('encephalitis', 'Examples of other bacteria which can cause encephalitis are Staphylococcus aureus, which causes toxic shock syndrome; Bordetella pertussis, which causes pertussis (whooping cough); and types of Borellia bacteria, which cause Lyme disease.')
2. ('hypoglycemia', 'Bad hypoglycemia can make a person act like they are drunk or have taken drugs.')
3. ('dysentery', 'The main symptom of dysentery is having blood in the excrement.')
4. ('anaphylactic', 'The risk of having another anaphylactic reaction decreases as a person avoids the trigger for longer and longer.')
5. ('encephalopathy', ',other forms like encephalopathy or delirium may develop relatively slowly, over a number of years.')
6. ('diphtheria', 'Diphtheria can cause serious problems, like:   Inflammation of the heart, which can lead to death   Different forms of paralysis,

 84%|████████▍ | 73/87 [08:22<01:28,  6.30s/it]

 
query_sent:  People prone to anaphylaxis are advised to have an "allergy action plan."
query_word:  anaphylaxis
Top substitutions:
1. ('hypoglycemia', 'Because of this, severe (very bad) hypoglycemia can cause serious medical problems (like seizures) or even death if it is not treated quickly enough.')
2. ('encephalitis', 'Usually, adults with encephalitis have a fever that starts suddenly, a headache, confusion, and sometimes seizures.')
3. ('bronchiectasis', 'Some people with bronchiectasis make sputum that is green or pale yellow.')
4. ('anaphylactic', "These plans often include:   Parents telling schools about their children's allergies and what to do in case of an anaphylactic emergency   Knowing how and when to use EpiPens   Wearing a medical alert bracelet that says what the person is allergic to   Planning for how to avoid triggers   If anaphylaxis is diagnosed and the person is treated quickly, there is a good chance that they will recover.")
5. ('diphtheria', 'Croup due to 

 85%|████████▌ | 74/87 [08:27<01:15,  5.84s/it]

 
query_sent:  The antitoxin could cause anaphylaxis.
query_word:  anaphylaxis
Top substitutions:
1. ('hypoglycemia', 'Causes of pallor may include migraines, headache, hypoglycemia, anemia or scarlet fever.')
2. ('thrombosis', 'This delivery system can cause sepsis and thrombosis.')
3. ('anaphylactic', 'There is no reason why it should not be used if a person is having an anaphylactic reaction.')
4. ('encephalitis', 'In 2013, encephalitis killed about 77,000 people in the world.')
5. ('bronchiectasis', 'Antibiotics are used to treat bronchiectasis.')
6. ('vasculitis', 'Lymphangitis is sometimes considered a type of vasculitis.')
7. ('arrhythmias', 'Large exposures can result in loss of consciousness, arrhythmias, seizures, or death.')
8. ('allergies', 'Versions of this medication that are less prone to cause allergies are in development.')
9. ('diphtheria', 'There is a vaccine which can prevent diphtheria.')
10. ('emphysema', 'A history of exposure to cocaine, methamphetamine, alcohol

 86%|████████▌ | 75/87 [08:31<01:06,  5.55s/it]

 
query_sent:  A food allergy explains the anaphylaxis.
query_word:  anaphylaxis
Top substitutions:
1. ('allergy', 'If it is a food allergy, the person may get an upset stomach.')
2. ('encephalitis', 'This is called " secondary encephalitis. "')
3. ('hypoglycemia', 'Causes of pallor may include migraines, headache, hypoglycemia, anemia or scarlet fever.')
4. ('anaphylactic', 'There is no reason why it should not be used if a person is having an anaphylactic reaction.')
5. ('allergies', 'The reasons may be malnutrition, food intolerances or allergies.')
6. ('hemorrhagic', 'However, a small number of people get dengue hemorrhagic fever or dengue shock syndrome.')
7. ('metastasis', 'Brain tumor   Lyme disease   Influenza   Brain metastasis   Foodborne Intoxication   Sinusitis   Meningitis   Encephalitis   In many cases headaches can be relieved naturally.')
8. ('gonorrhea', 'Sometimes antibiotics do not cure gonorrhea.')
9. ('emphysema', 'A history of exposure to cocaine, methamphetamine,

 87%|████████▋ | 76/87 [08:35<00:56,  5.11s/it]

 
query_sent:  They die of dysentery.
query_word:  dysentery
Top substitutions:
1. ('encephalitis', 'There are also vaccines for rabies, bacterial meningitis, Japanese encephalitis, Human Papillomavirus (HPV), and some other diseases that can cause encephalitis.')
2. ('hypoglycemia', 'Causes of pallor may include migraines, headache, hypoglycemia, anemia or scarlet fever.')
3. ('hemorrhagic', 'However, a small number of people get dengue hemorrhagic fever or dengue shock syndrome.')
4. ('diarrhoea', 'Lean and weak, swelling limbs, less developed muscles, dry skin, diarrhoea, etc.., are the symptoms of this disease.')
5. ('bronchitis', 'The common cold primarily affects the nose, pharyngitis primarily affects the throat, and bronchitis primarily affects the lungs.')
6. ('arrhythmias', 'Large exposures can result in loss of consciousness, arrhythmias, seizures, or death.')
7. ('rheumatic', 'Wolfgang Mozart, the famous composer famously died of rheumatic fever.')
8. ('hemorrhoids', 'Women

 89%|████████▊ | 77/87 [08:42<00:56,  5.66s/it]

 
query_sent:  Henry II moved in support of Richard, and Henry the Young King died from dysentery at the end of the campaign.
query_word:  dysentery
Top substitutions:
1. ('encephalitis', 'It is not usually considered life-threatening but in 1976 a girl of 5 years was prescribed with steroids at Great Ormond Street Hospital to lessen her acute arthritic pain and died at home on 30th December after a swift attack of encephalitis.')
2. ('diarrhea', 'Because digestion has suddenly stopped, the person may have nausea, diarrhea, or other gastrointestinal problems.')
3. ('diarrhoea', 'Lean and weak, swelling limbs, less developed muscles, dry skin, diarrhoea, etc.., are the symptoms of this disease.')
4. ('typhoid', 'Other causes of jaundice are pernicious anaemia and diseases affecting the liver such as typhoid, malaria, yellow fever and tuberculosis.')
5. ('hypoglycemia', 'Causes of pallor may include migraines, headache, hypoglycemia, anemia or scarlet fever.')
6. ('syphilis', 'Syphilis c

 90%|████████▉ | 78/87 [08:50<00:55,  6.16s/it]

 
query_sent:  Once at Richmond, Madison began drafting the Report, though he was delayed by a weeklong battle with dysentery.
query_word:  dysentery
Top substitutions:
1. ('diarrhea', 'Because digestion has suddenly stopped, the person may have nausea, diarrhea, or other gastrointestinal problems.')
2. ('typhoid', 'Other causes of jaundice are pernicious anaemia and diseases affecting the liver such as typhoid, malaria, yellow fever and tuberculosis.')
3. ('bronchitis', 'The common cold primarily affects the nose, pharyngitis primarily affects the throat, and bronchitis primarily affects the lungs.')
4. ('diarrhoea', 'Symptoms include cough, sore throat, muscle aches and pains, fever, headache, and rarely vomiting and diarrhoea.')
5. ('syphilis', 'Syphilis can cause a pregnant woman to have a miscarriage, a stillbirth, a premature baby, a baby with congenital syphilis, or a baby that dies before it is a month old.')
6. ('measles', 'If a person kisses or touches another person who is i

 91%|█████████ | 79/87 [08:57<00:50,  6.35s/it]

 
query_sent:  Psoriasis varies in severity from small, localized patches to complete body coverage.
query_word:  Psoriasis
Top substitutions:
1. ('hemorrhoids', 'Hemorrhoids are diagnosed by a doctor.')
2. ('anaphylaxis', 'Anaphylaxis has many symptoms, such as an itchy rash, throat swelling, breathing problems, and low blood pressure.')
3. ('hypoglycemia', 'Hypoglycemia is treated by returning the blood sugar levels back to normal.')
4. ('ulcers', 'Ulcers can be from pressure on a part of the skin.')
5. ('lymphangitis', 'Lymphangitis is sometimes considered a type of vasculitis.')
6. ('encephalitis', 'Encephalitis can cause serious symptoms, like seizures and strokes, and can be fatal.')
7. ('erythema', 'Erythema ab igne is a rash of the skin, which is usually caused by the long-term exposure to heat or infrared radiation, and which is not a burn.')
8. ('sarcoidosis', 'Sarcoidosis may involve the brain.')
9. ('gastritis', 'Sometimes gastritis starts after major surgery, serious injur

 92%|█████████▏| 80/87 [09:04<00:45,  6.55s/it]

 
query_sent:  Psoriasis is generally thought to be a genetic disease that is triggered by environmental factors.
query_word:  Psoriasis
Top substitutions:
1. ('encephalitis', 'Encephalitis can happen when a person gets a bacterial infection, like bacterial meningitis.')
2. ('bronchiectasis', 'Bronchiectasis is an obstructive lung disease.')
3. ('metastasis', 'Metastasis is what happens when cancer spreads from its original place to another part of the body.')
4. ('anaphylaxis', 'Anaphylaxis happens when a person eats, breathes in, or is injected with an allergen (something they are allergic to).')
5. ('candidiasis', 'Candidiasis, often called yeast infection or thrush, is a type of infectious disease.')
6. ('sarcoidosis', 'Sarcoidosis is a granulomatous disorder that can affect multiple organs.')
7. ('erythema', 'Erythema ab igne is a rash of the skin, which is usually caused by the long-term exposure to heat or infrared radiation, and which is not a burn.')
8. ('hypochondriasis', 'Hy

 93%|█████████▎| 81/87 [09:11<00:40,  6.78s/it]

 
query_sent:  There are five main types of psoriasis: plaque, guttate, inverse, pustular, and erythrodermic.
query_word:  psoriasis
Top substitutions:
1. ('osteoporosis', 'There are three kinds of osteoporosis.')
2. ('candidiasis', 'One sign of esophageal candidiasis is painful swallowing.')
3. ('hemorrhoids', 'Symptoms of internal hemorrhoids include\xa0itching, bleeding, swelling, mucus discharge, burning sensation, prolapse, and soiling.')
4. ('hypoglycemia', 'In the worst cases, when the blood sugar is very low, hypoglycemia is treated by giving sugar water intravenously (through a needle placed into a vein).')
5. ('psoriatic', 'Other kinds of arthritis include psoriatic arthritis and septic arthritis (when an area is invaded by bacteria).')
6. ('pneumothorax', 'Some other symptoms of Pneumothorax are shortness of breath, rapid breathing, and coughing.')
7. ('bronchiectasis', 'The most common symptom of bronchiectasis is a cough that does not go away.')
8. ('schistosomiasis', 'The

 94%|█████████▍| 82/87 [09:18<00:34,  6.89s/it]

 
query_sent:  If one twin has psoriasis, the other twin is three times more likely to be affected if the twins are identical than if they are non-identical.
query_word:  psoriasis
Top substitutions:
1. ('bronchiectasis', '3 out of 4 people with bronchiectasis often cough up a lot of sputum.')
2. ('anaphylaxis', 'Also, these tests cannot say for sure that a person does not have anaphylaxis.')
3. ('hypoglycemia', 'Bad hypoglycemia can make a person act like they are drunk or have taken drugs.')
4. ('hypersomnia', 'People with hypersomnia easily feel sleepy during the day, and fall asleep.')
5. ('osteoporosis', 'It is more likely for a woman to get osteoporosis than a man.')
6. ('candidiasis', 'Weight loss can happen when a person has esophageal candidiasis for a long time.')
7. ('glaucoma', 'A family history of glaucoma is a risk factor.')
8. ('hemorrhoids', 'Women are in danger of having hemorrhoids during pregnancy.')
9. ('psoriatic', 'Other kinds of arthritis include psoriatic arthri

 95%|█████████▌| 83/87 [09:22<00:24,  6.16s/it]

 
query_sent:  High doses may lead to muscles contractions.
query_word:  contractions
Top substitutions:
1. ('spasms', 'The condition leads to general muscle stiffness and spasms in other parts of the body.')
2. ('spasm', 'Muscle cramps, stiffness, and spasm can also be associated with myopathy.')
3. ('cramps', 'Muscle cramps, stiffness, and spasm can also be associated with myopathy.')
4. ('stiffness', 'Muscle cramps, stiffness, and spasm can also be associated with myopathy.')
5. ('twitches', 'Losses of muscle bulk and muscle twitches may be seen.')
6. ('tremors', 'Epinephrine can cause minor side effects, including tremors, anxiety, headaches, and palpitations.')
7. ('pain', 'This can cause weakness and muscle pain.')
8. ('headaches', 'Bruxism can wear down teeth, make them too sensitive, and cause headaches and jaw pain.')
9. ('aches', 'Some also have muscle aches, headache, tiredness, loss of appetite, diarrhea or vomiting.')
10. ('tiredness', 'It causes weak muscles and tiredness

 97%|█████████▋| 84/87 [09:27<00:17,  5.71s/it]

 
query_sent:  High doses may lead to convulsions.
query_word:  convulsions
Top substitutions:
1. ('hallucinations', 'Sleep paralysis may be accompanied by hallucinations of sight, hearing or touch.')
2. ('diarrhea', 'It causes diarrhea, vomiting and stomach pain.')
3. ('hyperactivity', 'This can cause seizures and hyperactivity.')
4. ('arrhythmias', 'Large exposures can result in loss of consciousness, arrhythmias, seizures, or death.')
5. ('delirium', ',other forms like encephalopathy or delirium may develop relatively slowly, over a number of years.')
6. ('incontinence', 'Sometimes, the only symptoms are incontinence (loss of bladder control), a change in mental status (ability to think), or feeling tired.')
7. ('anaphylaxis', 'Many foods can trigger anaphylaxis, even when the food is eaten for the first time.')
8. ('psychosis', 'Very bad mania can cause psychosis, with hallucinations and delusions.')
9. ('epileptic', 'Very often, the actual cause for an epileptic seizure is lack of

 98%|█████████▊| 85/87 [09:33<00:11,  5.81s/it]

 
query_sent:  To reduce the encephalocele cyst.
query_word:  encephalocele
Top substitutions:
1. ('rhabdomyosarcoma', 'This is the most common type of rhabdomyosarcoma.')
2. ('encephalopathy', 'Transmissible spongiform encephalopathy')
3. ('cystocele', 'The doctor who fixes the cystocele may fold the weak tissue and then stitch it back in place.')
4. ('toxoplasmosis', 'Toxoplasmosis Toxoplasma gondii parasite Pigs, lambs, deer, cattle Eating under-cooked meat.')
5. ('bronchiectasis', 'René Laennec wrote the first description of bronchiectasis in 1819.')
6. ('hepatosplenomegaly', 'Hepatomegaly -enlarged liver, splenomegaly - enlarged spleen, or both hepatosplenomegaly.')
7. ('ovary', 'It is a pus-filled ovary and Fallopian tube that is filled with infection.')
8. ('astrocytomas', 'Low-grade astrocytomas usually grow slowly and stay in one place.')
9. ('epithelium', 'RSV does cause epithelium damage.')
10. ('toxoplasma', 'Toxoplasmosis Toxoplasma gondii parasite Cats Cat feces (in cat l

 99%|█████████▉| 86/87 [09:41<00:06,  6.44s/it]

 
query_sent:  Epistaxis in children is usually from Little's area, which is on the septal wall anteriorly.
query_word:  Epistaxis
Top substitutions:
1. ('anaphylaxis', 'Anaphylaxis appears to be getting more common.')
2. ('symptom', 'This symptom may be harder to see in infants who cannot walk yet.')
3. ('epiglottitis', 'Epiglottitis usually happens in children.')
4. ('hypotension', 'Hypotension is low blood pressure.')
5. ('hypoglycemia', 'Hypoglycemia is treated by returning the blood sugar levels back to normal.')
6. ('cyanosis', 'Usually cyanosis is first seen in the extremities (the parts of the body that are farthest away from the heart).')
7. ('ischemia', 'Mesenteric ischemia happens mostly to the elderly (older people).')
8. ('sarcoidosis', 'Sarcoidosis may involve the brain.')
9. ('nystagmus', 'Nystagmus can be hereditary, but it can also be a result of something else.')
10. ('vasculitis', 'Vasculitis is primarily caused by leukocyte migration and resultant damage.')
11. ('cy

100%|██████████| 87/87 [09:48<00:00,  6.77s/it]

 
query_sent:  Once epistaxis occurs, the importance of the first treatment for the haemostasis should be emphasized.
query_word:  epistaxis
Top substitutions:
1. ('anaphylaxis', 'Anaphylaxis usually happens because the immune system over-reacts to an allergen.')
2. ('hypoglycemia', 'Hypoglycemia is treated by returning the blood sugar levels back to normal.')
3. ('epilepsy', 'Most forms of epilepsy cannot be cured.')
4. ('encephalitis', 'Encephalitis can cause serious symptoms, like seizures and strokes, and can be fatal.')
5. ('prosopagnosia', 'Prosopagnosia is not curable or treatable.')
6. ('trichomoniasis', 'About 95% to 97% of people with trichomoniasis are cured after one dose of metronidazole.')
7. ('neurosyphilis', 'Instead, people with neurosyphilis usually need to be given large doses of penicillin for at least 10 days.')
8. ('prostatitis', 'Sepsis from prostatitis is very rare, but may occur in immunocompromised patients; high fever and malaise generally prompt blood cultur


