In [2]:
%%capture
!pip install scispacy
!pip install 'https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_md-0.5.0.tar.gz'

In [3]:
import spacy
import scispacy
import pandas as pd
import en_core_sci_md
from scispacy.linking import EntityLinker

In [5]:
# Load the previously extracted entities (one row per entity string).
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which
# suggests the CSV was written together with its index. Consider
# `index_col=0` once confirmed that nothing relies on that column.
entities_dataframe = pd.read_csv('bionlp_entities.csv')

In [6]:
# Preview the entity table that was just read from the CSV.
entities_dataframe

Unnamed: 0,Entity,Label,Ner_model
0,patients,ORGANISM,bionlp13cg
1,loperamide hydrochloride,SIMPLE_CHEMICAL,bionlp13cg
2,sodium chloride,SIMPLE_CHEMICAL,bionlp13cg
3,gut-liver,CELLULAR_COMPONENT,bionlp13cg
4,lymphocytes,CELL,bionlp13cg
...,...,...,...
96,electrolytes,CELLULAR_COMPONENT,bionlp13cg
97,lymphocyte,CELL,bionlp13cg
98,C-reactive protein,GENE_OR_GENE_PRODUCT,bionlp13cg
99,glutathione,SIMPLE_CHEMICAL,bionlp13cg


In [7]:
mesh_nlp = spacy.load("en_core_sci_md")
mesh_nlp.add_pipe("scispacy_linker", config={"resolve_abbreviations": True, "linker_name": "mesh"}) #MeSH contains ~30k entities
# Keep a MeSH-specific handle on the linker. The generic name `linker` is
# rebound by every other linker cell in this notebook, so a function that
# reads `linker` at call time would look concepts up in whichever knowledge
# base was loaded last.
mesh_linker = mesh_nlp.get_pipe("scispacy_linker")
linker = mesh_linker  # generic alias kept for any code that still reads `linker`


def mesh_entity_linker(document):
    """Link the first entity found in ``document`` against the MeSH knowledge base.

    Parameters
    ----------
    document
        Text that is passed to ``mesh_nlp``.

    Returns
    -------
    list
        ``['Nan']`` when the pipeline detects no entity. Otherwise the first
        detected entity span, followed by one
        ``'Entity_Matching_Score :<score>'`` string and one knowledge-base
        entry for each candidate in ``entity._.kb_ents``, in that order.

    Notes
    -----
    Only ``doc.ents[0]`` is linked; further entities in ``document`` are ignored.
    """
    doc = mesh_nlp(document)
    if not doc.ents:
        # Same sentinel the notebook has always produced for "no entity".
        return ['Nan']

    entity = doc.ents[0]
    entity_details = [entity]
    # Each candidate is a (concept id, score) pair.
    for concept_id, score in entity._.kb_ents:
        entity_details.append('Entity_Matching_Score :{}'.format(score))
        entity_details.append(mesh_linker.kb.cui_to_entity[concept_id])
    return entity_details

https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/mesh/tfidf_vectors_sparse.npz not found in cache, downloading to /tmp/tmpjf69curo
Finished download, copying /tmp/tmpjf69curo to cache at /root/.scispacy/datasets/0acb1f67e1908d2211efb5291880a946e905e1a14a87c10cfc640d0711f914c7.e4877c46bb5a882e9729b6abe799b33f195067557a3c0c15086a50471f29b985.tfidf_vectors_sparse.npz
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/mesh/nmslib_index.bin not found in cache, downloading to /tmp/tmp49f0o902
Finished download, copying /tmp/tmp49f0o902 to cache at /root/.scispacy/datasets/7bad4a37e60db48ee4b5b03dfaa61b195af5b4c69a6850fa5b466103229c263d.4952ca58f4ed53ad673bb387c8f203d92f422dbcc8cfb673ffed9720e7c0af68.nmslib_index.bin
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/mesh/tfidf_vectorizer.joblib not found in cache, downloading to /tmp/tmpj0us0966
Finished download, copying /tmp/tmpj0us0966 to cache at /root/.scispacy/da

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/mesh/concept_aliases.json not found in cache, downloading to /tmp/tmpl0l_g4uy
Finished download, copying /tmp/tmpl0l_g4uy to cache at /root/.scispacy/datasets/ccb3a55e3a37984902cc7de591d37d56d90eb0962d128536512b8d1219e71bcb.89e92a904a5ccc051bcba6ee26c5744e183dee7197cc835cfeb152b330b44559.concept_aliases.json
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/kbs/2023-04-23/umls_mesh_2022.jsonl not found in cache, downloading to /tmp/tmp3uwj1w9_
Finished download, copying /tmp/tmp3uwj1w9_ to cache at /root/.scispacy/datasets/5541a1df25533cfafec1fdcf0446c761f998591519c8ad4a73876f48d7e0a224.c4d6e393746f18aaf6eafff94fe1782cebf29ef535b101501e66f1e3462cdb09.umls_mesh_2022.jsonl


In [8]:
# Run the MeSH linker on every entity string; the function is passed directly
# because wrapping it in a lambda adds nothing.
entities_dataframe['mesh_output'] = entities_dataframe['Entity'].apply(mesh_entity_linker)

In [9]:
# Inspect the frame now that it carries the `mesh_output` column.
entities_dataframe

Unnamed: 0,Entity,Label,Ner_model,mesh_output
0,patients,ORGANISM,bionlp13cg,"[(patients), Entity_Matching_Score :1.0, (C003..."
1,loperamide hydrochloride,SIMPLE_CHEMICAL,bionlp13cg,"[(loperamide, hydrochloride), Entity_Matching_..."
2,sodium chloride,SIMPLE_CHEMICAL,bionlp13cg,"[(sodium, chloride), Entity_Matching_Score :1...."
3,gut-liver,CELLULAR_COMPONENT,bionlp13cg,[(gut-liver)]
4,lymphocytes,CELL,bionlp13cg,"[(lymphocytes), Entity_Matching_Score :1.0, (C..."
...,...,...,...,...
96,electrolytes,CELLULAR_COMPONENT,bionlp13cg,"[(electrolytes), Entity_Matching_Score :1.0, (..."
97,lymphocyte,CELL,bionlp13cg,"[(lymphocyte), Entity_Matching_Score :1.0, (C0..."
98,C-reactive protein,GENE_OR_GENE_PRODUCT,bionlp13cg,"[(C-reactive, protein), Entity_Matching_Score ..."
99,glutathione,SIMPLE_CHEMICAL,bionlp13cg,"[(glutathione), Entity_Matching_Score :1.0, (C..."


In [10]:
# Full MeSH linking result for the row labelled 0 (single label-based lookup).
entities_dataframe.loc[0, 'mesh_output']

[patients,
 'Entity_Matching_Score :1.0',
 CUI: C0030705, Name: Patients
 Definition: Individuals participating in the health care system for the purpose of receiving therapeutic, diagnostic, or preventive procedures.
 TUI(s): T101
 Aliases: (total: 1): 
 	 Patient,
 'Entity_Matching_Score :0.8274745941162109',
 CUI: C0025360, Name: Mentally Ill Persons
 Definition: Persons with psychiatric illnesses or diseases, particularly psychotic and severe mood disorders.
 TUI(s): T101
 Aliases: (total: 6): 
 	 Mental Patients, Person, Mentally Ill, Ill, Mentally, Mentally Ill Person, Persons, Mentally Ill, Mentally Ill,
 'Entity_Matching_Score :0.7562460899353027',
 CUI: C0029921, Name: Outpatients
 Definition: Persons who receive ambulatory care at an outpatient department or clinic without room and board being provided.
 TUI(s): T101
 Aliases: (total: 4): 
 	 Out-patients, Out-patient, Out patients, Outpatient,
 'Entity_Matching_Score :0.7128358483314514',
 CUI: C0030706, Name: Right, Patient

In [11]:
hpo_nlp = spacy.load("en_core_sci_md")
hpo_nlp.add_pipe("scispacy_linker", config={"resolve_abbreviations": True, "linker_name": "hpo"}) #16k concepts focused on phenotypic abnormalities encountered in human disease.
# Keep an HPO-specific handle on the linker. The generic name `linker` is
# rebound by every other linker cell in this notebook, so a function that
# reads `linker` at call time would look concepts up in whichever knowledge
# base was loaded last.
hpo_linker = hpo_nlp.get_pipe("scispacy_linker")
linker = hpo_linker  # generic alias kept for any code that still reads `linker`


def hpo_entity_linker(document):
    """Link the first entity found in ``document`` against the HPO knowledge base.

    Parameters
    ----------
    document
        Text that is passed to ``hpo_nlp``.

    Returns
    -------
    list
        ``['Nan']`` when the pipeline detects no entity. Otherwise the first
        detected entity span, followed by one
        ``'Entity_Matching_Score :<score>'`` string and one knowledge-base
        entry for each candidate in ``entity._.kb_ents``, in that order.

    Notes
    -----
    Only ``doc.ents[0]`` is linked; further entities in ``document`` are ignored.
    """
    doc = hpo_nlp(document)
    if not doc.ents:
        # Same sentinel the notebook has always produced for "no entity".
        return ['Nan']

    entity = doc.ents[0]
    entity_details = [entity]
    # Each candidate is a (concept id, score) pair.
    for concept_id, score in entity._.kb_ents:
        entity_details.append('Entity_Matching_Score :{}'.format(score))
        entity_details.append(hpo_linker.kb.cui_to_entity[concept_id])
    return entity_details

https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/hpo/tfidf_vectors_sparse.npz not found in cache, downloading to /tmp/tmpym8njx3e
Finished download, copying /tmp/tmpym8njx3e to cache at /root/.scispacy/datasets/ce11d8a176fa1830308fc265ab8845ca877f10c70fa3f74212ff2d9fdd97ab96.029e8ca566e1b5d6ab99138a96aa1c7b050565132aabb6b296a1c870c64d6f9b.tfidf_vectors_sparse.npz
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/hpo/nmslib_index.bin not found in cache, downloading to /tmp/tmp8aa91bf8
Finished download, copying /tmp/tmp8aa91bf8 to cache at /root/.scispacy/datasets/066d3db776b9acaff67728a857a1d6625f4c86194a70804ffd5399fa738caa4e.ecc1ac28794235140b2bafbbf81ce0454219cd1e05056786dce65ab17fee53b2.nmslib_index.bin
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/hpo/tfidf_vectorizer.joblib not found in cache, downloading to /tmp/tmpx1vgcmlo
Finished download, copying /tmp/tmpx1vgcmlo to cache at /root/.scispacy/datas

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/hpo/concept_aliases.json not found in cache, downloading to /tmp/tmpcqy_ty7p
Finished download, copying /tmp/tmpcqy_ty7p to cache at /root/.scispacy/datasets/092c266817935c16682d3a1511bad5bdb7e3665d93da4d2eb21d42fa6b2f4100.298fa9e3ef85c61367c35b4240deae6f06545e2cb68659bbad65602be2dfefab.concept_aliases.json
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/kbs/2023-04-23/umls_hpo_2022.jsonl not found in cache, downloading to /tmp/tmp6wpuhnp3
Finished download, copying /tmp/tmp6wpuhnp3 to cache at /root/.scispacy/datasets/4acfb77195a577a57a9791f9627dcc8c47561d8c2fa4671b9a5ca0e494970e87.b703e72c55ea536eac9c2fcb2d63553d36ea0aadec6a3f525a9eb21998302bc7.umls_hpo_2022.jsonl


In [12]:
# Run the HPO linker on every entity string; the function is passed directly
# because wrapping it in a lambda adds nothing.
entities_dataframe['hpo_output'] = entities_dataframe['Entity'].apply(hpo_entity_linker)

In [13]:
# Inspect the frame now that it carries the `hpo_output` column.
entities_dataframe

Unnamed: 0,Entity,Label,Ner_model,mesh_output,hpo_output
0,patients,ORGANISM,bionlp13cg,"[(patients), Entity_Matching_Score :1.0, (C003...",[(patients)]
1,loperamide hydrochloride,SIMPLE_CHEMICAL,bionlp13cg,"[(loperamide, hydrochloride), Entity_Matching_...","[(loperamide, hydrochloride)]"
2,sodium chloride,SIMPLE_CHEMICAL,bionlp13cg,"[(sodium, chloride), Entity_Matching_Score :1....","[(sodium, chloride)]"
3,gut-liver,CELLULAR_COMPONENT,bionlp13cg,[(gut-liver)],[(gut-liver)]
4,lymphocytes,CELL,bionlp13cg,"[(lymphocytes), Entity_Matching_Score :1.0, (C...","[(lymphocytes), Entity_Matching_Score :0.90991..."
...,...,...,...,...,...
96,electrolytes,CELLULAR_COMPONENT,bionlp13cg,"[(electrolytes), Entity_Matching_Score :1.0, (...","[(electrolytes), Entity_Matching_Score :0.7037..."
97,lymphocyte,CELL,bionlp13cg,"[(lymphocyte), Entity_Matching_Score :1.0, (C0...","[(lymphocyte), Entity_Matching_Score :0.831573..."
98,C-reactive protein,GENE_OR_GENE_PRODUCT,bionlp13cg,"[(C-reactive, protein), Entity_Matching_Score ...","[(C-reactive, protein), Entity_Matching_Score ..."
99,glutathione,SIMPLE_CHEMICAL,bionlp13cg,"[(glutathione), Entity_Matching_Score :1.0, (C...",[(glutathione)]


In [14]:
# Full HPO linking result for the row labelled 4 (single label-based lookup).
entities_dataframe.loc[4, 'hpo_output']

[lymphocytes,
 'Entity_Matching_Score :0.9099137783050537',
 CUI: C0221277, Name: Abnormal lymphocyte morphology
 Definition: A lymphocyte that may be irregular or not conforming to type.
 TUI(s): T033
 Aliases: (total: 2): 
 	 Abnormal lymphocytes, Abnormality of cells of the lymphoid lineage,
 'Entity_Matching_Score :0.7441422939300537',
 CUI: C0580550, Name: Abnormal numbers of lymphocytes
 Definition: Any abnormality in the total number of lymphocytes in the blood. []
 TUI(s): T033
 Aliases: (total: 4): 
 	 Abnormal number of lymphocytes, Abnormality of lymphocyte number, Abnormal lymphocyte count, Abnormal lymphocyte counts,
 'Entity_Matching_Score :0.7385052442550659',
 CUI: C0024282, Name: High lymphocyte count
 Definition: Excess of normal lymphocytes in the blood or in any effusion.
 TUI(s): T047
 Aliases: (total: 1): 
 	 Lymphocytosis]

In [15]:
rxnorm_nlp = spacy.load("en_core_sci_md")
rxnorm_nlp.add_pipe("scispacy_linker", config={"resolve_abbreviations": True, "linker_name": "rxnorm"}) #RxNorm contains ~100k concepts focused on normalized names for clinical drugs
# Keep an RxNorm-specific handle on the linker. The generic name `linker` is
# rebound by every other linker cell in this notebook, so a function that
# reads `linker` at call time would look concepts up in whichever knowledge
# base was loaded last.
rxnorm_linker = rxnorm_nlp.get_pipe("scispacy_linker")
linker = rxnorm_linker  # generic alias kept for any code that still reads `linker`


def rxnorm_entity_linker(document):
    """Link the first entity found in ``document`` against the RxNorm knowledge base.

    Parameters
    ----------
    document
        Text that is passed to ``rxnorm_nlp``.

    Returns
    -------
    list
        ``['Nan']`` when the pipeline detects no entity. Otherwise the first
        detected entity span, followed by one
        ``'Entity_Matching_Score :<score>'`` string and one knowledge-base
        entry for each candidate in ``entity._.kb_ents``, in that order.

    Notes
    -----
    Only ``doc.ents[0]`` is linked; further entities in ``document`` are ignored.
    """
    doc = rxnorm_nlp(document)
    if not doc.ents:
        # Same sentinel the notebook has always produced for "no entity".
        return ['Nan']

    entity = doc.ents[0]
    entity_details = [entity]
    # Each candidate is a (concept id, score) pair.
    for concept_id, score in entity._.kb_ents:
        entity_details.append('Entity_Matching_Score :{}'.format(score))
        entity_details.append(rxnorm_linker.kb.cui_to_entity[concept_id])
    return entity_details

https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/rxnorm/tfidf_vectors_sparse.npz not found in cache, downloading to /tmp/tmpbfoeg88x
Finished download, copying /tmp/tmpbfoeg88x to cache at /root/.scispacy/datasets/68e7f1197d5852698808a5f9d694026c210e4b93a7e496dea608a46fff914774.e9a1075d5c32b5e7a180b60a96b15fc072ea714b95dd458047a48ccf2bb065be.tfidf_vectors_sparse.npz
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/rxnorm/nmslib_index.bin not found in cache, downloading to /tmp/tmpismu69yo
Finished download, copying /tmp/tmpismu69yo to cache at /root/.scispacy/datasets/3742ff1d61c637ce7dc935674fa4199810af16978f9a10088d71d37bba16203a.8f798c6f751125a0d68f8b4e82ecfba4fd37bfb2a447d61fba584e208e6af9d3.nmslib_index.bin
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/rxnorm/tfidf_vectorizer.joblib not found in cache, downloading to /tmp/tmp155d06dj
Finished download, copying /tmp/tmp155d06dj to cache at /root/.scisp

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/rxnorm/concept_aliases.json not found in cache, downloading to /tmp/tmpdvh0hyoc
Finished download, copying /tmp/tmpdvh0hyoc to cache at /root/.scispacy/datasets/54a3afac2f157748a3326a13e59ffe165fcc40ce0cceab6dc47303965dc3c0ed.71746c536649e7ba8a47b6cb7a3a7c8e0c447e022bdf819e69fbb1de9276d411.concept_aliases.json
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/kbs/2023-04-23/umls_rxnorm_2022.jsonl not found in cache, downloading to /tmp/tmp0x2fhn5v
Finished download, copying /tmp/tmp0x2fhn5v to cache at /root/.scispacy/datasets/afd8034c6b1a9b6e9eb94a5688ab043023fb450ddf36c88b9f78efa21c5b2d0a.7afae38a116c40277e6052ddcfcd0013fb8136a6d4f96d965ccc7689e8543712.umls_rxnorm_2022.jsonl


In [16]:
# Run the RxNorm linker on every entity string; the function is passed
# directly because wrapping it in a lambda adds nothing.
entities_dataframe['rxnorm_output'] = entities_dataframe['Entity'].apply(rxnorm_entity_linker)

In [17]:
# Inspect the frame now that it carries the `rxnorm_output` column.
entities_dataframe

Unnamed: 0,Entity,Label,Ner_model,mesh_output,hpo_output,rxnorm_output
0,patients,ORGANISM,bionlp13cg,"[(patients), Entity_Matching_Score :1.0, (C003...",[(patients)],[(patients)]
1,loperamide hydrochloride,SIMPLE_CHEMICAL,bionlp13cg,"[(loperamide, hydrochloride), Entity_Matching_...","[(loperamide, hydrochloride)]","[(loperamide, hydrochloride), Entity_Matching_..."
2,sodium chloride,SIMPLE_CHEMICAL,bionlp13cg,"[(sodium, chloride), Entity_Matching_Score :1....","[(sodium, chloride)]","[(sodium, chloride), Entity_Matching_Score :1...."
3,gut-liver,CELLULAR_COMPONENT,bionlp13cg,[(gut-liver)],[(gut-liver)],[(gut-liver)]
4,lymphocytes,CELL,bionlp13cg,"[(lymphocytes), Entity_Matching_Score :1.0, (C...","[(lymphocytes), Entity_Matching_Score :0.90991...",[(lymphocytes)]
...,...,...,...,...,...,...
96,electrolytes,CELLULAR_COMPONENT,bionlp13cg,"[(electrolytes), Entity_Matching_Score :1.0, (...","[(electrolytes), Entity_Matching_Score :0.7037...",[(electrolytes)]
97,lymphocyte,CELL,bionlp13cg,"[(lymphocyte), Entity_Matching_Score :1.0, (C0...","[(lymphocyte), Entity_Matching_Score :0.831573...",[(lymphocyte)]
98,C-reactive protein,GENE_OR_GENE_PRODUCT,bionlp13cg,"[(C-reactive, protein), Entity_Matching_Score ...","[(C-reactive, protein), Entity_Matching_Score ...","[(C-reactive, protein)]"
99,glutathione,SIMPLE_CHEMICAL,bionlp13cg,"[(glutathione), Entity_Matching_Score :1.0, (C...",[(glutathione)],"[(glutathione), Entity_Matching_Score :1.0, (C..."


In [18]:
# Full RxNorm linking result for the row labelled 99 (single label-based lookup).
entities_dataframe.loc[99, 'rxnorm_output']

[glutathione,
 'Entity_Matching_Score :1.0',
 CUI: C0017817, Name: glutathione
 Definition: A tripeptide with many roles in cells. It conjugates to drugs to make them more soluble for excretion, is a cofactor for some enzymes, is involved in protein disulfide bond rearrangement and reduces peroxides.
 TUI(s): T116, T121, T123
 Aliases: (total: 1): 
 	 L-glutathione]

In [19]:
go_nlp = spacy.load("en_core_sci_md")
go_nlp.add_pipe("scispacy_linker", config={"resolve_abbreviations": True, "linker_name": "go"}) #Gene Ontology contains ~67k concepts focused on the functions of genes
# Keep a GO-specific handle on the linker. The generic name `linker` is
# rebound by every other linker cell in this notebook, so a function that
# reads `linker` at call time would look concepts up in whichever knowledge
# base was loaded last.
go_linker = go_nlp.get_pipe("scispacy_linker")
linker = go_linker  # generic alias kept for any code that still reads `linker`


def go_entity_linker(document):
    """Link the first entity found in ``document`` against the Gene Ontology knowledge base.

    Parameters
    ----------
    document
        Text that is passed to ``go_nlp``.

    Returns
    -------
    list
        ``['Nan']`` when the pipeline detects no entity. Otherwise the first
        detected entity span, followed by one
        ``'Entity_Matching_Score :<score>'`` string and one knowledge-base
        entry for each candidate in ``entity._.kb_ents``, in that order.

    Notes
    -----
    Only ``doc.ents[0]`` is linked; further entities in ``document`` are ignored.
    """
    doc = go_nlp(document)
    if not doc.ents:
        # Same sentinel the notebook has always produced for "no entity".
        return ['Nan']

    entity = doc.ents[0]
    entity_details = [entity]
    # Each candidate is a (concept id, score) pair.
    for concept_id, score in entity._.kb_ents:
        entity_details.append('Entity_Matching_Score :{}'.format(score))
        entity_details.append(go_linker.kb.cui_to_entity[concept_id])
    return entity_details

https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/go/tfidf_vectors_sparse.npz not found in cache, downloading to /tmp/tmpwjhowrey
Finished download, copying /tmp/tmpwjhowrey to cache at /root/.scispacy/datasets/98b21d1968addfd51eceee816a491b7b10de52fbc8f11f22fbf8374d9f881229.0a8a2035151feef72cf9dc0bcda27bda35e86771810a2a4523bae7ea337ae7bb.tfidf_vectors_sparse.npz
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/go/nmslib_index.bin not found in cache, downloading to /tmp/tmpc_bap78w
Finished download, copying /tmp/tmpc_bap78w to cache at /root/.scispacy/datasets/3ed448934f89223c37be21a402a665d6e3dfcbea9bfd87b1fcd68dbb2f850760.40c7e42a18bea0b2f632b9ec6c299545f1f7d91b2187158ee03380d639eb867f.nmslib_index.bin
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/go/tfidf_vectorizer.joblib not found in cache, downloading to /tmp/tmpd61y25yf
Finished download, copying /tmp/tmpd61y25yf to cache at /root/.scispacy/datasets

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/go/concept_aliases.json not found in cache, downloading to /tmp/tmpo1727nh5
Finished download, copying /tmp/tmpo1727nh5 to cache at /root/.scispacy/datasets/e4e99357becdaacb55a07f8b1bcee8d7f6a634ab41db03ab28182a2166f24d4c.5b185b2b9139bc990299750dd4c87979e814ffee13ae3c650bc218c96dbc63ae.concept_aliases.json
https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/kbs/2023-04-23/umls_go_2022.jsonl not found in cache, downloading to /tmp/tmpssoff2yd
Finished download, copying /tmp/tmpssoff2yd to cache at /root/.scispacy/datasets/f2fae68affc838ddf0a87884154533ce359bda3c7d430bb7aa21ae851bee639d.0f776e01d8c81b2c7a6b9b8ffeff2bd7dc23c2b06fdc7719513bd10f1cff9c5a.umls_go_2022.jsonl


In [20]:
# Run the Gene Ontology linker on every entity string; the function is passed
# directly because wrapping it in a lambda adds nothing.
entities_dataframe['go_output'] = entities_dataframe['Entity'].apply(go_entity_linker)

In [21]:
# Inspect the frame now that it carries the `go_output` column.
entities_dataframe

Unnamed: 0,Entity,Label,Ner_model,mesh_output,hpo_output,rxnorm_output,go_output
0,patients,ORGANISM,bionlp13cg,"[(patients), Entity_Matching_Score :1.0, (C003...",[(patients)],[(patients)],[(patients)]
1,loperamide hydrochloride,SIMPLE_CHEMICAL,bionlp13cg,"[(loperamide, hydrochloride), Entity_Matching_...","[(loperamide, hydrochloride)]","[(loperamide, hydrochloride), Entity_Matching_...","[(loperamide, hydrochloride)]"
2,sodium chloride,SIMPLE_CHEMICAL,bionlp13cg,"[(sodium, chloride), Entity_Matching_Score :1....","[(sodium, chloride)]","[(sodium, chloride), Entity_Matching_Score :1....","[(sodium, chloride)]"
3,gut-liver,CELLULAR_COMPONENT,bionlp13cg,[(gut-liver)],[(gut-liver)],[(gut-liver)],[(gut-liver)]
4,lymphocytes,CELL,bionlp13cg,"[(lymphocytes), Entity_Matching_Score :1.0, (C...","[(lymphocytes), Entity_Matching_Score :0.90991...",[(lymphocytes)],"[(lymphocytes), Entity_Matching_Score :0.77936..."
...,...,...,...,...,...,...,...
96,electrolytes,CELLULAR_COMPONENT,bionlp13cg,"[(electrolytes), Entity_Matching_Score :1.0, (...","[(electrolytes), Entity_Matching_Score :0.7037...",[(electrolytes)],[(electrolytes)]
97,lymphocyte,CELL,bionlp13cg,"[(lymphocyte), Entity_Matching_Score :1.0, (C0...","[(lymphocyte), Entity_Matching_Score :0.831573...",[(lymphocyte)],"[(lymphocyte), Entity_Matching_Score :0.896024..."
98,C-reactive protein,GENE_OR_GENE_PRODUCT,bionlp13cg,"[(C-reactive, protein), Entity_Matching_Score ...","[(C-reactive, protein), Entity_Matching_Score ...","[(C-reactive, protein)]","[(C-reactive, protein)]"
99,glutathione,SIMPLE_CHEMICAL,bionlp13cg,"[(glutathione), Entity_Matching_Score :1.0, (C...",[(glutathione)],"[(glutathione), Entity_Matching_Score :1.0, (C...","[(glutathione), Entity_Matching_Score :0.83615..."


In [22]:
# Full Gene Ontology linking result for the row labelled 4 (single label-based lookup).
entities_dataframe.loc[4, 'go_output']

[lymphocytes,
 'Entity_Matching_Score :0.7793624997138977',
 CUI: C1326202, Name: B cell apoptotic process
 Definition: Any apoptotic process in a B cell, a lymphocyte of B lineage with the phenotype CD19-positive and capable of B cell mediated immunity. [CL:0000236, GOC:add, GOC:mtg_apoptosis, ISBN:0781735149]
 TUI(s): T043
 Aliases (abbreviated, total: 20): 
 	 B lymphocyte apoptosis, programmed cell death, B lymphocytes, apoptosis of B-lymphocytes, programmed cell death of B-lymphocytes by apoptosis, programmed cell death of B cells by apoptosis, apoptosis of B-cells, B cell programmed cell death by apoptosis, B-cell programmed cell death by apoptosis, programmed cell death, B cells, B-cell apoptosis,
 'Entity_Matching_Score :0.7451572418212891',
 CUI: C0024262, Name: lymphocyte activation
 Definition: Morphologic alteration of small B LYMPHOCYTES or T LYMPHOCYTES in culture into large blast-like cells able to synthesize DNA and RNA and to divide mitotically. It is induced by INTERL