## Import and Data Load

In [None]:
import pandas as pd
import numpy as np
import json
import requests

## PubTator

In [None]:
def pubtator_generate_struct(pubt_data, annot_type="Chemical"):
    """Extract the text and character span of annotations of one entity type.

    Parameters
    ----------
    pubt_data : iterable of dict or None
        Annotation dicts as found under a passage's ``'annotations'`` key.
        Each is expected to carry ``'infons'`` (with a ``'type'`` entry),
        ``'text'`` and ``'locations'`` (a list of dicts with ``'offset'``
        and ``'length'``). ``None`` is treated as an empty list.
    annot_type : str, default "Chemical"
        Value of ``infons['type']`` to keep; every other annotation is ignored.

    Returns
    -------
    list of dict
        One ``{'text': str, 'location': {'start': int, 'end': int}}`` entry per
        kept annotation, in input order. ``end`` is ``offset + length`` of the
        annotation's first location.

    Notes
    -----
    Only the first entry of ``'locations'`` is used. Annotations that lack a
    type, text, or any location are skipped instead of raising, so a single
    malformed record does not abort a whole batch.
    """
    pubt_res = []
    for annot in pubt_data or []:
        infons = annot.get('infons') or {}
        if infons.get('type') != annot_type:
            continue

        annot_text = annot.get('text')
        locations = annot.get('locations') or []
        if annot_text is None or not locations:
            # Nothing usable to record without both the mention and its span.
            continue

        first_loc = locations[0]
        start = first_loc['offset']
        pubt_res.append({
            'text': annot_text,
            'location': {'start': start, 'end': start + first_loc['length']},
        })
    return pubt_res

In [None]:
import time

def pubtator_process(id, delay=0.6, timeout=30, verbose=True):
    """Fetch PubTator3 annotations for one PMID and return its chemical mentions.

    Parameters
    ----------
    id : int or str
        PubMed identifier interpolated into the PubTator3 export URL.
        (The name shadows the ``id`` builtin; it is kept so existing callers
        keep working.)
    delay : float, default 0.6
        Seconds to sleep after the HTTP request, whether or not it succeeded,
        to space out consecutive calls.
    timeout : float, default 30
        Seconds passed to ``requests.get`` so a stalled connection cannot
        hang the notebook indefinitely.
    verbose : bool, default True
        When true, print the PMID and display the raw annotation list.

    Returns
    -------
    list of dict
        Output of ``pubtator_generate_struct`` for the selected passage, or an
        empty list when the response holds no record or no such passage.

    Raises
    ------
    requests.RequestException
        On connection errors, timeouts, or a non-2xx HTTP status.
    """
    url = f"https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/export/biocjson?pmids={id}&full=true"
    if verbose:
        print(id)

    try:
        res = requests.get(url, timeout=timeout)
        # Fail with a clear HTTP error instead of an opaque JSON decode error.
        res.raise_for_status()
        res_js = res.json().get('PubTator3') or []
    finally:
        # Pause after every request, including failed ones, before the next call.
        time.sleep(delay)

    # Only working with abstracts.
    # NOTE(review): assumes the second passage of the record is the abstract;
    # confirm this holds for full-text records returned with full=true.
    try:
        pubt_annot = res_js[0]['passages'][1]['annotations']
    except (IndexError, KeyError):
        # Keep a long batch alive when one PMID has no usable record.
        print(f"PubTator3: no abstract annotations found for PMID {id}")
        return []

    if verbose:
        display(pubt_annot)

    return pubtator_generate_struct(pubt_annot)

## Process the Positive Dataset

In [None]:
# Read the positive-example spreadsheet and keep only the rows that have an
# abstract, building the filtered frame in one chained expression.
pos_df = (
    pd.read_excel("DatasetPositiveNature.xlsx")
    .dropna(subset=['Abstract'])
)
pos_df

Unnamed: 0,Title,PMID,Targets,Abstract
0,2-Pyrazol-1-yl-thiazole derivatives as novel h...,31358913,['2-(pyrazol-1-yl)- thiazole'],The present report describes our efforts to id...
1,A choline binding polypeptide of LytA inhibits...,30127421,['ChBp'],Streptococcus pneumoniae is a pathogen that ma...
2,A naturally inspired antibiotic to target mult...,34987225,['Macolacin'],Gram-negative bacteria are responsible for an ...
3,A new class of synthetic retinoid antibiotics ...,29590091,"['CD1530', 'CD437']",A challenge in the treatment of Staphylococcus...
4,"A novel, rationally designed, hybrid antimicro...",32499514,['PA-13'],Antimicrobial peptides (AMPs) are promising al...
...,...,...,...,...
75,Total synthesis and antimicrobial evaluation o...,30181560,"['Albomycin', 'albomycin delta 2', 'grisein', ...",Development of effective antimicrobial agents ...
76,"Venturicidin A, A Membrane-active Natural Prod...",32424122,['Venturicidin A'],Despite the remarkable advances due to the dis...
77,"Walkmycin B targets WalK (YycG), a histidine k...",20057515,['Walkmycin B'],The WalK (a histidine kinase)/WalR (a response...
78,"Wychimicins, a new class of spirotetronate pol...",36071214,['Wychimicin'],In the course of our screening program for new...


In [None]:
# Cast the PMID column to integers; every other column keeps its dtype.
pos_df = pos_df.astype({'PMID': int})
pos_df

Unnamed: 0,Title,PMID,Targets,Abstract
0,2-Pyrazol-1-yl-thiazole derivatives as novel h...,31358913,['2-(pyrazol-1-yl)- thiazole'],The present report describes our efforts to id...
1,A choline binding polypeptide of LytA inhibits...,30127421,['ChBp'],Streptococcus pneumoniae is a pathogen that ma...
2,A naturally inspired antibiotic to target mult...,34987225,['Macolacin'],Gram-negative bacteria are responsible for an ...
3,A new class of synthetic retinoid antibiotics ...,29590091,"['CD1530', 'CD437']",A challenge in the treatment of Staphylococcus...
4,"A novel, rationally designed, hybrid antimicro...",32499514,['PA-13'],Antimicrobial peptides (AMPs) are promising al...
...,...,...,...,...
75,Total synthesis and antimicrobial evaluation o...,30181560,"['Albomycin', 'albomycin delta 2', 'grisein', ...",Development of effective antimicrobial agents ...
76,"Venturicidin A, A Membrane-active Natural Prod...",32424122,['Venturicidin A'],Despite the remarkable advances due to the dis...
77,"Walkmycin B targets WalK (YycG), a histidine k...",20057515,['Walkmycin B'],The WalK (a histidine kinase)/WalR (a response...
78,"Wychimicins, a new class of spirotetronate pol...",36071214,['Wychimicin'],In the course of our screening program for new...


In [None]:
# Call pubtator_process once per PMID and store each result list in a new
# 'PubTator' column, aligned with the row it came from.
pos_df['PubTator'] = pos_df['PMID'].map(pubtator_process)

31358913


[{'id': '9',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': '2-pyrazol-1-yl-thiazole',
  'locations': [{'offset': 584, 'length': 23}]},
 {'id': '10',
  'infons': {'identifier': '562',
   'type': 'Species',
   'valid': True,
   'normalized': [562],
   'database': 'ncbi_taxonomy',
   'normalized_id': 562,
   'biotype': 'species',
   'name': '562',
   'accession': None},
  'text': 'Escherichia coli',
  'locations': [{'offset': 671, 'length': 16}]},
 {'id': '11',
  'infons': {'identifier': 'MESH:D004917',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D004917'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D004917',
   'biotype': 'chemical',
   'name': 'Erythromycin',
   'accession': '@CHEMICAL_Erythromycin'},
  'text': 'erythromycin',
  'locations': [{'offset': 929, 'length': 12}]},
 {'id': '12',
  'infons': {'identifier': 'MESH:D064704',
   'type': 'Chemical',
   'valid': True,
   '

30127421


[{'id': '13',
  'infons': {'identifier': '1313',
   'type': 'Species',
   'valid': True,
   'normalized': [1313],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1313,
   'biotype': 'species',
   'name': '1313',
   'accession': None},
  'text': 'Streptococcus pneumoniae',
  'locations': [{'offset': 126, 'length': 24}]},
 {'id': '14',
  'infons': {'identifier': 'MESH:D011008',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D011008'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D011008',
   'biotype': 'disease',
   'name': 'Pneumococcal Infections',
   'accession': '@DISEASE_Pneumococcal_Infections'},
  'text': 'pneumococcal infections',
  'locations': [{'offset': 304, 'length': 23}]},
 {'id': '15',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'S. pneumoniae',
  'locations': [{'offset': 401, 'length': 13}]},
 {'id': '16',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
  

34987225


[{'id': '11',
  'infons': {'identifier': 'MESH:D003643',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003643'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003643',
   'biotype': 'disease',
   'name': 'Death',
   'accession': '@DISEASE_Death'},
  'text': 'deaths',
  'locations': [{'offset': 140, 'length': 6}]},
 {'id': '12',
  'infons': {'identifier': 'MESH:D020803',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D020803'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D020803',
   'biotype': 'disease',
   'name': 'Encephalitis Herpes Simplex',
   'accession': '@DISEASE_Encephalitis_Herpes_Simplex'},
  'text': 'infections1,2',
  'locations': [{'offset': 178, 'length': 13}]},
 {'id': '13',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'colistin3',
  'locations': [{'offset': 465, 'length': 9}]},
 {'id': '14',
  'infons': {'identifier': '-',
   'type': 'Chemical',

29590091


[{'id': '24',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Staphylococcus aureus infections',
  'locations': [{'offset': 118, 'length': 32}]},
 {'id': '25',
  'infons': {'identifier': 'MESH:D008712',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D008712'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D008712',
   'biotype': 'chemical',
   'name': 'Methicillin',
   'accession': '@CHEMICAL_Methicillin'},
  'text': 'methicillin',
  'locations': [{'offset': 177, 'length': 11}]},
 {'id': '26',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'S. aureus',
  'locations': [{'offset': 199, 'length': 9}]},
 {'id': '27',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'MRSA',
  'locations': [{'offset': 210, 'length': 4}]},
 {'id': '28',
  'infons': {'identifier': 'MESH:D007239',
   't

32499514


[{'id': '19281',
  'infons': {'identifier': 'MESH:D064420',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D064420'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D064420',
   'biotype': 'disease',
   'name': 'Drug-Related Side Effects and Adverse Reactions',
   'accession': '@DISEASE_Drug_Related_Side_Effects_and_Adverse_Reactions'},
  'text': 'toxicity',
  'locations': [{'offset': 1589, 'length': 8}]},
 {'id': '19282',
  'infons': {'identifier': '287',
   'type': 'Species',
   'valid': True,
   'normalized': [287],
   'database': 'ncbi_taxonomy',
   'normalized_id': 287,
   'biotype': 'species',
   'name': '287',
   'accession': None},
  'text': 'Pseudomonas aeruginosa',
  'locations': [{'offset': 1558, 'length': 22}]},
 {'id': '19283',
  'infons': {'identifier': 'MESH:D006461',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D006461'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D006461',
   'biotype': 'disease',
   'name': 'Hemolysis',
   'accession'

28671682


[{'id': '8',
  'infons': {'identifier': 'MESH:C082735',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C082735'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C082735',
   'biotype': 'chemical',
   'name': 'azetidine',
   'accession': '@CHEMICAL_azetidine'},
  'text': 'azetidine',
  'locations': [{'offset': 466, 'length': 9}]},
 {'id': '9',
  'infons': {'identifier': 'MESH:C000622376',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000622376'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000622376',
   'biotype': 'chemical',
   'name': 'BRD4592',
   'accession': '@CHEMICAL_BRD4592'},
  'text': 'BRD4592',
  'locations': [{'offset': 488, 'length': 7}]},
 {'id': '10',
  'infons': {'identifier': '1773',
   'type': 'Species',
   'valid': True,
   'normalized': [1773],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1773,
   'biotype': 'species',
   'name': '1773',
   'accession': None},
  'text': 'Mycobacterium tuberculosis',
  'locations': [{'offset

34707295


[{'id': '25',
  'infons': {'identifier': '629395',
   'type': 'Species',
   'valid': True,
   'normalized': [629395],
   'database': 'ncbi_taxonomy',
   'normalized_id': 629395,
   'biotype': 'species',
   'name': '629395',
   'accession': None},
  'text': 'bacteria',
  'locations': [{'offset': 139, 'length': 8}]},
 {'id': '26',
  'infons': {'identifier': 'MESH:D060467',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D060467'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D060467',
   'biotype': 'disease',
   'name': 'Disease Resistance',
   'accession': '@DISEASE_Disease_Resistance'},
  'text': 'resistance',
  'locations': [{'offset': 391, 'length': 10}]},
 {'id': '27',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'oxepanoproline',
  'locations': [{'offset': 771, 'length': 14}]},
 {'id': '28',
  'infons': {'identifier': 'MESH:D002981',
   'type': 'Chemical',
   'valid': True,
  

35338128


[{'id': '13855',
  'infons': {'identifier': 'MESH:D064420',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D064420'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D064420',
   'biotype': 'disease',
   'name': 'Drug-Related Side Effects and Adverse Reactions',
   'accession': '@DISEASE_Drug_Related_Side_Effects_and_Adverse_Reactions'},
  'text': 'toxicity',
  'locations': [{'offset': 515, 'length': 8}]},
 {'id': '13856',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'polymyxin lipopeptide antibiotics',
  'locations': [{'offset': 205, 'length': 33}]},
 {'id': '13857',
  'infons': {'identifier': '287',
   'type': 'Species',
   'valid': True,
   'normalized': [287],
   'database': 'ncbi_taxonomy',
   'normalized_id': 287,
   'biotype': 'species',
   'name': '287',
   'accession': None},
  'text': 'Pseudomonas aeruginosa',
  'locations': [{'offset': 1142, 'length': 22}]},
 {'id': '1385

24226776


[{'id': '14',
  'infons': {'identifier': 'MESH:D007239',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D007239'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D007239',
   'biotype': 'disease',
   'name': 'Infections',
   'accession': '@DISEASE_Infections'},
  'text': 'infections',
  'locations': [{'offset': 84, 'length': 10}]},
 {'id': '15',
  'infons': {'identifier': 'MESH:D007239',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D007239'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D007239',
   'biotype': 'disease',
   'name': 'Infections',
   'accession': '@DISEASE_Infections'},
  'text': 'infection',
  'locations': [{'offset': 562, 'length': 9}]},
 {'id': '16',
  'infons': {'identifier': 'MESH:D007239',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D007239'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D007239',
   'biotype': 'disease',
   'name': 'Infections',
   'accession': '@DISEASE_Infections'},
  'text': 'infection',
  'loc

30356080


[{'id': '22',
  'infons': {'identifier': '1931',
   'type': 'Species',
   'valid': True,
   'normalized': [1931],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1931,
   'biotype': 'species',
   'name': '1931',
   'accession': None},
  'text': 'Streptomyces sp.',
  'locations': [{'offset': 186, 'length': 16}]},
 {'id': '23',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'ICN19',
  'locations': [{'offset': 203, 'length': 5}]},
 {'id': '24',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'Ala-geninthiocin',
  'locations': [{'offset': 218, 'length': 16}]},
 {'id': '25',
  'infons': {'identifier': 'MESH:C089890',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C089890'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C089890',
   'biotype': 'chemical',
   'name': 'geninthiocin',
   'accession'

31451755


[{'id': '13',
  'infons': {'identifier': '629395',
   'type': 'Species',
   'valid': True,
   'normalized': [629395],
   'database': 'ncbi_taxonomy',
   'normalized_id': 629395,
   'biotype': 'species',
   'name': '629395',
   'accession': None},
  'text': 'bacteria',
  'locations': [{'offset': 113, 'length': 8}]},
 {'id': '14',
  'infons': {'identifier': 'MESH:C046156',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C046156'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C046156',
   'biotype': 'chemical',
   'name': 'albicidin',
   'accession': '@CHEMICAL_albicidin'},
  'text': 'Albicidin',
  'locations': [{'offset': 226, 'length': 9}]},
 {'id': '15',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'sugarcane',
  'locations': [{'offset': 252, 'length': 9}]},
 {'id': '16',
  'infons': {'identifier': '29447',
   'type': 'Species',
   'valid': True,
   'normalized': [29447],
   'database': 'ncbi_taxonomy',

33654250


[{'id': '17',
  'infons': {'identifier': 'MESH:D006679',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D006679'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D006679',
   'biotype': 'disease',
   'name': 'HIV Seropositivity',
   'accession': '@DISEASE_HIV_Seropositivity'},
  'text': 'anti-mannheimiosis',
  'locations': [{'offset': 137, 'length': 18}]},
 {'id': '18',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'aldsulfin',
  'locations': [{'offset': 163, 'length': 9}]},
 {'id': '19',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Lasiodiplodia pseudotheobromae FKI-4499',
  'locations': [{'offset': 222, 'length': 39}]},
 {'id': '20',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'lasiodipline C',
  'locations': [{'offset': 

35715695


[{'id': '22801',
  'infons': {'identifier': '1785',
   'type': 'Species',
   'valid': True,
   'normalized': [1785],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1785,
   'biotype': 'species',
   'name': '1785',
   'accession': None},
  'text': 'Mycobacterium sp',
  'locations': [{'offset': 1189, 'length': 16}]},
 {'id': '22802',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'Pyrimidomycin',
  'locations': [{'offset': 1327, 'length': 13}]},
 {'id': '22803',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'PSAA01',
  'locations': [{'offset': 727, 'length': 6}]},
 {'id': '22804',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'SM02',
  'locations': [{'offset': 891, 'length': 4}]},
 {'id': '22805',
  'infons': {'iden

34489569


[{'id': '46',
  'infons': {'type': 'Disease',
   'valid': False,
   'normalized_id': None,
   'biotype': 'disease'},
  'text': 'biofilm',
  'locations': [{'offset': 170, 'length': 7}]},
 {'id': '47',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'H2-60',
  'locations': [{'offset': 208, 'length': 5}]},
 {'id': '48',
  'infons': {'identifier': 'MESH:C000720758',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000720758'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000720758',
   'biotype': 'chemical',
   'name': 'H2-81',
   'accession': '@CHEMICAL_H2_81'},
  'text': 'H2-81',
  'locations': [{'offset': 218, 'length': 5}]},
 {'id': '49',
  'infons': {'identifier': '1311',
   'type': 'Species',
   'valid': True,
   'normalized': [1311],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1311,
   'biotype': 'species',
   'name': '1311',
   'accession': None},
  'text': 'Streptococc

32533072


[{'id': '12',
  'infons': {'identifier': 'MESH:D000880',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D000880'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D000880',
   'biotype': 'chemical',
   'name': 'Anthraquinones',
   'accession': '@CHEMICAL_Anthraquinones'},
  'text': 'anthraquinone',
  'locations': [{'offset': 91, 'length': 13}]},
 {'id': '13',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'termstrin A, B, C and D (1-4)',
  'locations': [{'offset': 118, 'length': 29}]},
 {'id': '14',
  'infons': {'identifier': '1931',
   'type': 'Species',
   'valid': True,
   'normalized': [1931],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1931,
   'biotype': 'species',
   'name': '1931',
   'accession': None},
  'text': 'Streptomyces sp.',
  'locations': [{'offset': 200, 'length': 16}]},
 {'id': '15',
  'infons': {'identifier': '1280',
   'type': 'Species',
   'valid': True

32439988


[{'id': '25',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Pseudomonas protegens UP46',
  'locations': [{'offset': 177, 'length': 26}]},
 {'id': '26',
  'infons': {'identifier': 'MESH:C059817',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C059817'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C059817',
   'biotype': 'chemical',
   'name': '2 4-diacetylphloroglucinol',
   'accession': '@CHEMICAL_2_4_diacetylphloroglucinol'},
  'text': '2,4-diacetylphloroglucinol',
  'locations': [{'offset': 277, 'length': 26}]},
 {'id': '27',
  'infons': {'identifier': 'MESH:C059817',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C059817'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C059817',
   'biotype': 'chemical',
   'name': '2 4-diacetylphloroglucinol',
   'accession': '@CHEMICAL_2_4_diacetylphloroglucinol'},
  'text': 'DAPG',
  'locations': [{'offset': 305, 'length': 4}]},
 {'id': '28',
  '

36806263


[{'id': '26803',
  'infons': {'identifier': '562',
   'type': 'Species',
   'valid': True,
   'normalized': [562],
   'database': 'ncbi_taxonomy',
   'normalized_id': 562,
   'biotype': 'species',
   'name': '562',
   'accession': None},
  'text': 'Escherichia coli',
  'locations': [{'offset': 316, 'length': 16}]},
 {'id': '26804',
  'infons': {'identifier': 'MESH:D012346',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D012346'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D012346',
   'biotype': 'chemical',
   'name': 'RNA Transfer Amino Acyl',
   'accession': '@CHEMICAL_RNA_Transfer_Amino_Acyl'},
  'text': 'aminoacyl-tRNAs',
  'locations': [{'offset': 604, 'length': 15}]},
 {'id': '26805',
  'infons': {'identifier': 'MESH:C005821',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C005821'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C005821',
   'biotype': 'chemical',
   'name': 'thermorubin',
   'accession': '@CHEMICAL_thermorubin'},
  'text': 'Th

32051569


[{'id': '26',
  'infons': {'identifier': 'MESH:C005466',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C005466'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C005466',
   'biotype': 'chemical',
   'name': 'ursolic acid',
   'accession': '@CHEMICAL_ursolic_acid'},
  'text': 'ursolic acid',
  'locations': [{'offset': 144, 'length': 12}]},
 {'id': '27',
  'infons': {'identifier': 'MESH:C005466',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C005466'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C005466',
   'biotype': 'chemical',
   'name': 'ursolic acid',
   'accession': '@CHEMICAL_ursolic_acid'},
  'text': 'UA',
  'locations': [{'offset': 158, 'length': 2}]},
 {'id': '28',
  'infons': {'identifier': '3750',
   'type': 'Species',
   'valid': True,
   'normalized': [3750],
   'database': 'ncbi_taxonomy',
   'normalized_id': 3750,
   'biotype': 'species',
   'name': '3750',
   'accession': None},
  'text': 'apple',
  'locations': [{'offset': 173, 'leng

30993215


[{'id': '20429',
  'infons': {'identifier': '6287478',
   'type': 'Gene',
   'valid': True,
   'normalized': [6287478],
   'database': 'ncbi_gene',
   'normalized_id': 6287478,
   'biotype': 'gene',
   'name': 'glnA',
   'accession': '@GENE_GLNA'},
  'text': 'glutamine synthetase',
  'locations': [{'offset': 472, 'length': 20}]},
 {'id': '20430',
  'infons': {'identifier': 'MESH:C000719071',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000719071'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000719071',
   'biotype': 'chemical',
   'name': 'arsinothricin',
   'accession': '@CHEMICAL_arsinothricin'},
  'text': 'arsinothricin',
  'locations': [{'offset': 1032, 'length': 13}]},
 {'id': '20431',
  'infons': {'identifier': 'MESH:D001153',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D001153'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D001153',
   'biotype': 'chemical',
   'name': 'Arsphenamine',
   'accession': '@CHEMICAL_Arsphenamine'},
  'text'

37264118


[{'id': '44',
  'infons': {'identifier': '1747',
   'type': 'Species',
   'valid': True,
   'normalized': [1747],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1747,
   'biotype': 'species',
   'name': '1747',
   'accession': None},
  'text': 'Cutibacterium acnes',
  'locations': [{'offset': 156, 'length': 19}]},
 {'id': '45',
  'infons': {'identifier': 'MESH:D000152',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D000152'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D000152',
   'biotype': 'disease',
   'name': 'Acne Vulgaris',
   'accession': '@DISEASE_Acne_Vulgaris'},
  'text': 'acne',
  'locations': [{'offset': 179, 'length': 4}]},
 {'id': '46',
  'infons': {'identifier': '9606',
   'type': 'Species',
   'valid': True,
   'normalized': [9606],
   'database': 'ncbi_taxonomy',
   'normalized_id': 9606,
   'biotype': 'species',
   'name': '9606',
   'accession': None},
  'text': 'patients',
  'locations': [{'offset': 184, 'length': 8}]},
 {'id': '47',
  'infon

36056035


[{'id': '19115',
  'infons': {'identifier': '1280',
   'type': 'Species',
   'valid': True,
   'normalized': [1280],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1280,
   'biotype': 'species',
   'name': '1280',
   'accession': None},
  'text': 'Staphylococcus aureus',
  'locations': [{'offset': 966, 'length': 21}]},
 {'id': '19116',
  'infons': {'identifier': '1900',
   'type': 'Species',
   'valid': True,
   'normalized': [1900],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1900,
   'biotype': 'species',
   'name': '1900',
   'accession': None},
  'text': 'Streptomyces',
  'locations': [{'offset': 141, 'length': 12}]},
 {'id': '19117',
  'infons': {'identifier': '1282',
   'type': 'Species',
   'valid': True,
   'normalized': [1282],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1282,
   'biotype': 'species',
   'name': '1282',
   'accession': None},
  'text': 'Staphylococcus epidermidis',
  'locations': [{'offset': 1015, 'length': 26}]},
 {'id': '19118',
  'infon

35136191


[{'id': '15',
  'infons': {'identifier': 'MESH:D014376',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D014376'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D014376',
   'biotype': 'disease',
   'name': 'Tuberculosis',
   'accession': '@DISEASE_Tuberculosis'},
  'text': 'tuberculosis',
  'locations': [{'offset': 85, 'length': 12}]},
 {'id': '16',
  'infons': {'identifier': 'MESH:D018088',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D018088'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D018088',
   'biotype': 'disease',
   'name': 'Tuberculosis Multidrug-Resistant',
   'accession': '@DISEASE_Tuberculosis_Multidrug_Resistant'},
  'text': '-resistant tuberculosis',
  'locations': [{'offset': 132, 'length': 23}]},
 {'id': '17',
  'infons': {'identifier': '1773',
   'type': 'Species',
   'valid': True,
   'normalized': [1773],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1773,
   'biotype': 'species',
   'name': '1773',
   'accession': None},
 

33767207


[{'id': '1034',
  'infons': {'identifier': '46126',
   'type': 'Species',
   'valid': True,
   'normalized': [46126],
   'database': 'ncbi_taxonomy',
   'normalized_id': 46126,
   'biotype': 'species',
   'name': '46126',
   'accession': None},
  'text': 'S. chromogenes',
  'locations': [{'offset': 529, 'length': 14}]},
 {'id': '1035',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Staphylococcus chromogenes ATCC43764',
  'locations': [{'offset': 304, 'length': 36}]},
 {'id': '1036',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'USA300',
  'locations': [{'offset': 929, 'length': 6}]},
 {'id': '1037',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'S. aureus',
  'locations': [{'offset': 1160, 'length': 9}]},
 {'id': '1038',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_

36991235


[{'id': '10',
  'infons': {'identifier': '1883',
   'type': 'Species',
   'valid': True,
   'normalized': [1883],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1883,
   'biotype': 'species',
   'name': '1883',
   'accession': None},
  'text': 'Streptomyces',
  'locations': [{'offset': 173, 'length': 12}]},
 {'id': '11',
  'infons': {'identifier': 'MESH:D061065',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D061065'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D061065',
   'biotype': 'chemical',
   'name': 'Polyketides',
   'accession': '@CHEMICAL_Polyketides'},
  'text': 'polyketides',
  'locations': [{'offset': 601, 'length': 11}]},
 {'id': '12',
  'infons': {'identifier': 'MESH:D013729',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D013729'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D013729',
   'biotype': 'chemical',
   'name': 'Terpenes',
   'accession': '@CHEMICAL_Terpenes'},
  'text': 'terpenes',
  'locations': [{'offset': 645, 'l

36163500


[{'id': '15',
  'infons': {'identifier': 'MESH:C000718067',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000718067'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000718067',
   'biotype': 'chemical',
   'name': 'darobactin',
   'accession': '@CHEMICAL_darobactin'},
  'text': 'Darobactins',
  'locations': [{'offset': 321, 'length': 11}]},
 {'id': '16',
  'infons': {'identifier': '286156',
   'type': 'Species',
   'valid': True,
   'normalized': [286156],
   'database': 'ncbi_taxonomy',
   'normalized_id': 286156,
   'biotype': 'species',
   'name': '286156',
   'accession': None},
  'text': 'Photorhabdus',
  'locations': [{'offset': 345, 'length': 12}]},
 {'id': '17',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'dynobactin A',
  'locations': [{'offset': 570, 'length': 12}]},
 {'id': '18',
  'infons': {'identifier': '286156',
   'type': 'Species',
   'valid': True,
   'norma

29434326


[{'id': '8813',
  'infons': {'identifier': 'MESH:D003643',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003643'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003643',
   'biotype': 'disease',
   'name': 'Death',
   'accession': '@DISEASE_Death'},
  'text': 'death',
  'locations': [{'offset': 239, 'length': 5}]},
 {'id': '8814',
  'infons': {'identifier': 'MESH:C000626256',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000626256'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000626256',
   'biotype': 'chemical',
   'name': 'malacidins',
   'accession': '@CHEMICAL_malacidins'},
  'text': 'malacidins',
  'locations': [{'offset': 1242, 'length': 10}]},
 {'id': '8815',
  'infons': {'identifier': 'MESH:D007239',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D007239'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D007239',
   'biotype': 'disease',
   'name': 'Infections',
   'accession': '@DISEASE_Infections'},
  'text': 'infections',

28442740


[{'id': '3402',
  'infons': {'identifier': 'MESH:D010068',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D010068'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D010068',
   'biotype': 'chemical',
   'name': 'Oxacillin',
   'accession': '@CHEMICAL_Oxacillin'},
  'text': 'oxacillin',
  'locations': [{'offset': 698, 'length': 9}]},
 {'id': '3403',
  'infons': {'identifier': 'MESH:D003523',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D003523'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003523',
   'biotype': 'chemical',
   'name': 'Cycloserine',
   'accession': '@CHEMICAL_Cycloserine'},
  'text': 'd-cycloserine',
  'locations': [{'offset': 671, 'length': 13}]},
 {'id': '3404',
  'infons': {'identifier': '1314',
   'type': 'Species',
   'valid': True,
   'normalized': [1314],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1314,
   'biotype': 'species',
   'name': '1314',
   'accession': None},
  'text': 'Streptococcus pyogenes',
  'locations':

37231267


[{'id': '11',
  'infons': {'identifier': '470',
   'type': 'Species',
   'valid': True,
   'normalized': [470],
   'database': 'ncbi_taxonomy',
   'normalized_id': 470,
   'biotype': 'species',
   'name': '470',
   'accession': None},
  'text': 'Acinetobacter baumannii',
  'locations': [{'offset': 83, 'length': 23}]},
 {'id': '12',
  'infons': {'identifier': '470',
   'type': 'Species',
   'valid': True,
   'normalized': [470],
   'database': 'ncbi_taxonomy',
   'normalized_id': 470,
   'biotype': 'species',
   'name': '470',
   'accession': None},
  'text': 'A. baumannii',
  'locations': [{'offset': 224, 'length': 12}]},
 {'id': '13',
  'infons': {'identifier': '470',
   'type': 'Species',
   'valid': True,
   'normalized': [470],
   'database': 'ncbi_taxonomy',
   'normalized_id': 470,
   'biotype': 'species',
   'name': '470',
   'accession': None},
  'text': 'A. baumannii',
  'locations': [{'offset': 536, 'length': 12}]},
 {'id': '14',
  'infons': {'identifier': '470',
   'type': '

36941353


[{'id': '14',
  'infons': {'identifier': 'MESH:D016905',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D016905'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D016905',
   'biotype': 'disease',
   'name': 'Gram-Negative Bacterial Infections',
   'accession': '@DISEASE_Gram_Negative_Bacterial_Infections'},
  'text': 'Gram-negative infections',
  'locations': [{'offset': 202, 'length': 24}]},
 {'id': '15',
  'infons': {'identifier': '9606',
   'type': 'Species',
   'valid': True,
   'normalized': [9606],
   'database': 'ncbi_taxonomy',
   'normalized_id': 9606,
   'biotype': 'species',
   'name': '9606',
   'accession': None},
  'text': 'patients',
  'locations': [{'offset': 263, 'length': 8}]},
 {'id': '16',
  'infons': {'identifier': 'MESH:C032036',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C032036'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C032036',
   'biotype': 'chemical',
   'name': 'bromobenzene',
   'accession': '@CHEMICAL_bromobenzene'

33441878


[{'id': '11220',
  'infons': {'identifier': 'MESH:C054312',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C054312'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C054312',
   'biotype': 'chemical',
   'name': '2 3-diaminophenazine',
   'accession': '@CHEMICAL_2_3_diaminophenazine'},
  'text': '2,3 diamino phenazine',
  'locations': [{'offset': 554, 'length': 21}]},
 {'id': '11221',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'RPZ',
  'locations': [{'offset': 408, 'length': 3}]},
 {'id': '11222',
  'infons': {'identifier': 'MESH:D012293',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D012293'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D012293',
   'biotype': 'chemical',
   'name': 'Rifampin',
   'accession': '@CHEMICAL_Rifampin'},
  'text': 'rifampicin',
  'locations': [{'offset': 327, 'length': 10}]},
 {'id': '11223',
  'infons': {'identifier': 'MESH:D003

30115920


[{'id': '1869',
  'infons': {'identifier': '10090',
   'type': 'Species',
   'valid': True,
   'normalized': [10090],
   'database': 'ncbi_taxonomy',
   'normalized_id': 10090,
   'biotype': 'species',
   'name': '10090',
   'accession': None},
  'text': 'mouse',
  'locations': [{'offset': 981, 'length': 5}]},
 {'id': '1870',
  'infons': {'identifier': '562',
   'type': 'Species',
   'valid': True,
   'normalized': [562],
   'database': 'ncbi_taxonomy',
   'normalized_id': 562,
   'biotype': 'species',
   'name': '562',
   'accession': None},
  'text': 'Escherichia coli',
  'locations': [{'offset': 869, 'length': 16}]},
 {'id': '1871',
  'infons': {'identifier': 'MESH:D007239',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D007239'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D007239',
   'biotype': 'disease',
   'name': 'Infections',
   'accession': '@DISEASE_Infections'},
  'text': 'infection',
  'locations': [{'offset': 993, 'length': 9}]},
 {'id': '1872',
  'info

36114335


[{'id': '12160',
  'infons': {'identifier': 'MESH:D008712',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D008712'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D008712',
   'biotype': 'chemical',
   'name': 'Methicillin',
   'accession': '@CHEMICAL_Methicillin'},
  'text': 'methicillin',
  'locations': [{'offset': 704, 'length': 11}]},
 {'id': '12161',
  'infons': {'identifier': '1280',
   'type': 'Species',
   'valid': True,
   'normalized': [1280],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1280,
   'biotype': 'species',
   'name': '1280',
   'accession': None},
  'text': 'Staphylococcus aureus',
  'locations': [{'offset': 726, 'length': 21}]},
 {'id': '12162',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'DEM30355',
  'locations': [{'offset': 245, 'length': 8}]},
 {'id': '12163',
  'infons': {'identifier': '37632',
   'type': 'Species',
   'valid': True,
   'norma

32404991


[{'id': '16',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Chelonaplysilla sp.',
  'locations': [{'offset': 151, 'length': 19}]},
 {'id': '17',
  'infons': {'identifier': '1773',
   'type': 'Species',
   'valid': True,
   'normalized': [1773],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1773,
   'biotype': 'species',
   'name': '1773',
   'accession': None},
  'text': 'Mycobacterium tuberculosis',
  'locations': [{'offset': 262, 'length': 26}]},
 {'id': '18',
  'infons': {'identifier': 'MESH:D004224',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D004224'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D004224',
   'biotype': 'chemical',
   'name': 'Diterpenes',
   'accession': '@CHEMICAL_Diterpenes'},
  'text': 'diterpenoid',
  'locations': [{'offset': 400, 'length': 11}]},
 {'id': '19',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'bi

34789759


[{'id': '13187',
  'infons': {'identifier': 'MESH:C012182',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C012182'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C012182',
   'biotype': 'chemical',
   'name': 'angustmycin A',
   'accession': '@CHEMICAL_angustmycin_A'},
  'text': 'angustmycin A',
  'locations': [{'offset': 402, 'length': 13}]},
 {'id': '13188',
  'infons': {'identifier': 'MESH:D003583',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D003583'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003583',
   'biotype': 'chemical',
   'name': 'Cytokinins',
   'accession': '@CHEMICAL_Cytokinins'},
  'text': 'cytokinin',
  'locations': [{'offset': 137, 'length': 9}]},
 {'id': '13189',
  'infons': {'identifier': 'MESH:C014174',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C014174'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C014174',
   'biotype': 'chemical',
   'name': 'psicofuranine',
   'accession': '@CHEMICAL_psicofuranin

38228859


[{'id': '4398',
  'infons': {'identifier': '36470',
   'type': 'Species',
   'valid': True,
   'normalized': [36470],
   'database': 'ncbi_taxonomy',
   'normalized_id': 36470,
   'biotype': 'species',
   'name': '36470',
   'accession': None},
  'text': 'GAS',
  'locations': [{'offset': 605, 'length': 3}]},
 {'id': '4399',
  'infons': {'identifier': '1446890',
   'type': 'Species',
   'valid': True,
   'normalized': [1446890],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1446890,
   'biotype': 'species',
   'name': '1446890',
   'accession': None},
  'text': 'SAL',
  'locations': [{'offset': 592, 'length': 3}]},
 {'id': '4400',
  'infons': {'identifier': '1446890',
   'type': 'Species',
   'valid': True,
   'normalized': [1446890],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1446890,
   'biotype': 'species',
   'name': '1446890',
   'accession': None},
  'text': 'SAL',
  'locations': [{'offset': 428, 'length': 3}]},
 {'id': '4401',
  'infons': {'identifier': '10090',
  

35618784


[{'id': '18',
  'infons': {'type': 'Gene',
   'valid': False,
   'normalized_id': None,
   'biotype': 'gene'},
  'text': 'FtsZ',
  'locations': [{'offset': 104, 'length': 4}]},
 {'id': '19',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'substituted 2,6-difluorobenzamide',
  'locations': [{'offset': 234, 'length': 33}]},
 {'id': '20',
  'infons': {'type': 'Gene',
   'valid': False,
   'normalized_id': None,
   'biotype': 'gene'},
  'text': 'FtsZ',
  'locations': [{'offset': 304, 'length': 4}]},
 {'id': '21',
  'infons': {'type': 'Disease',
   'valid': False,
   'normalized_id': None,
   'biotype': 'disease'},
  'text': 'MRSA',
  'locations': [{'offset': 356, 'length': 4}]},
 {'id': '22',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'TXA709',
  'locations': [{'offset': 371, 'length': 6}]},
 {'id': '23',
  '

35422103


[{'id': '29',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'IMB-XMA0038',
  'locations': [{'offset': 166, 'length': 11}]},
 {'id': '30',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Mycobacterial tuberculosis',
  'locations': [{'offset': 207, 'length': 26}]},
 {'id': '31',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Mtb',
  'locations': [{'offset': 235, 'length': 3}]},
 {'id': '32',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'IMB-XMA0038',
  'locations': [{'offset': 337, 'length': 11}]},
 {'id': '33',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Mtb',
  'locations': [{'offset': 365, 'length': 3}]},


33479520


[{'id': '35',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': '2,4-disubstituted-4H-[1,3,4]-thiadiazine-5-ones',
  'locations': [{'offset': 391, 'length': 47}]},
 {'id': '36',
  'infons': {'identifier': 'MESH:C000618293',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000618293'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000618293',
   'biotype': 'chemical',
   'name': 'CL-55 compound',
   'accession': '@CHEMICAL_CL_55_compound'},
  'text': 'Fluorothiazinon',
  'locations': [{'offset': 440, 'length': 15}]},
 {'id': '37',
  'infons': {'identifier': 'MESH:D005641',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D005641'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D005641',
   'biotype': 'chemical',
   'name': 'Tegafur',
   'accession': '@CHEMICAL_Tegafur'},
  'text': 'FT',
  'locations': [{'offset': 457, 'length': 2}]},
 {'id': '38',
  'infons': {'identifier

15981414


[{'id': '14',
  'infons': {'identifier': 'MESH:D009285',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D009285'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D009285',
   'biotype': 'chemical',
   'name': 'Naphthoquinones',
   'accession': '@CHEMICAL_Naphthoquinones'},
  'text': 'naphthoquinone',
  'locations': [{'offset': 89, 'length': 14}]},
 {'id': '15',
  'infons': {'identifier': 'MESH:C502104',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C502104'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C502104',
   'biotype': 'chemical',
   'name': 'fumaquinone',
   'accession': '@CHEMICAL_fumaquinone'},
  'text': 'fumaquinone',
  'locations': [{'offset': 116, 'length': 11}]},
 {'id': '16',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': '5,7-dihydroxy-2-methoxy-3-methyl-6-(3-methyl-but-2-enyl)[1,4]naphthoquinone',
  'locations': [{'offset': 129, 'length': 75}]},
 

33067466


[{'id': '9786',
  'infons': {'identifier': '629395',
   'type': 'Species',
   'valid': True,
   'normalized': [629395],
   'database': 'ncbi_taxonomy',
   'normalized_id': 629395,
   'biotype': 'species',
   'name': '629395',
   'accession': None},
  'text': 'bacteria',
  'locations': [{'offset': 219, 'length': 8}]},
 {'id': '9787',
  'infons': {'identifier': 'MESH:D003141',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003141'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003141',
   'biotype': 'disease',
   'name': 'Communicable Diseases',
   'accession': '@DISEASE_Communicable_Diseases'},
  'text': 'infectious diseases',
  'locations': [{'offset': 175, 'length': 19}]},
 {'id': '9788',
  'infons': {'identifier': 'MESH:C000708275',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000708275'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000708275',
   'biotype': 'chemical',
   'name': 'corbomycin',
   'accession': '@CHEMICAL_corbomycin'},
  'text':

34035338


[{'id': '1881',
  'infons': {'type': 'Disease',
   'valid': False,
   'normalized_id': None,
   'biotype': 'disease'},
  'text': 'pathogen',
  'locations': [{'offset': 963, 'length': 8}]},
 {'id': '1882',
  'infons': {'identifier': 'MESH:D003092',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003092'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003092',
   'biotype': 'disease',
   'name': 'Colitis',
   'accession': '@DISEASE_Colitis'},
  'text': 'colitis',
  'locations': [{'offset': 424, 'length': 7}]},
 {'id': '1883',
  'infons': {'identifier': '1496',
   'type': 'Species',
   'valid': True,
   'normalized': [1496],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1496,
   'biotype': 'species',
   'name': '1496',
   'accession': None},
  'text': 'Clostridioides difficile',
  'locations': [{'offset': 135, 'length': 24}]},
 {'id': '1884',
  'infons': {'identifier': 'MESH:D003967',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003967'],
   'database'

21730979


[]

35440771


[{'id': '23041',
  'infons': {'identifier': '1514',
   'type': 'Gene',
   'ncbi_homologene': '129366',
   'valid': True,
   'normalized': [1514],
   'database': 'ncbi_gene',
   'normalized_id': 1514,
   'biotype': 'gene',
   'name': 'CTSL',
   'accession': '@GENE_CTSL'},
  'text': 'cathepsin L',
  'locations': [{'offset': 786, 'length': 11}]},
 {'id': '23042',
  'infons': {'identifier': '1508',
   'type': 'Gene',
   'ncbi_homologene': '37550',
   'valid': True,
   'normalized': [1508],
   'database': 'ncbi_gene',
   'normalized_id': 1508,
   'biotype': 'gene',
   'name': 'CTSB',
   'accession': '@GENE_CTSB'},
  'text': 'cathepsin B',
  'locations': [{'offset': 799, 'length': 11}]},
 {'id': '23043',
  'infons': {'identifier': '1280',
   'type': 'Species',
   'valid': True,
   'normalized': [1280],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1280,
   'biotype': 'species',
   'name': '1280',
   'accession': None},
  'text': 'S. aureus',
  'locations': [{'offset': 1193, 'length': 9

35562592


[{'id': '39',
  'infons': {'identifier': '87882',
   'type': 'Species',
   'valid': True,
   'normalized': [87882],
   'database': 'ncbi_taxonomy',
   'normalized_id': 87882,
   'biotype': 'species',
   'name': '87882',
   'accession': None},
  'text': 'Burkholderia cepacia complex',
  'locations': [{'offset': 70, 'length': 28}]},
 {'id': '40',
  'infons': {'identifier': '1196791',
   'type': 'Species',
   'valid': True,
   'normalized': [1196791],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1196791,
   'biotype': 'species',
   'name': '1196791',
   'accession': None},
  'text': 'Bcc',
  'locations': [{'offset': 100, 'length': 3}]},
 {'id': '41',
  'infons': {'identifier': '1196791',
   'type': 'Species',
   'valid': True,
   'normalized': [1196791],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1196791,
   'biotype': 'species',
   'name': '1196791',
   'accession': None},
  'text': 'Bcc',
  'locations': [{'offset': 170, 'length': 3}]},
 {'id': '42',
  'infons': {'identif

34522025


[{'id': '7064',
  'infons': {'identifier': '20525',
   'type': 'Gene',
   'valid': True,
   'normalized': [20525],
   'database': 'ncbi_gene',
   'normalized_id': 20525,
   'biotype': 'gene',
   'name': 'Slc2a1',
   'accession': '@GENE_SLC2A1'},
  'text': 'GT-1',
  'locations': [{'offset': 1052, 'length': 4}]},
 {'id': '7065',
  'infons': {'identifier': '20525',
   'type': 'Gene',
   'valid': True,
   'normalized': [20525],
   'database': 'ncbi_gene',
   'normalized_id': 20525,
   'biotype': 'gene',
   'name': 'Slc2a1',
   'accession': '@GENE_SLC2A1'},
  'text': 'GT-1',
  'locations': [{'offset': 780, 'length': 4}]},
 {'id': '7066',
  'infons': {'identifier': '20525',
   'type': 'Gene',
   'valid': True,
   'normalized': [20525],
   'database': 'ncbi_gene',
   'normalized_id': 20525,
   'biotype': 'gene',
   'name': 'Slc2a1',
   'accession': '@GENE_SLC2A1'},
  'text': 'GT-1',
  'locations': [{'offset': 509, 'length': 4}]},
 {'id': '7067',
  'infons': {'identifier': '-',
   'type': 'Che

29348522


[{'id': '42',
  'infons': {'identifier': 'MESH:D008712',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D008712'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D008712',
   'biotype': 'chemical',
   'name': 'Methicillin',
   'accession': '@CHEMICAL_Methicillin'},
  'text': 'methicillin',
  'locations': [{'offset': 110, 'length': 11}]},
 {'id': '43',
  'infons': {'identifier': '1280',
   'type': 'Species',
   'valid': True,
   'normalized': [1280],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1280,
   'biotype': 'species',
   'name': '1280',
   'accession': None},
  'text': 'Staphylococcus aureus',
  'locations': [{'offset': 132, 'length': 21}]},
 {'id': '44',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'MRSA',
  'locations': [{'offset': 155, 'length': 4}]},
 {'id': '45',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Pseudomonas

36781977


[{'id': '23',
  'infons': {'identifier': 'MESH:D010619',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D010619'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D010619',
   'biotype': 'chemical',
   'name': 'Phenazines',
   'accession': '@CHEMICAL_Phenazines'},
  'text': 'phenazines',
  'locations': [{'offset': 136, 'length': 10}]},
 {'id': '24',
  'infons': {'identifier': 'MESH:C033411',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C033411'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C033411',
   'biotype': 'chemical',
   'name': '1-methoxyphenazine',
   'accession': '@CHEMICAL_1_methoxyphenazine'},
  'text': '1-methoxyphenazine',
  'locations': [{'offset': 148, 'length': 18}]},
 {'id': '25',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'methyl-6-methoxyphenazine-1-carboxylate',
  'locations': [{'offset': 172, 'length': 39}]},
 {'id': '26',
  'infons': {'i

32346089


[{'id': '42',
  'infons': {'identifier': 'MESH:D018461',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D018461'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D018461',
   'biotype': 'disease',
   'name': 'Soft Tissue Infections',
   'accession': '@DISEASE_Soft_Tissue_Infections'},
  'text': 'skin and soft tissue infections',
  'locations': [{'offset': 164, 'length': 31}]},
 {'id': '43',
  'infons': {'identifier': 'MESH:C508887',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C508887'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C508887',
   'biotype': 'chemical',
   'name': 'retapamulin',
   'accession': '@CHEMICAL_retapamulin'},
  'text': 'retapamulin',
  'locations': [{'offset': 241, 'length': 11}]},
 {'id': '44',
  'infons': {'identifier': 'MESH:D018942',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D018942'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D018942',
   'biotype': 'chemical',
   'name': 'Macrolides',
   'accession

36997647


[{'id': '1522',
  'infons': {'identifier': '36635',
   'type': 'Gene',
   'valid': True,
   'normalized': [36635],
   'database': 'ncbi_gene',
   'normalized_id': 36635,
   'biotype': 'gene',
   'name': 'Dro',
   'accession': '@GENE_DRO'},
  'text': 'Drosocin',
  'locations': [{'offset': 119, 'length': 8}]},
 {'id': '1523',
  'infons': {'identifier': '7460',
   'type': 'Species',
   'valid': True,
   'normalized': [7460],
   'database': 'ncbi_taxonomy',
   'normalized_id': 7460,
   'biotype': 'species',
   'name': '7460',
   'accession': None},
  'text': 'honeybees',
  'locations': [{'offset': 534, 'length': 9}]},
 {'id': '1524',
  'infons': {'identifier': '7147',
   'type': 'Species',
   'valid': True,
   'normalized': [7147],
   'database': 'ncbi_taxonomy',
   'normalized_id': 7147,
   'biotype': 'species',
   'name': '7147',
   'accession': None},
  'text': 'flies',
  'locations': [{'offset': 145, 'length': 5}]}]

31919422


[{'id': '25',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'pentaminomycin C',
  'locations': [{'offset': 160, 'length': 16}]},
 {'id': '26',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Streptomyces cacaoi subsp.',
  'locations': [{'offset': 209, 'length': 26}]},
 {'id': '27',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'cacaoi',
  'locations': [{'offset': 236, 'length': 6}]},
 {'id': '28',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'NBRC 12748T',
  'locations': [{'offset': 243, 'length': 11}]},
 {'id': '29',
  'infons': {'identifier': 'MESH:C072732',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C072732'],
   'database': 'ncbi_mesh',
   'normalized_id'

34949834


[{'id': '18',
  'infons': {'identifier': '37162',
   'type': 'Species',
   'valid': True,
   'normalized': [37162],
   'database': 'ncbi_taxonomy',
   'normalized_id': 37162,
   'biotype': 'species',
   'name': '37162',
   'accession': None},
  'text': 'Mycobacterium avium complex',
  'locations': [{'offset': 154, 'length': 27}]},
 {'id': '19',
  'infons': {'identifier': '37162',
   'type': 'Species',
   'valid': True,
   'normalized': [37162],
   'database': 'ncbi_taxonomy',
   'normalized_id': 37162,
   'biotype': 'species',
   'name': '37162',
   'accession': None},
  'text': 'MAC',
  'locations': [{'offset': 183, 'length': 3}]},
 {'id': '20',
  'infons': {'identifier': 'MESH:C000722570',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000722570'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000722570',
   'biotype': 'chemical',
   'name': 'kimidinomycin',
   'accession': '@CHEMICAL_kimidinomycin'},
  'text': 'kimidinomycin',
  'locations': [{'offset': 267, 'lengt

31582803


[{'id': '24',
  'infons': {'identifier': 'MESH:C047544',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C047544'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C047544',
   'biotype': 'chemical',
   'name': 'liposidomycins',
   'accession': '@CHEMICAL_liposidomycins'},
  'text': 'Liposidomycin',
  'locations': [{'offset': 224, 'length': 13}]},
 {'id': '25',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'uridyl liponucleoside antibiotic',
  'locations': [{'offset': 243, 'length': 32}]},
 {'id': '26',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Streptomyces griseosporeus RK-1061',
  'locations': [{'offset': 290, 'length': 34}]},
 {'id': '27',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'peptidyl nucleoside antibiotic',
  

33087772


[{'id': '18223',
  'infons': {'identifier': 'MESH:D005557',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D005557'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D005557',
   'biotype': 'chemical',
   'name': 'Formaldehyde',
   'accession': '@CHEMICAL_Formaldehyde'},
  'text': 'formaldehyde',
  'locations': [{'offset': 947, 'length': 12}]},
 {'id': '18224',
  'infons': {'identifier': 'MESH:D005557',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D005557'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D005557',
   'biotype': 'chemical',
   'name': 'Formaldehyde',
   'accession': '@CHEMICAL_Formaldehyde'},
  'text': 'formaldehyde',
  'locations': [{'offset': 379, 'length': 12}]},
 {'id': '18225',
  'infons': {'identifier': 'MESH:C005195',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C005195'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C005195',
   'biotype': 'chemical',
   'name': 'methampicillin',
   'accession': '@CHEMICAL_metham

33558649


[{'id': '25',
  'infons': {'identifier': 'MESH:C000875',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000875'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000875',
   'biotype': 'chemical',
   'name': 'depsidone',
   'accession': '@CHEMICAL_depsidone'},
  'text': 'depsidone',
  'locations': [{'offset': 274, 'length': 9}]},
 {'id': '26',
  'infons': {'identifier': '4530',
   'type': 'Species',
   'valid': True,
   'normalized': [4530],
   'database': 'ncbi_taxonomy',
   'normalized_id': 4530,
   'biotype': 'species',
   'name': '4530',
   'accession': None},
  'text': 'rice',
  'locations': [{'offset': 321, 'length': 4}]},
 {'id': '27',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Chaetomium brasiliense SD-596',
  'locations': [{'offset': 335, 'length': 29}]},
 {'id': '28',
  'infons': {'identifier': 'MESH:C000875',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000875'],
   'databa

31558775


[{'id': '38',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'muraminomicin',
  'locations': [{'offset': 97, 'length': 13}]},
 {'id': '39',
  'infons': {'identifier': 'MESH:D008712',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D008712'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D008712',
   'biotype': 'chemical',
   'name': 'Methicillin',
   'accession': '@CHEMICAL_Methicillin'},
  'text': 'methicillin',
  'locations': [{'offset': 159, 'length': 11}]},
 {'id': '40',
  'infons': {'identifier': '1280',
   'type': 'Species',
   'valid': True,
   'normalized': [1280],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1280,
   'biotype': 'species',
   'name': '1280',
   'accession': None},
  'text': 'Staphylococcus aureus',
  'locations': [{'offset': 181, 'length': 21}]},
 {'id': '41',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': '

30209367


[{'id': '10',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'arylomycins',
  'locations': [{'offset': 408, 'length': 11}]},
 {'id': '11',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'G0775',
  'locations': [{'offset': 498, 'length': 5}]},
 {'id': '12',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'G0775',
  'locations': [{'offset': 585, 'length': 5}]},
 {'id': '13',
  'infons': {'identifier': 'MESH:D007239',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D007239'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D007239',
   'biotype': 'disease',
   'name': 'Infections',
   'accession': '@DISEASE_Infections'},
  'text': 'infection',
  'locations': [{'offset': 903, 'length': 9}]},
 {'id': '14',
  'infon

33390588


[{'id': '8688',
  'infons': {'identifier': '1773',
   'type': 'Species',
   'valid': True,
   'normalized': [1773],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1773,
   'biotype': 'species',
   'name': '1773',
   'accession': None},
  'text': 'Mycobacterium tuberculosis',
  'locations': [{'offset': 671, 'length': 26}]},
 {'id': '8689',
  'infons': {'identifier': 'MESH:D014364',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D014364'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D014364',
   'biotype': 'chemical',
   'name': 'Tryptophan',
   'accession': '@CHEMICAL_Tryptophan'},
  'text': 'tryptophan',
  'locations': [{'offset': 595, 'length': 10}]},
 {'id': '8690',
  'infons': {'identifier': 'MESH:D009171',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D009171'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D009171',
   'biotype': 'chemical',
   'name': 'Mycolic Acids',
   'accession': '@CHEMICAL_Mycolic_Acids'},
  'text': 'mycolic-acid',
  '

30232378


[{'id': '17',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'pestiocandin (1)',
  'locations': [{'offset': 210, 'length': 16}]},
 {'id': '18',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'Pestalotiopsis humus FKI-7473',
  'locations': [{'offset': 274, 'length': 29}]},
 {'id': '19',
  'infons': {'identifier': '4932',
   'type': 'Species',
   'valid': True,
   'normalized': [4932],
   'database': 'ncbi_taxonomy',
   'normalized_id': 4932,
   'biotype': 'species',
   'name': '4932',
   'accession': None},
  'text': 'yeast',
  'locations': [{'offset': 341, 'length': 5}]},
 {'id': '20',
  'infons': {'identifier': '4932',
   'type': 'Species',
   'valid': True,
   'normalized': [4932],
   'database': 'ncbi_taxonomy',
   'normalized_id': 4932,
   'biotype': 'species',
   'name': '4932',
   'accession': None},
  'text': 'S. cerevisiae',

35013254


[{'id': '27266',
  'infons': {'identifier': '562',
   'type': 'Species',
   'valid': True,
   'normalized': [562],
   'database': 'ncbi_taxonomy',
   'normalized_id': 562,
   'biotype': 'species',
   'name': '562',
   'accession': None},
  'text': 'E. coli',
  'locations': [{'offset': 303, 'length': 7}]},
 {'id': '27267',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'pyridylpiperazine',
  'locations': [{'offset': 230, 'length': 17}]},
 {'id': '27268',
  'infons': {'identifier': '562',
   'type': 'Species',
   'valid': True,
   'normalized': [562],
   'database': 'ncbi_taxonomy',
   'normalized_id': 562,
   'biotype': 'species',
   'name': '562',
   'accession': None},
  'text': 'E. coli',
  'locations': [{'offset': 404, 'length': 7}]}]

34526714


[{'id': '11',
  'infons': {'identifier': 'MESH:D007239',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D007239'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D007239',
   'biotype': 'disease',
   'name': 'Infections',
   'accession': '@DISEASE_Infections'},
  'text': 'infections',
  'locations': [{'offset': 117, 'length': 10}]},
 {'id': '12',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'diazabicyclooctane',
  'locations': [{'offset': 318, 'length': 18}]},
 {'id': '13',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'Diazabicyclooctane',
  'locations': [{'offset': 538, 'length': 18}]},
 {'id': '14',
  'infons': {'identifier': 'MESH:D047090',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D047090'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D047090',
   'biotype': 'chemi

34848841


[{'id': '17',
  'infons': {'identifier': 'MESH:C003169',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C003169'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C003169',
   'biotype': 'chemical',
   'name': 'pluramycin',
   'accession': '@CHEMICAL_pluramycin'},
  'text': 'pluramycin',
  'locations': [{'offset': 85, 'length': 10}]},
 {'id': '18',
  'infons': {'identifier': 'MESH:D061065',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D061065'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D061065',
   'biotype': 'chemical',
   'name': 'Polyketides',
   'accession': '@CHEMICAL_Polyketides'},
  'text': 'polyketide',
  'locations': [{'offset': 102, 'length': 10}]},
 {'id': '19',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'rausuquinone (1)',
  'locations': [{'offset': 114, 'length': 16}]},
 {'id': '20',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'va

25095806


[{'id': '11',
  'infons': {'identifier': 'MESH:D000078262',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D000078262'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D000078262',
   'biotype': 'chemical',
   'name': 'Rifaximin',
   'accession': '@CHEMICAL_Rifaximin'},
  'text': 'Rifaximin',
  'locations': [{'offset': 55, 'length': 9}]},
 {'id': '12',
  'infons': {'identifier': 'MESH:D012293',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D012293'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D012293',
   'biotype': 'chemical',
   'name': 'Rifampin',
   'accession': '@CHEMICAL_Rifampin'},
  'text': 'rifampin',
  'locations': [{'offset': 112, 'length': 8}]},
 {'id': '13',
  'infons': {'identifier': 'MESH:D000078262',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D000078262'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D000078262',
   'biotype': 'chemical',
   'name': 'Rifaximin',
   'accession': '@CHEMICAL_Rifaximin'},
  'text': 'Ri

28248311


[{'id': '3811',
  'infons': {'identifier': '1773',
   'type': 'Species',
   'valid': True,
   'normalized': [1773],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1773,
   'biotype': 'species',
   'name': '1773',
   'accession': None},
  'text': 'Mtb',
  'locations': [{'offset': 768, 'length': 3}]},
 {'id': '3812',
  'infons': {'identifier': 'MESH:D014376',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D014376'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D014376',
   'biotype': 'disease',
   'name': 'Tuberculosis',
   'accession': '@DISEASE_Tuberculosis'},
  'text': 'TB',
  'locations': [{'offset': 982, 'length': 2}]},
 {'id': '3813',
  'infons': {'identifier': 'MESH:D014376',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D014376'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D014376',
   'biotype': 'disease',
   'name': 'Tuberculosis',
   'accession': '@DISEASE_Tuberculosis'},
  'text': 'TB',
  'locations': [{'offset': 130, 'length': 2}]},
 {

32132676


[{'id': '10',
  'infons': {'identifier': 'MESH:D003015',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003015'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003015',
   'biotype': 'disease',
   'name': 'Clostridium Infections',
   'accession': '@DISEASE_Clostridium_Infections'},
  'text': 'Clostridium difficile',
  'locations': [{'offset': 83, 'length': 21}]},
 {'id': '11',
  'infons': {'identifier': 'MESH:D003015',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003015'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003015',
   'biotype': 'disease',
   'name': 'Clostridium Infections',
   'accession': '@DISEASE_Clostridium_Infections'},
  'text': 'C. difficile infections',
  'locations': [{'offset': 372, 'length': 23}]},
 {'id': '12',
  'infons': {'identifier': 'MESH:D003015',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003015'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003015',
   'biotype': 'disease',
   'name': 'Clostrid

32249833


[{'id': '145',
  'infons': {'identifier': 'MESH:D003015',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003015'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003015',
   'biotype': 'disease',
   'name': 'Clostridium Infections',
   'accession': '@DISEASE_Clostridium_Infections'},
  'text': 'C. difficile',
  'locations': [{'offset': 1033, 'length': 12}]},
 {'id': '146',
  'infons': {'identifier': 'MESH:C066851',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C066851'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C066851',
   'biotype': 'chemical',
   'name': 'mithramycin A',
   'accession': '@CHEMICAL_mithramycin_A'},
  'text': 'mithramycin A',
  'locations': [{'offset': 1107, 'length': 13}]},
 {'id': '147',
  'infons': {'identifier': 'MESH:D016685',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D016685'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D016685',
   'biotype': 'chemical',
   'name': 'Mitomycin',
   'accession': '@CHEM

33981028


[{'id': '6631',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'lanostane',
  'locations': [{'offset': 381, 'length': 9}]},
 {'id': '6632',
  'infons': {'identifier': 'MESH:D014376',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D014376'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D014376',
   'biotype': 'disease',
   'name': 'Tuberculosis',
   'accession': '@DISEASE_Tuberculosis'},
  'text': 'tuberculosis',
  'locations': [{'offset': 506, 'length': 12}]},
 {'id': '6633',
  'infons': {'identifier': '1773',
   'type': 'Species',
   'valid': True,
   'normalized': [1773],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1773,
   'biotype': 'species',
   'name': '1773',
   'accession': None},
  'text': 'H37Rv',
  'locations': [{'offset': 461, 'length': 5}]},
 {'id': '6634',
  'infons': {'identifier': '419947',
   'type': 'Species',
   'valid': True,
   'normalized': [419947],
 

32572099


[{'id': '1437',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'SM1',
  'locations': [{'offset': 311, 'length': 3}]},
 {'id': '1438',
  'infons': {'identifier': 'MESH:D018088',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D018088'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D018088',
   'biotype': 'disease',
   'name': 'Tuberculosis Multidrug-Resistant',
   'accession': '@DISEASE_Tuberculosis_Multidrug_Resistant'},
  'text': 'multidrug resistance',
  'locations': [{'offset': 1324, 'length': 20}]},
 {'id': '1439',
  'infons': {'identifier': 'MESH:D000667',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D000667'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D000667',
   'biotype': 'chemical',
   'name': 'Ampicillin',
   'accession': '@CHEMICAL_Ampicillin'},
  'text': 'ampicillin',
  'locations': [{'offset': 598, 'length': 10}]},
 {'id': '1440',
  'infons': {'id

31949315


[{'id': '3446',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'fasamycin',
  'locations': [{'offset': 759, 'length': 9}]},
 {'id': '3447',
  'infons': {'identifier': 'MESH:D014640',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D014640'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D014640',
   'biotype': 'chemical',
   'name': 'Vancomycin',
   'accession': '@CHEMICAL_Vancomycin'},
  'text': 'vancomycin',
  'locations': [{'offset': 574, 'length': 10}]},
 {'id': '3448',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'fasamycin',
  'locations': [{'offset': 412, 'length': 9}]},
 {'id': '3449',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'MRSA',
  'locations': [{'offset': 564, 'length': 4}]},
 {'id': '3450',
  'infons': {'ide

35882958


[{'id': '4040',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'suertide A',
  'locations': [{'offset': 345, 'length': 10}]},
 {'id': '4041',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'MST-135876',
  'locations': [{'offset': 232, 'length': 10}]},
 {'id': '4042',
  'infons': {'identifier': '37632',
   'type': 'Species',
   'valid': True,
   'normalized': [37632],
   'database': 'ncbi_taxonomy',
   'normalized_id': 37632,
   'biotype': 'species',
   'name': '37632',
   'accession': None},
  'text': 'Amycolatopsis sp',
  'locations': [{'offset': 97, 'length': 16}]},
 {'id': '4043',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'd-Val',
  'locations': [{'offset': 395, 'length': 5}]},
 {'id': '4044',
  'infons': {'identifier': '-',
   'type': 

36642755


[{'id': '21',
  'infons': {'type': 'Disease',
   'valid': False,
   'normalized_id': None,
   'biotype': 'disease'},
  'text': 'ESKAPE',
  'locations': [{'offset': 263, 'length': 6}]},
 {'id': '22',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'sulfoxanthocillin (1)',
  'locations': [{'offset': 358, 'length': 21}]},
 {'id': '23',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'peniformamide',
  'locations': [{'offset': 415, 'length': 13}]},
 {'id': '24',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'xanthocillin X (3)',
  'locations': [{'offset': 457, 'length': 18}]},
 {'id': '25',
  'infons': {'identifier': '5081',
   'type': 'Species',
   'valid': True,
   'normalized': [5081],
   'database': 'ncbi_taxonomy',
   'n

31992865


[{'id': '20174',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'MRSA',
  'locations': [{'offset': 1274, 'length': 4}]},
 {'id': '20175',
  'infons': {'identifier': 'MESH:D010068',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D010068'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D010068',
   'biotype': 'chemical',
   'name': 'Oxacillin',
   'accession': '@CHEMICAL_Oxacillin'},
  'text': 'oxacillin',
  'locations': [{'offset': 1256, 'length': 9}]},
 {'id': '20176',
  'infons': {'identifier': 'MESH:D010068',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D010068'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D010068',
   'biotype': 'chemical',
   'name': 'Oxacillin',
   'accession': '@CHEMICAL_Oxacillin'},
  'text': 'oxacillin',
  'locations': [{'offset': 475, 'length': 9}]},
 {'id': '20177',
  'infons': {'identifier': '1352',
   'type': 'Species',
   'valid': True,
   'normalized': [

36539416


[{'id': '19307',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'sphaerimicin',
  'locations': [{'offset': 792, 'length': 12}]},
 {'id': '19308',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'sphaerimicins',
  'locations': [{'offset': 466, 'length': 13}]},
 {'id': '19309',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'S. aureus',
  'locations': [{'offset': 990, 'length': 9}]},
 {'id': '19310',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'Sphaerimicins',
  'locations': [{'offset': 300, 'length': 13}]},
 {'id': '19311',
  'infons': {'identifier': '1352',
   'type': 'Species',
   'valid': True,
   'normalized': [1352],
   'database': 'ncbi_taxonomy',
 

34837061


[{'id': '27',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'hydroxamate',
  'locations': [{'offset': 628, 'length': 11}]},
 {'id': '28',
  'infons': {'identifier': 'MESH:C000722661',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C000722661'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C000722661',
   'biotype': 'chemical',
   'name': 'TP0586532',
   'accession': '@CHEMICAL_TP0586532'},
  'text': 'TP0586532',
  'locations': [{'offset': 838, 'length': 9}]},
 {'id': '29',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'hydroxamate',
  'locations': [{'offset': 855, 'length': 11}]},
 {'id': '30',
  'infons': {'identifier': 'MESH:D015780',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D015780'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D015780',
   'biotype': 'chemical',


33767455


[{'id': '19',
  'infons': {'identifier': 'MESH:D004151',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D004151'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D004151',
   'biotype': 'chemical',
   'name': 'Dipeptides',
   'accession': '@CHEMICAL_Dipeptides'},
  'text': 'dipeptides',
  'locations': [{'offset': 144, 'length': 10}]},
 {'id': '20',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'H-Lys-Gpn-PEA',
  'locations': [{'offset': 155, 'length': 13}]},
 {'id': '21',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'H-Lys-beta3,3AC6C-PEA',
  'locations': [{'offset': 174, 'length': 21}]},
 {'id': '22',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'THPA',
  'locations': [{'offset': 205, 'length': 4}]},

34234284


[{'id': '27',
  'infons': {'identifier': 'MESH:D007239',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D007239'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D007239',
   'biotype': 'disease',
   'name': 'Infections',
   'accession': '@DISEASE_Infections'},
  'text': 'superbug infection',
  'locations': [{'offset': 103, 'length': 18}]},
 {'id': '28',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'metallo-beta-lactamases',
  'locations': [{'offset': 132, 'length': 23}]},
 {'id': '29',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'MbetaLs',
  'locations': [{'offset': 157, 'length': 7}]},
 {'id': '30',
  'infons': {'type': 'Species',
   'valid': False,
   'normalized_id': None,
   'biotype': 'species'},
  'text': 'New Delhi metallo-beta-lactamase',
  'locations': [{'offset': 214, 'length': 3

30181560


[{'id': '8362',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'peptidylnucleoside',
  'locations': [{'offset': 672, 'length': 18}]},
 {'id': '8363',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'Albomycin delta2',
  'locations': [{'offset': 906, 'length': 16}]},
 {'id': '8364',
  'infons': {'identifier': 'MESH:C015657',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C015657'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C015657',
   'biotype': 'chemical',
   'name': 'albomycin',
   'accession': '@CHEMICAL_albomycin'},
  'text': 'albomycins',
  'locations': [{'offset': 792, 'length': 10}]},
 {'id': '8365',
  'infons': {'identifier': 'MESH:D013203',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D013203'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D013203',
   'biotype': 

32424122


[{'id': '2996',
  'infons': {'identifier': 'MESH:C068161',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C068161'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C068161',
   'biotype': 'chemical',
   'name': 'venturicidin A',
   'accession': '@CHEMICAL_venturicidin_A'},
  'text': 'VentA',
  'locations': [{'offset': 1452, 'length': 5}]},
 {'id': '2997',
  'infons': {'identifier': 'MESH:D003643',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D003643'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D003643',
   'biotype': 'disease',
   'name': 'Death',
   'accession': '@DISEASE_Death'},
  'text': 'death',
  'locations': [{'offset': 265, 'length': 5}]},
 {'id': '2998',
  'infons': {'identifier': 'MESH:C068161',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['C068161'],
   'database': 'ncbi_mesh',
   'normalized_id': 'C068161',
   'biotype': 'chemical',
   'name': 'venturicidin A',
   'accession': '@CHEMICAL_venturicidin_A'},
  'text': 'venturic

20057515


[{'id': '19',
  'infons': {'type': 'Gene',
   'valid': False,
   'normalized_id': None,
   'biotype': 'gene'},
  'text': 'WalR',
  'locations': [{'offset': 119, 'length': 4}]},
 {'id': '20',
  'infons': {'identifier': '1423',
   'type': 'Species',
   'valid': True,
   'normalized': [1423],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1423,
   'biotype': 'species',
   'name': '1423',
   'accession': None},
  'text': 'Bacillus subtilis',
  'locations': [{'offset': 267, 'length': 17}]},
 {'id': '21',
  'infons': {'identifier': '1280',
   'type': 'Species',
   'valid': True,
   'normalized': [1280],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1280,
   'biotype': 'species',
   'name': '1280',
   'accession': None},
  'text': 'Staphylococcus aureus',
  'locations': [{'offset': 289, 'length': 21}]},
 {'id': '22',
  'infons': {'identifier': '1931',
   'type': 'Species',
   'valid': True,
   'normalized': [1931],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1931,
   'bioty

36071214


[{'id': '10652',
  'infons': {'identifier': 'MESH:D008712',
   'type': 'Chemical',
   'valid': True,
   'normalized': ['D008712'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D008712',
   'biotype': 'chemical',
   'name': 'Methicillin',
   'accession': '@CHEMICAL_Methicillin'},
  'text': 'methicillin',
  'locations': [{'offset': 783, 'length': 11}]},
 {'id': '10653',
  'infons': {'identifier': '1213861',
   'type': 'Species',
   'valid': True,
   'normalized': [1213861],
   'database': 'ncbi_taxonomy',
   'normalized_id': 1213861,
   'biotype': 'species',
   'name': '1213861',
   'accession': None},
  'text': 'Actinocrispum wychmicini',
  'locations': [{'offset': 313, 'length': 24}]},
 {'id': '10654',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'valid': False,
   'normalized_id': None,
   'biotype': 'chemical'},
  'text': 'spirotetronates',
  'locations': [{'offset': 372, 'length': 15}]},
 {'id': '10655',
  'infons': {'identifier': '-',
   'type': 'Chemical',
   'val

33741965


[{'id': '9349',
  'infons': {'identifier': '10090',
   'type': 'Species',
   'valid': True,
   'normalized': [10090],
   'database': 'ncbi_taxonomy',
   'normalized_id': 10090,
   'biotype': 'species',
   'name': '10090',
   'accession': None},
  'text': 'mice',
  'locations': [{'offset': 680, 'length': 4}]},
 {'id': '9350',
  'infons': {'identifier': 'MESH:D006069',
   'type': 'Disease',
   'valid': True,
   'normalized': ['D006069'],
   'database': 'ncbi_mesh',
   'normalized_id': 'D006069',
   'biotype': 'disease',
   'name': 'Gonorrhea',
   'accession': '@DISEASE_Gonorrhea'},
  'text': 'Neisseria gonorrhoeae infection',
  'locations': [{'offset': 645, 'length': 31}]},
 {'id': '9351',
  'infons': {'identifier': '9606',
   'type': 'Species',
   'valid': True,
   'normalized': [9606],
   'database': 'ncbi_taxonomy',
   'normalized_id': 9606,
   'biotype': 'species',
   'name': '9606',
   'accession': None},
  'text': 'humans',
  'locations': [{'offset': 310, 'length': 6}]},
 {'id': '9

In [None]:
# Rows for which the PubTator column holds an empty list (no annotations kept).
pos_df.loc[pos_df['PubTator'].str.len().lt(1)]

Unnamed: 0,Title,PMID,Targets,Abstract,PubTator
41,Identification and evaluation of a potent nove...,21730979,['NU-6027'],The ataxia telangiectasia mutated and Rad3-rel...,[]
48,Inhibition of translation termination by the a...,36997647,['Drosocin'],The proline-rich antimicrobial peptide (PrAMP)...,[]


In [None]:
def pubtator_process(id):
    """Fetch PubTator3 annotations for one PMID and extract its abstract's chemicals.

    This redefines the earlier ``pubtator_process`` of this notebook: instead
    of assuming the abstract is ``passages[1]``, every passage whose
    ``infons['type']`` equals ``'abstract'`` contributes its annotations.

    Parameters
    ----------
    id : int or str
        PubMed identifier interpolated into the request URL. The name shadows
        the ``id`` builtin but is kept so existing calls continue to work.

    Returns
    -------
    list of dict
        The records produced by ``pubtator_generate_struct`` for the abstract
        annotations, sorted by ``record['location']['start']``. An empty list
        is returned when the service sends back no publication record.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    url = f"https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/export/biocjson?pmids={id}&full=true"
    # A timeout keeps one stalled connection from hanging the whole batch.
    res = requests.get(url, timeout=30)
    print(id)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    res.raise_for_status()
    res_js = json.loads(res.content)['PubTator3']

    # Pause after every request (presumably to throttle calls to the API).
    time.sleep(0.6)

    # No publication returned for this PMID: nothing to annotate.
    if not res_js:
        return []

    # Only working with abstracts: flatten the annotations of every abstract
    # passage in a single pass (avoids the quadratic sum(list_of_lists, [])).
    pubt_annot = [
        annot
        for passage in res_js[0].get('passages', [])
        if passage.get('infons', {}).get('type') == 'abstract'
        for annot in passage.get('annotations', [])
    ]
    pubt_res = pubtator_generate_struct(pubt_annot)

    return sorted(pubt_res, key=lambda d: d['location']['start'])

In [None]:
# Display the current state of the frame for inspection.
pos_df

Unnamed: 0,Title,PMID,Targets,Abstract,PubTator
0,2-Pyrazol-1-yl-thiazole derivatives as novel h...,31358913,['2-(pyrazol-1-yl)- thiazole'],The present report describes our efforts to id...,"[{'text': '2-pyrazol-1-yl-thiazole', 'location..."
1,A choline binding polypeptide of LytA inhibits...,30127421,['ChBp'],Streptococcus pneumoniae is a pathogen that ma...,"[{'text': 'choline', 'location': {'start': 906..."
2,A naturally inspired antibiotic to target mult...,34987225,['Macolacin'],Gram-negative bacteria are responsible for an ...,"[{'text': 'colistin3', 'location': {'start': 4..."
3,A new class of synthetic retinoid antibiotics ...,29590091,"['CD1530', 'CD437']",A challenge in the treatment of Staphylococcus...,"[{'text': 'methicillin', 'location': {'start':..."
4,"A novel, rationally designed, hybrid antimicro...",32499514,['PA-13'],Antimicrobial peptides (AMPs) are promising al...,"[{'text': 'lipopolysaccharide', 'location': {'..."
...,...,...,...,...,...
75,Total synthesis and antimicrobial evaluation o...,30181560,"['Albomycin', 'albomycin delta 2', 'grisein', ...",Development of effective antimicrobial agents ...,"[{'text': 'peptidylnucleoside', 'location': {'..."
76,"Venturicidin A, A Membrane-active Natural Prod...",32424122,['Venturicidin A'],Despite the remarkable advances due to the dis...,"[{'text': 'VentA', 'location': {'start': 1452,..."
77,"Walkmycin B targets WalK (YycG), a histidine k...",20057515,['Walkmycin B'],The WalK (a histidine kinase)/WalR (a response...,"[{'text': 'walkmycin A, B and C', 'location': ..."
78,"Wychimicins, a new class of spirotetronate pol...",36071214,['Wychimicin'],In the course of our screening program for new...,"[{'text': 'methicillin', 'location': {'start':..."


In [None]:
def collapse_pubtator(row):
    """Reduce a sequence of annotation dicts to their distinct ``'text'`` values.

    Parameters
    ----------
    row : iterable of dict
        Annotation records; each must provide a ``'text'`` key.

    Returns
    -------
    list
        The unique texts, in order of first appearance in ``row``.
    """
    # dict.fromkeys de-duplicates while keeping insertion order. The previous
    # list(set(...)) produced an ordering that changes between interpreter
    # runs (string hash randomization), so the exported column was not
    # reproducible.
    return list(dict.fromkeys(d['text'] for d in row))

# Store, per row, the de-duplicated annotation texts in a new column, then show the frame.
pos_df['PubTator_simplified'] = pos_df['PubTator'].map(collapse_pubtator)
pos_df

Unnamed: 0,Title,PMID,Targets,Abstract,PubTator,PubTator_simplified
0,2-Pyrazol-1-yl-thiazole derivatives as novel h...,31358913,['2-(pyrazol-1-yl)- thiazole'],The present report describes our efforts to id...,"[{'text': '2-pyrazol-1-yl-thiazole', 'location...","[2-pyrazol-1-yl-thiazole, 2-pyrazol-1-yl-thiaz..."
1,A choline binding polypeptide of LytA inhibits...,30127421,['ChBp'],Streptococcus pneumoniae is a pathogen that ma...,"[{'text': 'choline', 'location': {'start': 906...",[choline]
2,A naturally inspired antibiotic to target mult...,34987225,['Macolacin'],Gram-negative bacteria are responsible for an ...,"[{'text': 'colistin3', 'location': {'start': 4...","[macolacin, biphenyl, colistin3]"
3,A new class of synthetic retinoid antibiotics ...,29590091,"['CD1530', 'CD437']",A challenge in the treatment of Staphylococcus...,"[{'text': 'methicillin', 'location': {'start':...","[methicillin, CD437, retinoids, gentamicin, li..."
4,"A novel, rationally designed, hybrid antimicro...",32499514,['PA-13'],Antimicrobial peptides (AMPs) are promising al...,"[{'text': 'lipopolysaccharide', 'location': {'...","[AMPs, Antimicrobial peptides, LPS, lipopolysa..."
...,...,...,...,...,...,...
75,Total synthesis and antimicrobial evaluation o...,30181560,"['Albomycin', 'albomycin delta 2', 'grisein', ...",Development of effective antimicrobial agents ...,"[{'text': 'peptidylnucleoside', 'location': {'...","[albomycins, albomycins delta1 (1a), delta2 (1..."
76,"Venturicidin A, A Membrane-active Natural Prod...",32424122,['Venturicidin A'],Despite the remarkable advances due to the dis...,"[{'text': 'VentA', 'location': {'start': 1452,...","[methicillin, VentA, aminoglycoside, gentamici..."
77,"Walkmycin B targets WalK (YycG), a histidine k...",20057515,['Walkmycin B'],The WalK (a histidine kinase)/WalR (a response...,"[{'text': 'walkmycin A, B and C', 'location': ...","[silica, C(44)H(44)Cl(2)O(14), BE40665A., walk..."
78,"Wychimicins, a new class of spirotetronate pol...",36071214,['Wychimicin'],In the course of our screening program for new...,"[{'text': 'methicillin', 'location': {'start':...","[methicillin, beta-d-xylo-hexopyranose, Wychim..."


In [None]:
# Write the selected columns to disk without the index column.
pos_df.to_csv(
    'pubtator_nature.csv',
    index=False,
    columns=["PMID", "Title", "Abstract", "PubTator_simplified"],
)