## Imports

In [None]:
import numpy as np
import pandas as pd

In [None]:
!pip install transformers



In [None]:
!pip install xmltodict

Collecting xmltodict
  Downloading xmltodict-0.14.1-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading xmltodict-0.14.1-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.14.1


In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline

## Processing database


In [None]:
# Read the spreadsheet of abstracts (with previously recorded model answers)
# into a DataFrame and show it.
source_workbook = 'DatasetPositivo.xlsx'
database = pd.read_excel(source_workbook)
database

Unnamed: 0,abstract,T5,Alpaca,LaMini,Score,Answer,Unnamed: 6
0,A collection of rifampin-resistant mutants of ...,<pad> yes</s>,yes,Yes.,3,YES,
1,A comparative evaluation of daptomycin and eig...,<pad> yes</s>,Yes,Yes.,3,YES,
2,A major antimicrobial resistance mechanism in ...,<pad> yes</s>,No,Yes.,2,YES,
3,A new environmental bacterial strain exhibited...,<pad> yes</s>,Yes,Yes.,3,YES,
4,A tyrothricin concentration of 0.01 mg. per ml...,<pad> yes</s>,Yes,Yes.,3,YES,
...,...,...,...,...,...,...,...
111,,,,,,,
112,,,,,,,
113,,,,,TOTAL,111,
114,,,,,YES,99,0.891892


In [None]:
# Keep only the leading rows of the sheet. In the displayed frame the rows
# after the abstracts are blank lines and TOTAL / YES summary figures, which
# must not be sent to the models.
N_ABSTRACT_ROWS = 111
database = database.head(N_ABSTRACT_ROWS)
database

Unnamed: 0,abstract,T5,Alpaca,LaMini,Score,Answer,Unnamed: 6
0,A collection of rifampin-resistant mutants of ...,<pad> yes</s>,yes,Yes.,3,YES,
1,A comparative evaluation of daptomycin and eig...,<pad> yes</s>,Yes,Yes.,3,YES,
2,A major antimicrobial resistance mechanism in ...,<pad> yes</s>,No,Yes.,2,YES,
3,A new environmental bacterial strain exhibited...,<pad> yes</s>,Yes,Yes.,3,YES,
4,A tyrothricin concentration of 0.01 mg. per ml...,<pad> yes</s>,Yes,Yes.,3,YES,
...,...,...,...,...,...,...,...
106,We aimed to investigate the effect of antepart...,<pad> yes</s>,"No, the data suggests that spiramycin treatmen...",Yes.,2.5,YES,
107,We have recently disclosed the discovery of th...,<pad> yes</s>,Yes,Yes.,3,YES,
108,We investigated the in vitro activity of AR-70...,<pad> yes</s>,yes,Yes.,3,YES,
109,We successfully produced two human β-defensins...,<pad> yes</s>,Yes,Yes.,3,YES,


In [None]:
# Collect the abstract texts as a plain Python list, in row order.
# The displayed frame labels the column 'abstract' while this notebook's own
# export uses 'Abstract', so the column is resolved case-insensitively
# instead of hard-coding one spelling (which raises KeyError on the other).
abstract_columns = [
    col for col in database.columns
    if str(col).strip().lower() == 'abstract'
]
if not abstract_columns:
    raise KeyError(
        f"No 'Abstract' column found; available columns: {list(database.columns)}"
    )
pmc_abstracts = database[abstract_columns[0]].values.tolist()

# Preview only the first few entries; the full list is very long.
pmc_abstracts[:3]

['A collection of rifampin-resistant mutants of Staphylococcus aureus with characterized RNA polymerase beta-subunit (rpoB) gene mutations was cross-screened against a number of other RNA polymerase inhibitors to correlate susceptibility with specific rpoB genotypes. The rpoB mutants were cross-resistant to streptolydigin and sorangicin A. In contrast, thiolutin, holomycin, corallopyronin A, and ripostatin A retained activity against the rpoB mutants. The second group of inhibitors may be of interest as drug development candidates.',
 'A comparative evaluation of daptomycin and eight other antimicrobial agents was performed by the agar dilution technique with 56 strains of vancomycin-resistant gram-positive bacteria, including Leuconostoc, Lactobacillus, and Pediococcus spp. Erythromycin, deptomycin, clindamycin, and gentamicin exhibited the greatest activities, whereas penicillin, ampicillin, and cefotaxime showed moderate activities. The organisms were all highly resistant to vancomy

### T5

In [None]:
# Screen every abstract with FLAN-T5 (base): build the yes/no prompt, generate
# an answer and store the decoded text in `responses1` (one entry per
# abstract, in the same order as `pmc_abstracts`).
responses1 = []
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

for idx, text in enumerate(pmc_abstracts):
    # Earlier prompt variants, kept for reference:
    #prompt = f"question: There is a reference to the use of chemical compounds as medicines in the context? Say yes or no, context: {text}"
    #prompt = f"Is any medication used against an infection or disease mentioned in the context? Say yes or no, context: {text}"
    # `prompt` (not `input`) so the builtin input() is not shadowed.
    prompt = f"Is any antibiotic, anti-infective, antimicrobial compound used against biofilm, parasites, protozoans, virus, fungal or bacterial? Say yes or no, context: {text}"
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    try:
        outputs = model.generate(input_ids)
        # decode() is called without skip_special_tokens, so answers keep the
        # special tokens and look like '<pad> yes</s>'.
        responses1.append(tokenizer.decode(outputs[0]))
    except Exception as exc:
        # Best-effort: a failed generation is recorded as 'No' so the list
        # stays aligned with `pmc_abstracts`, but the failure is reported
        # instead of being silently swallowed. Catching Exception (not a bare
        # except) lets Ctrl-C / kernel interrupt stop the loop.
        print(f"[T5] generation failed for abstract #{idx} ({exc!r}); recording fallback 'No'")
        responses1.append('No')



In [None]:
# Print the raw decoded T5 answers as one list for a quick visual check.
t5_answers_text = str(responses1)
print(t5_answers_text)

['<pad> yes</s>']


In [None]:
# Tally how many times each distinct T5 answer string occurs.
t5_labels, t5_counts = np.unique(responses1, return_counts=True)
print((t5_labels, t5_counts))

(array(['<pad> No</s>', '<pad> no</s>', '<pad> yes</s>'], dtype='<U13'), array([ 7, 14, 89]))


In [None]:
# Show each abstract next to the answer T5 produced for it.
t5_pairs = [(abstract, answer) for abstract, answer in zip(pmc_abstracts, responses1)]
display(t5_pairs)

[('A collection of rifampin-resistant mutants of Staphylococcus aureus with characterized RNA polymerase beta-subunit (rpoB) gene mutations was cross-screened against a number of other RNA polymerase inhibitors to correlate susceptibility with specific rpoB genotypes. The rpoB mutants were cross-resistant to streptolydigin and sorangicin A. In contrast, thiolutin, holomycin, corallopyronin A, and ripostatin A retained activity against the rpoB mutants. The second group of inhibitors may be of interest as drug development candidates.',
  '<pad> no</s>'),
 ('A comparative evaluation of daptomycin and eight other antimicrobial agents was performed by the agar dilution technique with 56 strains of vancomycin-resistant gram-positive bacteria, including Leuconostoc, Lactobacillus, and Pediococcus spp. Erythromycin, deptomycin, clindamycin, and gentamicin exhibited the greatest activities, whereas penicillin, ampicillin, and cefotaxime showed moderate activities. The organisms were all highly

### Alpaca

In [None]:
# Screen every abstract with the flan-alpaca-base pipeline. Each entry of
# `responses2` is the raw pipeline output (a list holding one dict with a
# 'generated_text' key); the next cell flattens it to plain strings.
responses2 = []
model = pipeline(model="declare-lab/flan-alpaca-base")

for idx, text in enumerate(pmc_abstracts):
    # Earlier prompt variants, kept for reference:
    #prompt = f"question: There is a reference to the use of chemical compounds as medicines in the context? Say yes or no, context: {text}"
    #prompt = f"Is any medication used against an infection or disease mentioned in the context? Say yes or no, context: {text}"
    # `prompt` (not `input`) so the builtin input() is not shadowed.
    prompt = f"Is any antibiotic, anti-infective, antimicrobial compound used against biofilm, parasites, protozoans, virus, fungal or bacterial? Say yes or no, context: {text}"
    try:
        # NOTE(review): do_sample=True makes the answers stochastic and no
        # seed is set in this notebook, so re-runs can give different labels.
        output = model(prompt, max_length=256, do_sample=True)
        responses2.append(output)
    except Exception as exc:
        # Best-effort fallback, reported rather than silently swallowed.
        # The fallback mimics the pipeline's output shape; a bare 'No' string
        # would break the `response[0]['generated_text']` extraction that
        # follows. Catching Exception (not a bare except) keeps Ctrl-C working.
        print(f"[Alpaca] generation failed for abstract #{idx} ({exc!r}); recording fallback 'No'")
        responses2.append([{'generated_text': 'No'}])



In [None]:
# Flatten the raw pipeline outputs to plain answer strings.
# Entries that are already strings (a second run of this cell, or a plain
# 'No' fallback) are kept as they are, so the cell is safe to re-run; the
# original indexing raised TypeError on such entries.
responses2 = [
    response if isinstance(response, str) else response[0]['generated_text']
    for response in responses2
]

print(responses2)

['No']


In [None]:
# Tally how many times each distinct Alpaca answer string occurs.
alpaca_labels, alpaca_counts = np.unique(responses2, return_counts=True)
print((alpaca_labels, alpaca_counts))

(array(['No', 'No, No, No, No.',
       'No, PlySK1249 is a promising anti-bacterial compound available for the treatment of bacterial infections. It is an anti-bacteriological inhibitor or a tack blocking agent. It is also a promising candidate for bacterial therapy.',
       'No, bacterial', 'No, but no',
       'No, context: Actinoplanes friuliensis produces cyclic peptide antibiotic friulimicin, which is a cyclic peptide with one exocyclic amino acid linked to a branched-chain fatty acid acyl residue. The structural relationship to daptomycin and the excellent antibacterial performance of friulimicin make the antibiotic an attractive drug candidate.',
       'No, context: Many approaches are used to discover new antibiotic compounds, one of the most widespread being the chemical modification of known antibiotics. This type of discovery has been so important in the development of new antibiotics that most antibiotics used today belong to the same chemical classes as antibiotics disc

In [None]:
# Show each abstract next to the answer Alpaca produced for it.
alpaca_pairs = [(abstract, answer) for abstract, answer in zip(pmc_abstracts, responses2)]
display(alpaca_pairs)

[('A collection of rifampin-resistant mutants of Staphylococcus aureus with characterized RNA polymerase beta-subunit (rpoB) gene mutations was cross-screened against a number of other RNA polymerase inhibitors to correlate susceptibility with specific rpoB genotypes. The rpoB mutants were cross-resistant to streptolydigin and sorangicin A. In contrast, thiolutin, holomycin, corallopyronin A, and ripostatin A retained activity against the rpoB mutants. The second group of inhibitors may be of interest as drug development candidates.',
  'yes'),
 ('A comparative evaluation of daptomycin and eight other antimicrobial agents was performed by the agar dilution technique with 56 strains of vancomycin-resistant gram-positive bacteria, including Leuconostoc, Lactobacillus, and Pediococcus spp. Erythromycin, deptomycin, clindamycin, and gentamicin exhibited the greatest activities, whereas penicillin, ampicillin, and cefotaxime showed moderate activities. The organisms were all highly resistan

### LaMini

In [None]:
# Screen every abstract with the LaMini-T5-223M text2text pipeline. Each entry
# of `responses3` is the raw pipeline output (a list holding one dict with a
# 'generated_text' key); the next cell flattens it to plain strings.
responses3 = []
model = pipeline('text2text-generation', model="MBZUAI/LaMini-T5-223M")

for idx, text in enumerate(pmc_abstracts):
    # Earlier prompt variants, kept for reference:
    #prompt = f"question: There is a reference to the use of chemical compounds as medicines in the context? Say yes or no, context: {text}"
    #prompt = f"question: Is any medication used against an infection or disease mentioned in the context? Say yes or no, context: {text}"
    # `prompt` (not `input`) so the builtin input() is not shadowed.
    prompt = f"question: Is any antibiotic, anti-infective, antimicrobial compound used against biofilm, parasites, protozoans, virus, fungal or bacterial? Say yes or no, context: {text}"
    try:
        # NOTE(review): do_sample=True makes the answers stochastic and no
        # seed is set in this notebook, so re-runs can give different labels.
        output = model(prompt, max_length=256, do_sample=True)
        responses3.append(output)
    except Exception as exc:
        # Best-effort fallback, reported rather than silently swallowed.
        # The fallback mimics the pipeline's output shape; a bare 'No' string
        # would break the `response[0]['generated_text']` extraction that
        # follows. Catching Exception (not a bare except) keeps Ctrl-C working.
        print(f"[LaMini] generation failed for abstract #{idx} ({exc!r}); recording fallback 'No'")
        responses3.append([{'generated_text': 'No'}])



In [None]:
# Flatten the raw pipeline outputs to plain answer strings.
# Entries that are already strings (a second run of this cell, or a plain
# 'No' fallback) are kept as they are, so the cell is safe to re-run; the
# original indexing raised TypeError on such entries.
responses3 = [
    response if isinstance(response, str) else response[0]['generated_text']
    for response in responses3
]

print(responses3)

['Yes.']


In [None]:
# Tally how many times each distinct LaMini answer string occurs.
lamini_labels, lamini_counts = np.unique(responses3, return_counts=True)
print((lamini_labels, lamini_counts))

(array(['It is not clear from the context whether an antibiotic, anti-infective, antimicrobial compound is used against biofilm, parasites, protozoans, virus, fungal or bacterial bacteria. Only bacteria should be treated with antibiotics.',
       'It is not specified in the given context whether any antibiotic, anti-infective, antimicrobial compound is used against biofilm, parasites, protozoans, virus, fungal or bacterial. Therefore, I cannot provide an answer to the question.',
       'No.',
       'No. Antibiotics are not used against any of the mentioned mentioned compounds.',
       'No. The provided context discusses the activity spectrum of streptothricin against resistant bacteriophages and confirms the potential use of this compound as a potential therapeutic for drug-resistant, gram-negative pathogens. It does not provide any specific research or treatment options.',
       'Sure. No.',
       "The context doesn't provide information on whether any antibiotic, anti-infective

In [None]:
# Show each abstract next to the answer LaMini produced for it.
lamini_pairs = [(abstract, answer) for abstract, answer in zip(pmc_abstracts, responses3)]
display(lamini_pairs)

[('A collection of rifampin-resistant mutants of Staphylococcus aureus with characterized RNA polymerase beta-subunit (rpoB) gene mutations was cross-screened against a number of other RNA polymerase inhibitors to correlate susceptibility with specific rpoB genotypes. The rpoB mutants were cross-resistant to streptolydigin and sorangicin A. In contrast, thiolutin, holomycin, corallopyronin A, and ripostatin A retained activity against the rpoB mutants. The second group of inhibitors may be of interest as drug development candidates.',
  'Yes.'),
 ('A comparative evaluation of daptomycin and eight other antimicrobial agents was performed by the agar dilution technique with 56 strains of vancomycin-resistant gram-positive bacteria, including Leuconostoc, Lactobacillus, and Pediococcus spp. Erythromycin, deptomycin, clindamycin, and gentamicin exhibited the greatest activities, whereas penicillin, ampicillin, and cefotaxime showed moderate activities. The organisms were all highly resista

### Summary


In [None]:
# Assemble one table with the abstract and the answer of each model side by
# side (one row per abstract).
answers_by_column = {
    'Abstract': pmc_abstracts,
    'T5': responses1,
    'Alpaca': responses2,
    'LaMini': responses3,
}
allresponses = pd.DataFrame(data=answers_by_column)
allresponses

Unnamed: 0,Abstract,T5,Alpaca,LaMini
0,A collection of rifampin-resistant mutants of ...,<pad> no</s>,yes,Yes.
1,A comparative evaluation of daptomycin and eig...,<pad> yes</s>,No,Yes.
2,A major antimicrobial resistance mechanism in ...,<pad> yes</s>,"No, the program aims to develop an effective v...",Yes.
3,A new environmental bacterial strain exhibited...,<pad> yes</s>,Yes,Yes.
4,A tyrothricin concentration of 0.01 mg. per ml...,<pad> no</s>,"Yes, it is not possible to determine which ant...",No.
...,...,...,...,...
105,Viomycin and capreomycin are members of the tu...,<pad> yes</s>,Yes,No.
106,We aimed to investigate the effect of antepart...,<pad> yes</s>,Yes.,Yes.
107,We have recently disclosed the discovery of th...,<pad> yes</s>,No.,Yes.
108,We investigated the in vitro activity of AR-70...,<pad> yes</s>,Yes,Yes.


In [None]:
# Write the combined answers to an Excel workbook, without the index column.
# Alternative targets used previously (presumably one per prompt variant —
# confirm): 'dataset_pmc_q1.xlsx', 'dataset_pmc_q2.xlsx'.
output_workbook = 'dataset_pmc_q3.xlsx'
allresponses.to_excel(excel_writer=output_workbook, index=False)