In [None]:
from gliner import GLiNER
import json
from transformers import pipeline
from gliner_to_docred import convert_gliner_to_docred

# Input text: the passage this cell runs entity extraction and relation
# prediction on. The string is model input, so its wording and whitespace
# (including the leading/trailing newline) are kept exactly as written.
# NOTE(review): "that that they failed" in the passage looks like a typo in
# the quoted text; left untouched because editing it would change model input.
input_text = """
Although it varies with age and geographical distribution, the global burden of infection with Streptococcus pneumoniae (pneumococcus) remains considerable. 
The elderly, and younger adults with comorbid conditions, are at particularly high risk of pneumococcal infection, and this risk will increase as the population ages. 
Vaccination should be the backbone of our current strategies to deal with this infection.
Main body: This manuscript reviews the history of the development of pneumococcal vaccines, and the impact of different vaccines and vaccination strategies over the past 111 years. 
It documents the early years of vaccine development in the gold mines of South Africa, when vaccination with killed pneumococci was shown to be effective, even before the recognition that different pneumococci were antigenically distinct. 
The development of type-specific vaccines, still with whole killed pneumococci, showed a high degree of efficacy. 
The identification of the importance of the pneumococcal capsule heralded the era of vaccination with capsular polysaccharides, although with the advent of penicillin, interest in pneumococcal vaccine development waned. 
The efforts of Austrian and his colleagues, who documented that despite penicillin therapy, patients still died from pneumococcal infection in the first 96 h, ultimately led to the licensing first of a 14-valent pneumococcal polysaccharide in 1977 followed by the 23-valent pneumococcal polysaccharide in 1983. 
The principal problem with these, as with other polysaccharide vaccines, was that that they failed to immunize infants and toddlers, who were at highest risk for pneumococcal disease. 
This was overcome by chemical linking or conjugation of the polysaccharide molecules to an immunogenic carrier protein. 
Thus began the era of pneumococcal conjugate vaccine (PCV), starting with PCV7, progressing to PCV10 and PCV13, and, most recently, PCV15 and PCV20. 
However, these vaccines remain serotype specific, posing the challenge of new serotypes replacing vaccine types. 
Current research addresses serotype-independent vaccines which, so far, has been a challenging and elusive endeavor.
Conclusion: While there has been enormous progress in the development of pneumococcal vaccines during the past century, attempts to develop a vaccine that will retain its efficacy for most pneumococcal serotypes are ongoing.
"""

# --- Named-entity recognition with GLiNER ---------------------------------
# Tunables for this step, named so they are easy to find and adjust.
GLINER_MODEL_NAME = "urchade/gliner_mediumv2.1"
GLINER_THRESHOLD = 0.4  # value passed as `threshold` to predict_entities

# Load the pretrained checkpoint and put it in evaluation mode.
model = GLiNER.from_pretrained(GLINER_MODEL_NAME)
model.eval()

# Entity types to look for in the passage.
labels = ["vaccine", "pathogen"]

# Each returned entity is used below as a dict with at least "text" and "label".
entities = model.predict_entities(input_text, labels, threshold=GLINER_THRESHOLD)

# Re-package the text and its entities for the relation model.
# NOTE(review): the exact output schema of convert_gliner_to_docred is defined
# in the local gliner_to_docred module and is not visible here.
input_preprocessed_to_docred = convert_gliner_to_docred(input_text, entities)


# --- Vaccine -> pathogen relation classification --------------------------
vaccine_pathogen_model_name = "thomaslim6793/vaccine-pathogen-model"

# NOTE(review): trust_remote_code=True lets transformers run Python code
# shipped in the model repository. Presumably required because
# "vaccine-pathogen-relation" is a custom task defined there - confirm, and
# consider pinning a `revision` once a vetted commit is known.
vaccine_pathogen_pipe = pipeline(
    task="vaccine-pathogen-relation",
    model=vaccine_pathogen_model_name,
    trust_remote_code=True,
)

# Score the DocRED-formatted document(s) produced by the NER step.
vaccine_pathogen_results = vaccine_pathogen_pipe(input_preprocessed_to_docred)


# Print only the predictions whose "predicted_relation" is "vaccine_targets".
# The filter is a lazy generator, so matches are printed in the same order,
# one at a time, as each document's predictions are scanned.
for doc_result in vaccine_pathogen_results:
    target_predictions = (
        candidate
        for candidate in doc_result["predictions"]
        if candidate["predicted_relation"] == "vaccine_targets"
    )
    for pred in target_predictions:
        print(pred)


Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 99864.38it/s]
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Fetching 0 files: 0it [00:00, ?it/s]
Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 24385.49it/s]
Fetching 0 files: 0it [00:00, ?it/s]
Fetching 0 files: 0it [00:00, ?it/s]
Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 11915.64it/s]
Fetching 0 files: 0it [00:00, ?it/s]
Device set to use mps:0


{'head_entity': '23-valent pneumococcal polysaccharide', 'tail_entity': 'pneumococcus', 'head_entity_type': 'vaccine', 'tail_entity_type': 'pathogen', 'predicted_relation': 'vaccine_targets', 'probabilities': {'N/A': 0.1463390588760376, 'vaccine_targets': 0.8536610007286072}, 'raw_logits': [-0.9035793542861938, 0.8600285053253174]}
{'head_entity': 'pneumococcal conjugate vaccine', 'tail_entity': 'Streptococcus pneumoniae', 'head_entity_type': 'vaccine', 'tail_entity_type': 'pathogen', 'predicted_relation': 'vaccine_targets', 'probabilities': {'N/A': 7.455573359038681e-05, 'vaccine_targets': 0.9999254941940308}, 'raw_logits': [-4.737787246704102, 4.766102313995361]}


In [None]:
# Number of extracted entities labelled "vaccine" (counted without building a list).
sum(1 for entity in entities if entity["label"] == "vaccine")


8