In [1]:
import json
import sys
sys.path.append('/srv/scratch1/rosni/scigen/')
from ground_truth_path.prompts.relevancy_scoring_prompt_003 import relevancy_scoring_prompt
from ground_truth_path.prompts.tests.test_examples import few_shot_prompt_data, source_paper_data, year_data, papers_data
from ground_truth_path.utils import clean_gpt_output, evaluate_papers_with_llama, load_llama_model
import torch
import numpy as np
import random
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

In [2]:
llama_pipeline = load_llama_model()

Loading checkpoint shards:   0%|          | 0/30 [00:00<?, ?it/s]

In [3]:
def relevancy_prompt(source_paper, few_shot_prompt, year, paper_list):
    # update this in the utils.py file (after testing and if changed)

    prompt_template = relevancy_scoring_prompt()
    prompt = prompt_template.format(
        source_title=source_paper['title'],
        source_abstract=source_paper['abstract'],
        year=year,
        paper_list=paper_list
    )
    return prompt

In [4]:
source_paper = source_paper_data()
few_shot_prompt = few_shot_prompt_data()
year = year_data()
papers = papers_data()

# Generate the prompt
prompt = relevancy_prompt(source_paper, few_shot_prompt, year, papers)
print(prompt)


    Hypotheses are frequently the starting point when undertaking the empirical portion of the scientific process. They state something that the scientific process will attempt to evaluate, corroborate, verify, or falsify. Their purpose is to guide the types of data we collect, analyses we conduct, and inferences we would like to make. You are a scientist. Your job is to construct a novel and impactful hypothesis by navigating the literature.

    We have retrieved a knowledge graph of literature for you. You are given a source paper and a list of papers that followed
    from the source paper.
    You are evaluating the relevance of the following papers to the source paper. Starting from the source paper, you will analyze the following papers in this way. For every paper in the list, you output 0, 1, 2, 3:
    
    0: The hypothesis in this paper has no connection to the source paper's hypothesis or it addresses a related topic independently. OR the findings or this paper is a review

In [10]:
# Evaluate the prompt
response = evaluate_papers_with_llama(prompt, llama_pipeline)

for item in response:
    if item['role'] == 'assistant':
        try:
            llama_output = clean_gpt_output(item['content'])
        except json.JSONDecodeError as e:
            print(f"Error: {e}")
            


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [11]:
llama_output

{'paper_list': {'1. Perfusion Index as a Diagnostic Tool for Patent Ductus Arteriosus in Preterm Infants': {'explanation': 'The paper explores a diagnostic tool for patent ductus arteriosus (PDA), which is a condition that the source paper found was reduced by indomethacin prophylaxis. However, the paper does not directly build upon or challenge the findings of the source paper.',
   'relevance': 1},
  '2. Reduction of Severe Intraventricular Hemorrhage in Preterm Infants: A Quality Improvement Project': {'explanation': 'This paper discusses the reduction of severe intraventricular hemorrhage (sIVH) in preterm infants, which is a condition that the source paper found was reduced by indomethacin prophylaxis. The paper uses risk-based indomethacin prophylaxis as one of its targeted interventions, making it relevant to the source paper.',
   'relevance': 1},
  '3. Differential effects of ibuprofen and indomethacin on cerebral oxygen kinetics in the very preterm baby': {'explanation': "Thi

In [21]:
{'paper_list': {'1. Perfusion Index as a Diagnostic Tool for Patent Ductus Arteriosus in Preterm Infants': {'explanation': 'This paper investigates a diagnostic tool for patent ductus arteriosus, which is a condition that the source paper found to be reduced by indomethacin prophylaxis. However, the hypothesis in this paper is not directly inspired by or dependent on the findings of the source paper, as it focuses on a different aspect of the condition.', 'relevance': 0}, '2. Reduction of Severe Intraventricular Hemorrhage in Preterm Infants: A Quality Improvement Project': {'explanation': 'This paper aims to reduce severe intraventricular hemorrhage in preterm infants, a condition that the source paper found to be reduced by indomethacin prophylaxis. The hypothesis in this paper is partially dependent on the findings of the source paper, as it includes indomethacin prophylaxis as one of the targeted interventions.', 'relevance': 2}, '3. Differential effects of ibuprofen and indomethacin on cerebral oxygen kinetics in the very preterm baby': {'explanation': "This paper compares the effects of ibuprofen and indomethacin on cerebral oxygen kinetics in preterm babies. The hypothesis in this paper is partially dependent on the findings of the source paper, as it builds on the knowledge of indomethacin's effects on cerebral blood flow and oxygen delivery.", 'relevance': 2}}, 'top3_relevant_papers': {'1. Reduction of Severe Intraventricular Hemorrhage in Preterm Infants: A Quality Improvement Project': {'explanation': 'This paper aims to reduce severe intraventricular hemorrhage in preterm infants, a condition that the source paper found to be reduced by indomethacin prophylaxis. The hypothesis in this paper is partially dependent on the findings of the source paper, as it includes indomethacin prophylaxis as one of the targeted interventions.', 'relevance': 2}, '2. Differential effects of ibuprofen and indomethacin on cerebral oxygen kinetics in the very preterm baby': {'explanation': "This paper compares the effects of ibuprofen and indomethacin on cerebral oxygen kinetics in preterm babies. The hypothesis in this paper is partially dependent on the findings of the source paper, as it builds on the knowledge of indomethacin's effects on cerebral blood flow and oxygen delivery.", 'relevance': 2}}}

### BMR model testing

In [1]:
intial_source_abstract = "We admitted 48 preterm neonates (600 to 1250 gm birth weight, normal 6-hour echoencephalograms) to a randomized prospective indomethacin or placebo trial for the prevention of neonatal intraventricular hemorrhage. Beginning at 6 postnatal hours, indomethacin or placebo was administered intravenously every 12 hours for a total of five doses. Cardiac ultrasound studies to assess the status of the ductus arteriosus were performed at 6 postnatal hours and on day 5. Urinary output, serum electrolytes, and renal and clotting functions were monitored. No differences in birth weight, gestational age, Apgar scores, or ventilatory needs were noted between the two groups. Six infants given indomethacin had intraventricular hemorrhage, compared to 14 control infants (P = 0.02). The indomethacin-treated group had significant decreases in serum prostaglandin values 30 hours after the initiation of therapy. The overall incidence of patent ductus arteriosus was 82% at 6 postnatal hours; 84% of the indomethacin-treated infants experienced closure of the ductus, compared to 60% of the placebo-treated patients. Closure of the ductus was not related to incidence of intraventricular hemorrhage. We speculate that indomethacin may provide some protection against neonatal intraventricular hemorrhage by acting on the cerebral microvasculature."

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a scientific abstract, retrieve documents that support or refute the claims from this abstract'

# Tokenize and encode inputs
source_abstract = "BACKGROUND\nThe prophylactic administration of indomethacin reduces the frequency of patent ductus arteriosus and severe intraventricular hemorrhage in very-low-birth-weight infants (those with birth weights below 1500 g). Whether prophylaxis with indomethacin confers any long-term benefits that outweigh the risks of drug-induced reductions in renal, intestinal, and cerebral blood flow is not known.\n\n\nMETHODS\nSoon after they were born, we randomly assigned 1202 infants with birth weights of 500 to 999 g (extremely low birth weight) to receive either indomethacin (0.1 mg per kilogram of body weight) or placebo intravenously once daily for three days. The primary outcome was a composite of death, cerebral palsy, cognitive delay, deafness, and blindness at a corrected age of 18 months. Secondary long-term outcomes were hydrocephalus necessitating the placement of a shunt, seizure disorder, and microcephaly within the same time frame. Secondary short-term outcomes were patent ductus arteriosus, pulmonary hemorrhage, chronic lung disease, ultrasonographic evidence of intracranial abnormalities, necrotizing enterocolitis, and retinopathy.\n\n\nRESULTS\nOf the 574 infants with data on the primary outcome who were assigned to prophylaxis with indomethacin, 271 (47 percent) died or survived with impairments, as compared with 261 of the 569 infants (46 percent) assigned to placebo (odds ratio, 1.1; 95 percent confidence interval, 0.8 to 1.4; P=0.61). Indomethacin reduced the incidence of patent ductus arteriosus (24 percent vs. 50 percent in the placebo group; odds ratio, 0.3; P<0.001) and of severe periventricular and intraventricular hemorrhage (9 percent vs. 13 percent in the placebo group; odds ratio, 0.6; P=0.02). No other outcomes were altered by the prophylactic administration of indomethacin.\n\n\nCONCLUSIONS\nIn extremely-low-birth-weight infants, prophylaxis with indomethacin does not improve the rate of survival without neurosensory impairment at 18 months, despite the fact that it reduces the frequency of patent ductus arteriosus and severe periventricular and intraventricular hemorrhage."
candidate_documents = [
    "Title: Perfusion Index as a Diagnostic Tool for Patent Ductus Arteriosus in Preterm Infants Abstract: Background: Perfusion index (PI) could reflect peripheral flow. Preterm infants with hemodynamically significant patent ductus arteriosus (hsPDA) will have left-to-right shunt across PDA causing less blood flow to the lower legs. Objective: To evaluate pre- and postductal PI differences (ΔPI) in hsPDA. Methods: Preterm infants with gestational age <34 weeks were assessed for ΔPI on days 1, 3, and 7 of life with simultaneous echocardiography. Based on echocardiography, each infant was categorized into hsPDA, non-hsPDA, and no PDA. Results: Thirty infants (16 males), median age 31 weeks (interquartile range, IQR, 29-32) and weight 1,490 g (IQR 1,100-1,670) were enrolled. On days 1 and 3 of life, the ΔPI of infants with hsPDA (1.57%, IQR 0.28-2.32, n = 14, and 1.32%, IQR 0.28-1.83, n = 10) were significantly higher than those without hsPDA (0.14%, IQR -0.03 to 0.30, n = 16, and 0.08%, IQR -0.07 to 0.26, n = 20), p = 0.009 and 0.005, respectively. At all time points (days 1, 3, and 7 of life, n = 84), ΔPI >1.05% had sensitivity, specificity, positive predictive value, and negative predictive value of 66.7, 100, 100, and 86.4%, respectively, to detect hsPDA. Conclusion: The pre- and postductal PI differences were significantly related to the hemodynamic changes of PDA and might be useful to detect hemodynamically significant PDA.",
    "Title: Reduction of Severe Intraventricular Hemorrhage in Preterm Infants: A Quality Improvement Project. Abstract: OBJECTIVES\nThe aim of this quality improvement project was to reduce the rate of severe intraventricular hemorrhage (sIVH) by 50% within 3 years for extremely preterm infants born at a children\'s teaching hospital.\n\n\nMETHODS\nA multidisciplinary team developed key drivers for the development of intraventricular hemorrhage in preterm infants. Targeted interventions included the development of potentially better practice guidelines, promoting early noninvasive ventilation, consistent use of rescue antenatal betamethasone, and risk-based indomethacin prophylaxis. The outcome measure was the rate of sIVH. Process measures included the rate of intubation within 24 hours and receipt of rescue betamethasone and risk-based indomethacin prophylaxis. Common markers of morbidity were balancing measures. Data were collected from a quarterly chart review and analyzed with statistical process control charts. The preintervention period was from January 2012 to March 2016, implementation period was from April 2016 to December 2018, and sustainment period was through June 2020.\n\n\nRESULTS\nDuring the study period, there were 268 inborn neonates born at <28 weeks\' gestation or <1000 g (127 preintervention and 141 postintervention). The rate of sIVH decreased from 14% to 1.2%, with sustained improvement over 2 and a half years. Mortality also decreased by 50% during the same time period. This was associated with adherence to process measures and no change in balancing measures.\n\n\nCONCLUSIONS\nA multipronged quality improvement approach to intraventricular hemorrhage prevention, including evidence-based practice guidelines, consistent receipt of rescue betamethasone and indomethacin prophylaxis, and decreasing early intubation was associated with a sustained reduction in sIVH in extremely preterm infants.",
    "Title: Differential effects of ibuprofen and indomethacin on cerebral oxygen kinetics in the very preterm baby Abstract: Background Ibuprofen is preferred to indomethacin for treatment of a significant patent ductus arteriosus (PDA) in preterm babies despite indomethacin being associated with a lower risk of intraventricular haemorrhage. This difference is thought to relate to the discrepant effects of each medication on cerebral oxygen kinetics yet the effect of ibuprofen on cerebral perfusion is uncertain. Methods Forty-eight babies < 30 weeks with a significant PDA, defined by echocardiography, were randomly assigned to either indomethacin or ibuprofen (n = 24 per group) and stratified by gestation and chronologic age. Cerebral blood flow [total internal carotid blood flow (TICF)] and oxygen physiology [oxygen delivery (modCerbDO2) and consumption (modCerbVO2)] were measured using cranial Doppler ultrasound and near-infrared spectroscopy, and cerebral oxygen extraction (cFTOE) calculated, immediately before and following administration. Temporal and treatment related changes were analysed. Results A fixed effect of time was seen for TICF (p = 0.03) and therefore modCerbDO2 (p = 0.046) and cFTOE (p = 0.04) for indomethacin alone. In the indomethacin group, TICF and modCerbDO2 fell from baseline to 5 and 30 min respectively (TICF p < 0.01, cDO2 p = 0.01) before increasing from 5 min to 24 h (p < 0.01) and 30 min and 24 h (p < 0.01) timepoints. cFTOE peaked at 30 min (p = 0.02) returning to baseline at 24 h. There was a parallel increase in arterial lactate. Conclusion Indomethacin significantly reduces cerebral blood flow soon after administration, resulting in a parallel increase in oxygen extraction and arterial lactate. This implies that the balance of oxygen kinetics at the time of treatment may be critical in very preterm babies with significant PDA."
]


In [6]:
import os
# add gpu support
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"
import torch
from transformers import AutoModel, AutoTokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_name = "BMRetriever/BMRetriever-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).to(device)

from torch.nn.functional import cosine_similarity
import torch
import torch.nn.functional as F
from torch import Tensor

def last_token_pool(last_hidden_states: Tensor,
                 attention_mask: Tensor) -> Tensor:
    last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        embedding = last_hidden[:, -1]
    else:
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden.shape[0]
        embedding = last_hidden[torch.arange(batch_size, device=last_hidden.device), sequence_lengths]
    return embedding

def get_detailed_instruct_query(task_description: str, query: str) -> str:
    return f'{task_description}\nQuery: {query}'

def get_detailed_instruct_passage(passage: str) -> str:
    return f'Represent this passage\npassage: {passage}'

# Function to compute weighted similarity
def compute_weighted_similarity(source_embedding: Tensor, doc_embeddings: Tensor, weights: Tensor) -> Tensor:
    similarities = cosine_similarity(source_embedding.unsqueeze(0), doc_embeddings)
    weighted_similarities = similarities * weights
    return weighted_similarities.sum()


# Encode the initial source abstract
initial_input = tokenizer(intial_source_abstract, max_length=512, padding=True, truncation=True, return_tensors='pt')
initial_input = {key: value.to(device) for key, value in initial_input.items()}  # Move all inputs to GPU(s)
initial_embedding = last_token_pool(
    model(**initial_input).last_hidden_state, initial_input['attention_mask']
)

# Tokenize and encode the source abstract
source_input = tokenizer(source_abstract, max_length=512, padding=True, truncation=True, return_tensors='pt')
source_input = {key: value.to(device) for key, value in source_input.items()}  # Move all inputs to GPU(s)
source_embedding = last_token_pool(
    model(**source_input).last_hidden_state, source_input['attention_mask']
)

# Encode the candidate documents
doc_embeddings = []
for doc in candidate_documents:
    doc_input = tokenizer(doc, max_length=512, padding=True, truncation=True, return_tensors='pt')
    doc_input = {key: value.to(device) for key, value in doc_input.items()}  # Move all inputs to GPU(s)
    doc_embedding = last_token_pool(
        model(**doc_input).last_hidden_state, doc_input['attention_mask']
    )
    doc_embeddings.append(doc_embedding)

# Normalize embeddings
source_embedding = F.normalize(source_embedding, p=2, dim=-1)
initial_embedding = F.normalize(initial_embedding, p=2, dim=-1)
doc_embeddings = torch.stack(doc_embeddings, dim=0).squeeze(1).to(device)  # Correct shape and move to device
doc_embeddings = F.normalize(doc_embeddings, p=2, dim=-1)  # Normalize

# Compute cosine similarities
source_similarities = cosine_similarity(source_embedding, doc_embeddings)
initial_similarities = cosine_similarity(initial_embedding, doc_embeddings)

# Compute weighted similarities
w1, w2 = 0.3, 0.7
weighted_similarities = (w1 * initial_similarities) + (w2 * source_similarities)

# Get the most relevant document
best_doc_idx = weighted_similarities.argmax()
print(f"Most relevant document index (weighted): {best_doc_idx}")

# Optional: Print similarity scores for debugging
print("Source Similarities:", source_similarities.cpu().detach().numpy())
print("Initial Similarities:", initial_similarities.cpu().detach().numpy())
print("Weighted Similarities:", weighted_similarities.cpu().detach().numpy())


Most relevant document index (weighted): 1
Source Similarities: [0.95600104 0.9667444  0.9664413 ]
Initial Similarities: [0.9450358  0.963362   0.96016985]
Weighted Similarities: [0.95271146 0.9657297  0.9645599 ]
