In [None]:
import json

# Load the passage file; later cells iterate over `passage_data` and read each
# entry's 'contents' field.
# The encoding is stated explicitly because JSON text is UTF-8, while open()
# without `encoding` falls back to the platform's locale default.
with open('../Data/passages/5621_passage.json', 'r', encoding='utf-8') as json_file:
    passage_data = json.load(json_file)

In [None]:
from sentence_transformers.cross_encoder import CrossEncoder
import numpy as np
# Score every passage against one ROBIS question with the cross-encoder and
# print the passage whose score sorts first in descending order.
model = CrossEncoder("../Models/DPR/Distilbert-base-uncased")

query = "Did the review adhere to pre-defined objectives and eligibility criteria?"

# One [query, passage text] pair per passage, in `passage_data` order.
pairs = [[query, data['contents']] for data in passage_data]

scores = np.array(model.predict(pairs))
print(scores)

# argsort is ascending, so flipping along the first axis puts the largest first.
sorted_indices = np.flip(np.argsort(scores), axis=0)
best_passage_index = sorted_indices[0]
print(passage_data[best_passage_index]['contents'])

In [None]:
from dpr import get_robis_pairs, get_amstar_pairs

# Build the pair collections for both checklists from the same passages.
# A later cell reads robis_pairs[0]['question ID'] and robis_pairs[0]['context'];
# presumably amstar_pairs has the same layout — confirm in dpr.py.
robis_pairs, amstar_pairs = (
    get_robis_pairs(passage_data),
    get_amstar_pairs(passage_data),
)

In [None]:
# ROBIS signalling questions keyed by their checklist id. The wording is kept
# exactly as-is because these strings are used verbatim as model queries.
robis_questions = [
    ('1.1', "Did the review adhere to pre-defined objectives and eligibility criteria?"),
    ('1.2', "Were the eligibility crieria appropriate for the review question?"),
    ('1.3', "Were eligibility criteria unambiguous?"),
    ('1.4', "Were all restrictions in eligibility crieria based on study characteristics appropriate (e.g. date, sample size, study quality, outcomes measured)? If yes, indicate which study characteristic was an inclusion/exclusion criteria"),
    ('1.5', "Were any restrictions in eligibility criteria based on sources of info appropriate (e.g. publication status or format, language, availability of data)?"),
    ('2.1', "Did the search include an appropriate range of databases/ electronic sources for published and unpublished reports?"),
    ('2.2', "Were methods additional to database searching used to identify relevant reports?"),
    ('2.3', "Were the terms and structure of the search strategy likely to retrieve as many eligible studies as possible?"),
    ('2.4', "Were search strategy restrictions based on date, publication format, or language appropriate?"),
    ('2.5', "Were efforts made to minimise error in selection of studies?"),
    ('3.1', "Were efforts made to minimise error in data collection?"),
    ('3.2', "Were sufficient study characteristics considered for both review authors and readers to be able to interpret the results?"),
    ('3.3', "Were all relevant study results collected for use in the synthesis?"),
    ('3.4', "Was risk of bias (or methodological quality) formally assessed using appropriate criteria?"),
    ('3.5', "Were efforts made to minimise error in risk of bias assessment?"),
    ('4.1', "Did the synthesis include all studies that it should?"),
    ('4.2', "Were all pre-defined analyses reported or departures explained?"),
    ('4.3', "Was the synthesis appropriate given the nature and similarity in the research questions, study designs and outcomes across included studies?"),
    ('4.4', "Was between-study variation (heterogeneity) minimal or addressed in the synthesis?"),
    ('4.5', "Were the findings robust, e.g. as demonstrated through funnel plot or sensitivity analyses?"),
    ('4.6', "Were biases in primary studies minimal or addressed in the synthesis?"),
    ('A', "Did the interpretation of findings address all of the concerns identified in Domains 1 to 4?"),
    ('B', "Was the relevance of identified studies to the review's research question appropriately considered?"),
    ('C', "Did the reviewers avoid emphasizing results on the basis of their statistical significance?"),
]

# Parallel lists: position k in one corresponds to position k in the other.
robis_ID_lst = [qid for qid, _ in robis_questions]
robis_query_lst = [text for _, text in robis_questions]

# Resolve the query text and context for the first ROBIS pair.
first_pair = robis_pairs[0]
question_id = first_pair['question ID']
question_index = robis_ID_lst.index(question_id)
query = robis_query_lst[question_index]
context = first_pair['context']


In [None]:
from litellm import completion

# Ask phi3 (served through Ollama) the selected question about the selected
# context, using a scripted multi-turn exchange: instructions, then the
# question, then the context.
response = completion(
    model="ollama/phi3",
    messages=[
        {"content": text, "role": speaker}
        for speaker, text in (
            ("user", "I am going to provide a question and a context. Answer the question based on the context given."),
            ("assistant", "Yes. Please provide the question."),
            ("user", query + " Please answer the question by choosing 'Yes' or 'No'. Then provide a quote that supports your answer."),
            ("assistant", "Yes. Please provide the context."),
            ("user", context),
        )
    ],
    # Placeholder: replace with the address of the Ollama server before running.
    api_base="your API address here",
)
print(response)


In [None]:
import instructor
from litellm import completion
from pydantic import BaseModel


# Structured response requested from the model via `response_model=Answer`.
# NOTE(review): documented with `#` comments rather than a docstring, since
# pydantic may surface a class docstring in the generated schema — confirm
# before converting these notes.
class Answer(BaseModel):
    # The model's verdict; the prompt asks it to choose 'Yes' or 'No'.
    answer_class: str
    # The quote the prompt asks the model to provide in support of its answer.
    quote: str


# Wrap litellm's completion function so the reply is parsed into `Answer`.
client = instructor.from_litellm(completion)

# Same scripted exchange as the plain completion call: instructions, then the
# question, then the context.
structured_request = {
    "model": "ollama/phi3",
    "max_tokens": 1024,
    "messages": [
        {"content": "I am going to provide a question and a context. Answer the question based on the context given.", "role": "user"},
        {"content": "Yes. Please provide the question.", "role": "assistant"},
        {"content": query + " Please answer the question by choosing 'Yes' or 'No'. Then provide a quote that supports your answer.", "role": "user"},
        {"content": "Yes. Please provide the context.", "role": "assistant"},
        {"content": context, "role": "user"},
    ],
    # Placeholder: replace with the address of the Ollama server before running.
    "api_base": "your API address here",
    "response_model": Answer,
}

resp = client.chat.completions.create(**structured_request)

In [None]:
# Show the parsed verdict and its supporting quote, one per line.
print(resp.answer_class, resp.quote, sep="\n")

In [None]:
# Start the result lists with the single structured response obtained so far.
answer_class_lst = [resp.answer_class]
quote_lst = [resp.quote]

for collected in (answer_class_lst, quote_lst):
    print(collected)

In [None]:
def get_answer(messages, generation_args, pipe):
    """Run the generation pipeline and parse its 'answer: ..., quote: ...' reply.

    Args:
        messages: Chat messages passed to ``pipe`` as its first argument.
        generation_args: Mapping of keyword arguments forwarded to ``pipe``.
        pipe: Callable returning a sequence whose first element is a mapping
            with a 'generated_text' string.

    Returns:
        A ``(answer, quote)`` tuple of stripped strings. ``answer`` is the text
        between 'answer:' and 'quote:' with any trailing separator punctuation
        removed; ``quote`` is everything after 'quote:'.

    Raises:
        ValueError: If the generated text does not contain 'answer:' followed
            by 'quote:'.
    """
    output = pipe(messages, **generation_args)
    generated_text = output[0]['generated_text']

    _, answer_marker, after_answer = generated_text.partition("answer:")
    answer_part, quote_marker, quote_part = after_answer.partition("quote:")
    if not answer_marker or not quote_marker:
        raise ValueError(
            "Generated text is not in the 'answer: ..., quote: ...' format: "
            + repr(generated_text)
        )

    # Only drop a trailing separator when one is actually present; slicing off
    # the last character unconditionally turns "Yes" into "Ye" whenever the
    # model separates the two fields with a newline instead of a comma.
    answer = answer_part.strip().rstrip(",;.").strip()
    quote = quote_part.strip()
    return answer, quote

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Seed torch's RNG before loading the model.
torch.random.manual_seed(0)

# Both the weights and the tokenizer come from the same checkpoint.
phi3_checkpoint = "microsoft/Phi-3-mini-4k-instruct"

model = AutoModelForCausalLM.from_pretrained(
    phi3_checkpoint,
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(phi3_checkpoint)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Keyword arguments forwarded to `pipe` on every call; sampling is disabled.
generation_args = dict(
    max_new_tokens=500,
    return_full_text=False,
    temperature=0.0,
    do_sample=False,
)


In [None]:
# Display the entry at index 23 of `retriever_data`.
# NOTE(review): `retriever_data` is not assigned in any cell visible above —
# confirm it is loaded before this cell runs on a fresh kernel.
retriever_data[23]

In [None]:
# ROBIS ids: domains 1-3 have five items each, domain 4 has six, followed by
# the three lettered items A-C.
robis_question_lst = [
    f"{domain}.{item}"
    for domain, item_count in ((1, 5), (2, 5), (3, 5), (4, 6))
    for item in range(1, item_count + 1)
] + ['A', 'B', 'C']

# AMSTAR-2 ids in the (non-numeric) order in which they are consumed later.
# presumably this mirrors the ordering of the AMSTAR entries in retriever_data — verify.
amstar_question_lst = '2 1 3 4 5 6 7 8 9 11 14 15 12 10 16 13'.split()

In [None]:
# Answer every retrieved question with the Phi-3 pipeline and collect one
# record per question in `result`.
#
# Fixes over the previous version of this cell:
#   * the loop index was never advanced, so the `while` loop never terminated;
#   * `result.append()` was called without an argument (TypeError);
#   * `passage` was unbound (or stale from the previous iteration) whenever a
#     question had no positive contexts;
#   * the record was bound to the name `dict`, shadowing the builtin, and did
#     not include the answer or quote that had just been computed.
#
# NOTE(review): `retriever_data` is not assigned in any cell visible in this
# notebook — it must be loaded before this cell runs on a fresh kernel.
NO_INFORMATION = "No Information"

# Entries before this index are labelled ROBIS and the rest AMSTAR-2.
num_robis_questions = len(robis_question_lst)

result = []
for i, item in enumerate(retriever_data):
    if i < num_robis_questions:
        question_type = 'ROBIS'
        question_id = robis_question_lst[i]
    else:
        question_type = "AMSTAR-2"
        question_id = amstar_question_lst[i - num_robis_questions]

    question = item['question']
    if not item['positive_ctxs']:
        # Nothing was retrieved for this question, so there is no passage to
        # show the model; record placeholders instead.
        passage = NO_INFORMATION
        answer = NO_INFORMATION
        quote = NO_INFORMATION
    else:
        # Use only the first positive context as the supporting passage.
        passage = passage_data[item['positive_ctxs'][0]]['contents']
        # NOTE(review): there is no space between the question and "Only
        # answer"; the prompt text is kept verbatim here.
        messages = [
            {"role": "user", "content": "Based on the paragraph, answer a question. The paragraph is: " + passage},
            {"role": "user", "content": "The question is: " + question + "Only answer between 'Yes' and 'No'. Then, give me a quote from the paragraph that supports the answer without explanation. Answer in the format of 'answer:'', quote: ''. Do not include explanations other than the answer and quote."}
        ]
        answer, quote = get_answer(messages, generation_args, pipe)

    result.append({
        "question_type": question_type,
        "question_id": question_id,
        "question": question,
        "context": passage,
        "answer": answer,
        "quote": quote,
    })
