In [None]:
import pandas as pd
import numpy as np

In [None]:
## Prompts

# Few-shot prompt for the verification step: eight worked examples, each an
# INPUT QUESTION / INPUT CONTEXT / STATEMENT triple followed by a TRUE or FALSE verdict.
# The verdict prompt for a new statement is built by appending that instance to this text.
FEW_SHOT_ATOMIC_VERDICT = '''
You are a helpful AI assistant answering medical and clinical questions.

You will be given an input medical case, input question, a statement that is related to the question, and input context related to the statement. 
Based on the given input question and input context, is the statement true or false? Please only refer to the veracity of the given statement, not the veracity of the whole question.
Please only answer with TRUE or FALSE.

########
    
INPUT QUESTION:
A patient with metastatic CRPC is unable to tolerate the standard 1000 mg/day dose of abiraterone due to financial constraints. What alternative dosing strategy could be considered?

INPUT CONTEXT:
Abiraterone can be given at 250 mg/day and administered following a low- fat breakfast as an alternative to the dose of 1000 mg/day after an overnight fast.

STATEMENT:
An alternative dosing strategy exists for a patient with metastatic castration-resistant prostate cancer (CRPC) who cannot tolerate the standard 1000 mg/day dose of abiraterone due to financial constraints.

Is the given statement TRUE or FALSE based on question and context? The statement is: TRUE

########

INPUT QUESTION:
A patient with metastatic CRPC is unable to tolerate the standard 1000 mg/day dose of abiraterone due to financial constraints. What alternative dosing strategy could be considered?

INPUT CONTEXT:
Therefore, abiraterone can be given at 250 mg/day administered following a low-fat breakfast, as an alternative to the dose of 1000 mg/day after an overnight fast in patients who will not take or cannot afford the standard dose. The cost savings may reduce financial toxicity and improve adherence.

STATEMENT:
Taking abiraterone with a low-fat breakfast may help improve adherence.

Is the given statement TRUE or FALSE based on question and context? The statement is: FALSE

########

INPUT QUESTION:
A patient with a newly diagnosed prostate cancer has a clinical stage of T2c, 50% biopsy cores positive, PSA of 6 ng/ml and a Gleason score of 7a. How might the NCCN risk stratification schema categorize this patient?

INPUT CONTEXT:
Clinicians should use clinical T stage, serum PSA, Grade Group (Gleason score), and tumor volume on biopsy to risk stratify patients with newly diagnosed prostate cancer

STATEMENT:
The risk-classification is based on the combination of clinical stage, Gleason score, and PSA level.

Is the given statement TRUE or FALSE based on question and context? The statement is: TRUE

########

INPUT QUESTION:
A patient with a newly diagnosed prostate cancer has a clinical stage of T2c, 50% biopsy cores positive, PSA of 6 ng/ml and a Gleason score of 7a. How might the NCCN risk stratification schema categorize this patient?

INPUT CONTEXT:
Specifically, the NCCN Guidelines subdivide intermediate-risk disease into favourable and unfavourable intermediate-risk, with unfavourable features including ISUP grade group 3, and/or ≥ 50% positive systematic biopsy cores and/or at least two intermediate-risk factors. Intermediate risk factors are cT2b–cT2c, Grade Group 2 or, 3 PSA 10–20 ng/mL.

STATEMENT:
The patient would be categorized as intermediate-risk.

Is the given statement TRUE or FALSE based on question and context? The statement is: FALSE

########

INPUT QUESTION:
How do I treat a patient according to the ASCENDE-RT trial?

INPUT CONTEXT:
The randomized ASCENDE-RT trial compared two methods of dose escalation in 398 patients with intermediate- or high-risk prostate cancer: dose-escalated EBRT boost to 78 Gy or LDR brachytherapy boost. All patients were initially treated with 12 months of ADT and pelvic EBRT to 46 Gy.

STATEMENT:
All patients previously recieved 12 months of ADT and EBRT to 46 Gy.

Is the given statement TRUE or FALSE based on question and context? The statement is: TRUE

########

INPUT QUESTION:
How do I treat a patient according to the ASCENDE-RT trial?

INPUT CONTEXT:
The randomized ASCENDE-RT trial compared two methods of dose escalation in 398 patients with intermediate- or high-risk prostate cancer: dose-escalated EBRT boost to 78 Gy or LDR brachytherapy boost. All patients were initially treated with 12 months of ADT and pelvic EBRT to 46 Gy

STATEMENT:
According to the ASCENDE-RT trial, patients with biochemical recurrence (BCR) after radical prostatectomy should be treated with early salvage radiotherapy (SRT) combined with androgen deprivation therapy (ADT).

Is the given statement TRUE or FALSE based on question and context? The statement is: FALSE

########

INPUT QUESTION:
A 55-year-old man with a life expectancy of over 10 years has been diagnosed with very low-risk prostate cancer. What management strategy is recommended for him according to the NCCN guidelines?

INPUT CONTEXT:
At this time, the NCCN Panel consensus is that active surveillance is preferred for all patients with very-low-risk prostate cancer and life expectancy greater than 10 years.

STATEMENT:
Active surveillance is preferred for patients with very low-risk prostate cancer.

Is the given statement TRUE or FALSE based on question and context? The statement is: TRUE

########

INPUT QUESTION:
A 55-year-old man with a life expectancy of over 10 years has been diagnosed with very low-risk prostate cancer. What management strategy is recommended for him according to the NCCN guidelines?

INPUT CONTEXT:
Active surveillance is preferred for patients with very-low-risk prostate cancer and a life expectancy
≥10 years. (Observation is preferred for patients with a life expectancy <10 years and very-low-risk disease.) Active surveillance is preferred for most patients with low-risk prostate cancer and a life expectancy ≥10 years. The panel recognizes that there is heterogeneity across this risk group, and that some factors may be associated with an increased probability of near-term grade reclassification including high PSA density, a high number of positive cores (eg, ≥3), and high genomic risk (from tissue-based molecular tumor analysis). For some of these patients, upfront treatment with RP or prostate RT may be preferred based on shared decision-making.

STATEMENT:
According to the NCCN guidelines, the patient is a 55-year-old man.

Is the given statement TRUE or FALSE based on question and context? The statement is: FALSE

########
'''

# Few-shot prompt for the rewrite step: five worked examples, each an
# INPUT QUESTION / INPUT CONTEXT / STATEMENT triple followed by the rewritten statement.
# The rewrite prompt for a new statement is built by appending that instance to this text.
# The drug name in the first example's rewritten statement is spelled "abiraterone"
# (it was previously misspelled), so the model is not shown a wrong spelling to imitate.
FEW_SHOT_REWRITE = '''
You are a helpful AI assistant answering medical and clinical questions.

You will be given an input medical case, input question, a statement that is related to the question, and input context related to the statement. 
The statement was found not to be supported by the given input context when answering the given input question. 
Please rewrite the statement to be supported by the input context in terms of input question.

########

INPUT QUESTION:
A patient with metastatic CRPC is unable to tolerate the standard 1000 mg/day dose of abiraterone due to financial constraints. What alternative dosing strategy could be considered?

INPUT CONTEXT:
Therefore, abiraterone can be given at 250 mg/day administered following a low-fat breakfast, as an alternative to the dose of 1000 mg/day after an overnight fast in patients who will not take or cannot afford the standard dose. The cost savings may reduce financial toxicity and improve adherence.

STATEMENT:
Taking abiraterone with a low-fat breakfast may help improve adherence.

The rewritten statement is: Taking a lower dose of abiraterone may help improve adherence due to a lower financial burden.

########

INPUT QUESTION:
A patient with recurrent prostate cancer after radical prostatectomy shows a PSA level of 0.7 ng/ml and ISUP grade group 4. The physician is considering salvage radiotherapy RT. What do you recommend for this patient?

INPUT CONTEXT:
within two years of BCR showed that SRT was associated with a 3-fold increase in PCa-specific survival relative to those who received no salvage treatment (p < 0.001). Salvage RT has been shown to be effective mainly in patients with a short PSA-DT [960]. In a retrospective multi-centre study including 25,551 patients with at most one high-risk factor after RP (ISUP grade group 4-5 or pT3/4), initiating sRT above a PSA level of 0.25 ng/mL was associated with increased ACM-risk. After a median follow-up of six years, patients who received sRT at a PSA level >0.25 ng/mL had a significantly higher ACM-risk (AHR, 1.49; 95% CI, 1.11 to 2.00; P =.008) compared with men who received sRT when the PSA was ≤0.25 mg/mL [961]. For an overview of SRT see Table 6.4.3. The EAU BCR definitions have been externally validated and may be helpful for individualised treatment decisions [898, 903]. Despite the indication for salvage RT, a ‘wait and see‘ strategy remains an option for the EAU BCR ‘Low-Risk’ group [898,

STATEMENT:
The patient's current PSA level of 0.7 ng/ml falls within the range that is beneficial for SRT.

The rewritten statement is: The patient's current PSA level of 0.7 ng/ml is above the threshold of 0.25 ng/ml, which has been associated with an increased risk of adverse outcomes; therefore, careful consideration should be given to initiating salvage radiotherapy (SRT) as it may still provide a benefit in this context.

########

INPUT QUESTION:
A patient with a newly diagnosed prostate cancer has a clinical stage of T2c, 50% biopsy cores positive, PSA of 6 ng/ml and a Gleason score of 7a. How might the NCCN risk stratification schema categorize this patient?

INPUT CONTEXT:
Specifically, the NCCN Guidelines subdivide intermediate-risk disease into favourable and unfavourable intermediate-risk, with unfavourable features including ISUP grade group 3, and/or ≥ 50% positive systematic biopsy cores and/or at least two intermediate-risk factors. Intermediate risk factors are cT2b–cT2c, Grade Group 2 or, 3 PSA 10–20 ng/mL.

STATEMENT:
The patient would be categorized as intermediate-risk.

The rewritten statement is: The patient would be categorized as intermediate risk, more specifically intermediate-unfavorable.

########

INPUT QUESTION:
How do I treat a patient according to the ASCENDE-RT trial?

INPUT CONTEXT:
The randomized ASCENDE-RT trial compared two methods of dose escalation in 398 patients with intermediate- or high-risk prostate cancer: dose-escalated EBRT boost to 78 Gy or LDR brachytherapy boost. All patients were initially treated with 12 months of ADT and pelvic EBRT to 46 Gy.

STATEMENT:
According to the ASCENDE-RT trial, patients with biochemical recurrence (BCR) after radical prostatectomy should be treated with early salvage radiotherapy (SRT) combined with androgen deprivation therapy (ADT).

The rewritten statement is: In the ASCENDE-RT trial, patients with intermediate or high-risk prostate cancer were treated with External Beam radiotherapy (EBRT) to the prostate and whole pelvis (46 Gy) followed by a Low Dose Rate (LDR) Brachytherapy boost. All patients received 12 months of ADT.

########

INPUT QUESTION:
A 55-year-old man with a life expectancy of over 10 years has been diagnosed with very low-risk prostate cancer. What management strategy is recommended for him according to the NCCN guidelines?

INPUT CONTEXT:
Active surveillance is preferred for patients with very-low-risk prostate cancer and a life expectancy ≥10 years. (Observation is preferred for patients with a life expectancy <10 years and very-low-risk disease.) Active surveillance is preferred for most patients with low-risk prostate cancer and a life expectancy ≥10 years. The panel recognizes that there is heterogeneity across this risk group, and that some factors may be associated with an increased probability of near-term grade reclassification including high PSA density, a high number of positive cores (eg, ≥3), and high genomic risk (from tissue-based molecular tumor analysis). 

STATEMENT:
According to the NCCN guidelines, the patient is a 55-year-old man.

The rewritten statement is: The patient is a 55-year old man.

'''

### Data Processing and Database Creation

In [None]:
'''
Traverse through the knowledge base directory, read all files with PaperMage (library for OCR), save them as JSON.

PaperMage can recognize section names in scientific papers, page numbers, etc.
'''

import json
import os

from papermage.recipes import CoreRecipe

#Load PaperMage and test with one document.
recipe = CoreRecipe()
doc = recipe.run("tests/fixtures/papermage.pdf")


# Traverse and run
top_directory = "/home/jvladika/LLM-KB/"
subfolders = os.listdir(top_directory)

# Parsed documents are written here. Create the directory up front: without it every
# open() below raises, the except branch prints the error, and nothing gets saved.
output_directory = './LLM-KB/processed_docs/'
os.makedirs(output_directory, exist_ok=True)

all_texts = list()
all_names = list()

# NOTE: the [0:1] slice restricts the run to the first entry returned by os.listdir.
for sub in subfolders[0:1]:
    subname = os.path.join(top_directory, sub)

    for filename in os.listdir(subname):

        print(filename)
        all_names.append(filename)

        try:
            filepath = str(os.path.join(subname, filename))
            doc = recipe.run(filepath)

            with open(os.path.join(output_directory, filename + '.json'), 'w') as f_out:
                json.dump(doc.to_json(), f_out, indent=4)

        except Exception as e:
            # Best effort: report the failing file's error and move on to the next one.
            print(e)
            continue
     

In [None]:
'''
Once the JSONs are created, we can use them to create a vector database.

Use the ChromaDB as a vector database to store embeddings, implementation through LangChain.
'''

import os
import sys
from copy import deepcopy

import chromadb

from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import CharacterTextSplitter


def _read_knowledge_base(directory):
    """Read every entry of *directory* as text.

    Files are opened as UTF-8 with undecodable bytes ignored. Each file name is
    printed as it is read.

    Returns:
        A ``(names, texts)`` pair of parallel lists in ``os.listdir`` order.
    """
    names = []
    texts = []
    for filename in os.listdir(directory):
        print(filename)
        names.append(filename)
        with open(os.path.join(directory, filename), 'r', encoding='utf-8', errors='ignore') as f:
            texts.append(f.read())
    return names, texts


def _chunk_documents(texts, names, splitter):
    """Split each text with *splitter* and tag every chunk with its source file name.

    Returns:
        A flat list of the document objects produced by
        ``splitter.create_documents``, each with ``metadata = {"source": name}``.
    """
    chunk_documents = []
    for text, doc_name in zip(texts, names):
        pieces = splitter.split_text(text)
        doc_objects = splitter.create_documents(pieces)
        for do in doc_objects:
            do.metadata = {"source" : doc_name}
        chunk_documents.extend(doc_objects)
    return chunk_documents


# Chunk documents into pieces of up to 1024 characters with a 200-character overlap,
# splitting on spaces. The same settings are used for both knowledge bases.
text_splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=200, separator=" ")

# Choose the open-source embedding function. It is built once and shared by both stores.
model_kwargs = {'device': 'mps'}
embedding_function = SentenceTransformerEmbeddings(model_name="pritamdeka/S-PubMedBert-MS-MARCO-SCIFACT", model_kwargs=model_kwargs)


##### Prostate KB

top_directory = "./Prostate_KB/"
all_names, all_text = _read_knowledge_base(top_directory)
documents = all_text

chunks_docs = _chunk_documents(documents, all_names, text_splitter)
prostate_chunks_list = deepcopy(chunks_docs)

# Load it into Chroma DB
prostate_db = Chroma.from_documents(prostate_chunks_list, embedding_function, persist_directory="vector_data/prostate_db_9")


##### Same for Breast KB

top_directory = "./Breast_KB/"
all_names, all_text = _read_knowledge_base(top_directory)
breast_docs = all_text

chunks_docs = _chunk_documents(breast_docs, all_names, text_splitter)
breast_chunks_list = deepcopy(chunks_docs)

# Load it into Chroma DB
breast_db = Chroma.from_documents(breast_chunks_list, embedding_function, persist_directory="vector_data/breast_db_9")


In [None]:
'''
If chunking already done and stored, load persisted data into the vector databases.
'''

import chromadb

from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import CharacterTextSplitter

# Embedding model name and device are the same values the build cell passes.
model_kwargs = dict(device='mps')
embedding_function = SentenceTransformerEmbeddings(
    model_name="pritamdeka/S-PubMedBert-MS-MARCO-SCIFACT",
    model_kwargs=model_kwargs,
)

# Open the two persisted stores, prostate first, with the shared embedding function.
# NOTE(review): these paths end in _6 while the build cell persists to *_db_9 —
# confirm which stored version is meant to be loaded.
prostate_db, breast_db = (
    Chroma(persist_directory=store_path, embedding_function=embedding_function)
    for store_path in ("vector_data/prostate_db_6", "vector_data/breast_db_6")
)


### Question Answering and Fact Checking

In [None]:
'''
Load the datasets of questions and answers about prostate/breast cases.
'''

# Breast cases: keep only the rows that have a human answer.
df_breast = pd.read_csv("QA_pairs_breast_RA.csv")
df_breast = df_breast.loc[df_breast.human_answer.notna()]
breast_questions, breast_answers = df_breast.question.tolist(), df_breast.human_answer.tolist()

# Prostate cases: same filtering.
df_prostate = pd.read_csv("QA_pairs_excel_JP.csv")
df_prostate = df_prostate.loc[df_prostate.human_answer.notna()]
prostate_questions, prostate_answers = df_prostate.question.tolist(), df_prostate.human_answer.tolist()



### Template for QA generation with few-shot examples from first four questions
# NOTE(review): the generation loop later iterates over all of prostate_questions,
# which includes the four questions used as examples here.


FEW_SHOT_TEMPLATE = '''
You are a helpful AI assistant answering medical and clinical questions. Here are some examples of questions and answers.

**Few-Shot Examples:**'''

# Append the first four prostate question/answer pairs, in order.
for example_idx in range(4):
    FEW_SHOT_TEMPLATE += f'''
    **Question:** {prostate_questions[example_idx]}
    **Answer:** {prostate_answers[example_idx]}
'''

FEW_SHOT_TEMPLATE

In [None]:
'''
Generate answers to questions based on few-shot prompt.
'''

import os
from openai import OpenAI


def get_system_prompt(query) -> str:
    """Return the system prompt for answering *query*.

    The text is ``FEW_SHOT_TEMPLATE`` followed by the answering instructions,
    the question itself and two trailing newlines.
    """
    instructions = '''
    ---
    Now, please answer the following question based on the input context provided. The context can be noisy. Please only use the information from the context. 
    Please provide a factual and clear answer, similar in style and length to the examples above.
    
    **Question:** '''

    return "".join((FEW_SHOT_TEMPLATE, instructions, query, "\n\n"))

def get_chat_prompt(input_context: str, query, system_message: bool = True) -> list:
    """Build the chat messages for answer generation.

    Args:
        input_context: Retrieved context text placed in the user message.
        query: The question; only used to build the system message.
        system_message: When true, a system message from
            ``get_system_prompt(query)`` is placed before the user message.

    Returns:
        A list of message dicts with ``role`` and ``content`` keys.
    """
    system_turns = (
        [{"role": "system", "content": f"{get_system_prompt(query)}"}]
        if system_message
        else []
    )

    # The user message carries the context and ends on the open answer marker.
    user_turn = {"role": "user", "content": f"""INPUT CONTEXT: {input_context}
    
            **Answer:**: """}

    return system_turns + [user_turn]


# Take the key from the OPENAI_API_KEY environment variable when it is set, so the
# secret does not have to be pasted into the notebook; 'KEY' stays as the placeholder fallback.
apikey = os.environ.get("OPENAI_API_KEY", 'KEY')
client = OpenAI(api_key=apikey)


def query_gpt(prompt):
    """Send chat messages to the module-level ``client`` and return the raw response.

    Args:
        prompt: The messages in chat format.
    """
    request_options = {
        "model": "gpt-4o",    # set model version
        "max_tokens": 4096,
        "messages": prompt,   # provide prompt in chat format
        "temperature": 0,     # set model temperature = 0
    }
    return client.chat.completions.create(**request_options)


all_responses = []

# Answer every prostate question from its retrieved context.
for question in prostate_questions:
    query = question

    ## Get top 7 chunks (most similar to the query) from vector DB, and concatenate into context.
    results = prostate_db.search(query, "mmr", k=7)
    context = "".join(r.page_content + "\n" for r in results)

    prompt = get_chat_prompt(context, query)
    response = query_gpt(prompt=prompt)

    # Keep only the stripped text of the first choice.
    all_responses.append(response.choices[0].message.content.strip())
 
''' 
for question in breast_questions:
    query = question
    results = breast_db.search(query, "similarity", k=7)
    context = ""
    for r in results:
        context += r.page_content
        context += "\n"
    
    prompt = get_IE_zero_shot_chat_prompt(context, query)
    response = query_gpt(prompt=prompt)
    result = response.choices[0].message.content.strip()

    all_responses.append(result)
''' 

print(all_responses)

In [None]:

'''
Split the responses into atomic facts based on the few-shot examples.
'''


# Rebind the module-level OpenAI client; get_atomic_facts below reads this global.
client = OpenAI(api_key=apikey)

def get_atomic_facts(resp):
    """Ask gpt-4o to break *resp* into independent facts.

    The few-shot instructions together with *resp* are sent as the system
    message; the user message contains only whitespace. The reply is split on
    the ``--`` separator.

    Returns:
        A ``(result, atoms)`` tuple: the stripped reply text, and the list of
        stripped pieces obtained by splitting it on ``--`` (empty pieces are kept).
    """
    few_shot_instructions = '''
        Please breakdown the following text into independent facts (use -- as fact separator, do not use numbered list):
        For a 72-year-old male patient with locally advanced prostate cancer (cT3/cT4) treated with EBRT, the recommended duration of androgen deprivation therapy (ADT) is two to three years.
        -- The patient is a 72-year-old male.
        -- The patient has locally advanced prostate cancer.
        -- The prostate cancer is classified as cT3/cT4.
        -- The patient was treated with EBRT (External Beam Radiation Therapy).
        -- The recommended duration of androgen deprivation therapy (ADT) for this patient is two to three years.
        
        Please breakdown the following text into independent facts (use -- as fact separator, do not use numbered list):
        Androgen deprivation therapy (ADT) should be included in the treatment of a patient with clinically lymph node-positive prostate cancer (cN1) receiving external beam radiation therapy (EBRT). The recommended duration of ADT is 2 to 3 years. Additionally, in patients with good WHO performance status and without significant cardiovascular disease, the use of Abiraterone can be considered for a total of 2 years alongside ADT.
        -- Androgen deprivation therapy (ADT) should be included in the treatment of a patient with clinically lymph node-positive prostate cancer (cN1).
        -- The patient is receiving external beam radiation therapy (EBRT).
        -- The recommended duration of ADT is 2 to 3 years.
        -- In patients with good WHO performance status, the use of Abiraterone can be considered.
        -- The use of Abiraterone can be considered for a total of 2 years alongside ADT.
        -- Patients should not have significant cardiovascular disease for the use of Abiraterone to be considered.

        Please breakdown the following text into independent facts (use -- as fact separator, do not use numbered list):
        For a patient with high-risk prostate cancer undergoing dose-escalated external beam radiation therapy (EBRT), it is recommended to administer androgen deprivation therapy (ADT) for 2 to 3 years concurrently.
        -- For a patient with high-risk prostate cancer, it is recommended to administer androgen deprivation therapy (ADT).
        -- Androgen deprivation therapy (ADT) is recommended for 2 to 3 years.
        -- Androgen deprivation therapy (ADT) is recommended to be administered concurrently with dose-escalated external beam radiation therapy (EBRT).
        -- Dose-escalated external beam radiation therapy (EBRT) is a treatment for high-risk prostate cancer.
        
        Please breakdown the following text into independent facts (use -- as fact separator, do not use numbered list):
        Fertility and fertility preservation should be discussed with the premenopausal woman before the initiation of any systemic treatment. This is important to address the potential impact of cancer treatment on fertility and to explore options such as egg or embryo freezing, ovarian suppression, or other fertility preservation strategies.
        -- Fertility and fertility preservation should be discussed with the premenopausal woman before starting any systemic treatment.
        -- Cancer treatment has a potential impact on fertility.  
        -- Discussing fertility and fertility preservation addresses the potential impact of cancer treatment on fertility.
        -- Options for fertility preservation include egg or embryo freezing, ovarian suppression, or other strategies.
        -- The discussion about fertility and fertility preservation is important before initiating systemic treatment.

        Please breakdown the following text into independent facts (use -- as fact separator, do not use numbered list):
        For a 60-year-old woman with HR-positive, HER2-negative early breast cancer with uncertainty about the need for adjuvant chemotherapy, gene expression assays and endocrine response assessment in the preoperative setting can be used to help guide the decision. These tests help assess the benefit of chemotherapy based on the biological characteristics of the tumor.
        -- The patient is a 60-year-old woman.
        -- The patient has HR-positive, HER2-negative early breast cancer.
        -- There is uncertainty about the need for adjuvant chemotherapy.
        -- Gene expression assays and endocrine response assessment can be used in the preoperative setting to help guide the decision regarding chemotherapy.
        -- Gene expression assays and endocrine response assessment help assess the benefit of chemotherapy.
        -- These tests evaluate the biological characteristics of the tumor.​
        
        Please breakdown the following text into independent facts (use -- as fact separator, do not use numbered list): \n''' + resp + '''
        -- 
        '''

    # The generated response travels in the system message above; the user turn is left blank.
    input_context = ""
    messages = [
        {"role": "system", "content": f"{few_shot_instructions}"},
        {"role": "user", "content": f"""{input_context}
                """},
    ]

    completion = client.chat.completions.create(
        model="gpt-4o",
        max_tokens=4096,
        messages=messages,
        temperature=0,
    )
    result = completion.choices[0].message.content.strip()

    # Split on the fact separator and trim surrounding whitespace from each piece.
    atoms = [piece.strip() for piece in result.split("--")]
    return result, atoms

# Run the atomic-fact split on every generated response, keeping the raw reply
# and the list of pieces per response.
atom_results = []
all_atoms = []
for response in all_responses:
    raw_reply, pieces = get_atomic_facts(response)
    atom_results.append(raw_reply)
    all_atoms.append(pieces)

print(all_atoms)

# Drop the first piece of every list.
# NOTE(review): this presumably assumes each reply starts with the "--" separator,
# so the first piece is empty — confirm, otherwise a real fact is discarded.
cleaned_atoms = []
for ats in all_atoms:
    cleaned_atoms.append(ats[1:])
cleaned_atoms

In [None]:
'''
Determine which atomic facts are correct and which are incorrect.
'''

client = OpenAI(api_key=apikey)
new_atoms = cleaned_atoms


# all_contexts[q][a]: concatenated context text for atom a of question q.
# atom_context_results[q][a]: the retrieved chunk objects behind that text.
all_contexts = list()
atom_context_results = list()

for question_idx, atom_facts in enumerate(new_atoms):
    # The atoms come from answers to prostate_questions, in the same order.
    # (The previous lookup used an undefined name `questions`.)
    question = prostate_questions[question_idx]

    instance_contexts = list()
    # A distinct name, so the `atom_results` list built by the atom-splitting
    # cell is not overwritten here.
    instance_chunks = list()

    for atom in atom_facts:
        ## Find top 7 chunks (most similar to the atom fact) from the vector DB and concatenate them.
        results = prostate_db.search(question + " " + atom, "similarity", k=7)
        instance_chunks.append(list(results))

        # Newlines inside a chunk are flattened to spaces; chunks are separated by " | ".
        full_context = "".join(
            chunk.page_content.replace("\n", " ") + " | " for chunk in results
        )
        instance_contexts.append(full_context)

    atom_context_results.append(instance_chunks)
    all_contexts.append(instance_contexts)
    


def get_chat_prompt(question, context, atom, full_response) -> list:
    """Build the chat messages asking for a TRUE/FALSE verdict on one atomic fact.

    The few-shot text in ``FEW_SHOT_ATOMIC_VERDICT`` is followed by the new
    question, context and statement, ending on the open verdict line.

    Args:
        question: The question the answer was generated for.
        context: Retrieved context text for this statement.
        atom: The atomic statement to judge.
        full_response: Accepted but not used when building the prompt.

    Returns:
        A one-element list holding the user message dict. (The annotation used
        to say ``str``; a list is what is returned.)
    """
    prompt = FEW_SHOT_ATOMIC_VERDICT + '''
    ########

    INPUT QUESTION
    ''' + question + '''

    INPUT CONTEXT 
    ''' + context + '''
    
    STATEMENT
    ''' + atom + '''

    Is the given statement TRUE or FALSE based on question and context? The statement is: '''

    return [{"role": "user", "content": prompt}]


def query_gpt(prompt):
    """Run one gpt-4o chat completion on *prompt* through the module-level ``client``.

    Returns the response object unchanged.
    """
    completions_api = client.chat.completions
    return completions_api.create(
        model="gpt-4o",     # set model version
        max_tokens=4096,
        messages=prompt,    # provide prompt in chat format
        temperature=0,      # set model temperature = 0
    )


# all_verdicts[q][a]: stripped model reply for atom a of question q.
all_verdicts = list()

for question_idx, atom_facts in enumerate(new_atoms):
    # The atoms come from answers to prostate_questions, in the same order.
    # (The previous lookup used an undefined name `questions`.)
    question = prostate_questions[question_idx]
    full_response = all_responses[question_idx]
    instance_contexts = all_contexts[question_idx]

    instance_results = list()
    # Contexts were retrieved one per atom, so the two lists are walked in lockstep.
    for atom, atom_context in zip(atom_facts, instance_contexts):
        prompt = get_chat_prompt(question, atom_context, atom, full_response)

        response = query_gpt(prompt=prompt)
        result = response.choices[0].message.content.strip()
        instance_results.append(result)

    all_verdicts.append(instance_results)
    

## get overview of final results

def flatten(xss):
    """Return one flat list with the items of every iterable in *xss*, in order."""
    flat = []
    for xs in xss:
        flat.extend(xs)
    return flat

# Count how often each distinct verdict string occurs across all atoms.
a = np.array(flatten(all_verdicts))
verdict_labels, verdict_counts = np.unique(a, return_counts=True)
print((verdict_labels, verdict_counts))

# Then list the verdicts per question, prefixed by the question index.
for numbered_verdicts in enumerate(all_verdicts):
    print(*numbered_verdicts)


In [None]:
'''
Rewrite those atoms determined to be incorrect in the previous step, based on the newly retrieved context from previous step.
'''

def get_chat_prompt(question, context, atom) -> list:
    """Build the chat messages asking the model to rewrite one unsupported statement.

    The few-shot text in ``FEW_SHOT_REWRITE`` is followed by the new question,
    context and statement, ending on the open "rewritten statement" line.

    Args:
        question: The question the answer was generated for.
        context: Retrieved context text for this statement.
        atom: The statement to rewrite.

    Returns:
        A one-element list holding the user message dict. (The annotation used
        to say ``str``; a list is what is returned.)
    """
    prompt = FEW_SHOT_REWRITE + '''
    ########
    
    INPUT QUESTION:
    ''' + question + '''

    INPUT CONTEXT:
    ''' + context + '''
    
    STATEMENT:
    ''' + atom + '''

    The rewritten statement is: '''

    return [{"role": "user", "content": prompt}]


def query_gpt(prompt):
    """Send *prompt* (chat-format messages) to gpt-4o and return the response object.

    Uses the module-level ``client``.
    """
    settings = dict(
        model="gpt-4o",     # set model version
        max_tokens=4096,
        temperature=0,      # set model temperature = 0
    )
    # provide prompt in chat format
    return client.chat.completions.create(messages=prompt, **settings)


# all_corrections[q]: rewritten statements for the atoms of question q whose
# verdict was not "TRUE", in atom order.
all_corrections = list()

for question_idx, atom_facts in enumerate(new_atoms):
    # The atoms come from answers to prostate_questions, in the same order.
    # (The previous lookup used an undefined name `questions`.)
    question = prostate_questions[question_idx]
    instance_contexts = all_contexts[question_idx]
    instance_verdicts = all_verdicts[question_idx]

    instance_results = list()
    for atom_idx, atom in enumerate(atom_facts):
        # Only a verdict that is exactly "TRUE" leaves the atom untouched;
        # every other reply leads to a rewrite.
        verdict = instance_verdicts[atom_idx]
        if verdict == "TRUE":
            continue

        atom_context = instance_contexts[atom_idx]
        prompt = get_chat_prompt(question, atom_context, atom)

        response = query_gpt(prompt=prompt)
        result = response.choices[0].message.content.strip()
        instance_results.append(result)
        print(prompt)
        print("RESULT: ", result)

    all_corrections.append(instance_results)


In [None]:
'''
Rewrite the initial response based on the newly corrected atomic facts. 
'''


def get_chat_prompt(question, old_response, incorrect, corrected) -> list:
    """Build the chat messages asking the model to rewrite a whole response.

    Args:
        question: The original question.
        old_response: The response that contained the incorrect statements.
        incorrect: Text listing the statements found to be incorrect.
        corrected: Text listing the corrected versions of those statements.

    All four arguments are concatenated into the prompt, so they must be strings.

    Returns:
        A one-element list holding the user message dict. (The annotation used
        to say ``str``; a list is what is returned.)
    """
    prompt = '''
    You are a helpful AI assistant answering medical and clinical questions.

    You will be given an input question, old response to the question, and statements from the response found to be incorrect.
    You will also be given the corrected versions of the input statements. 
    
    Please rewrite the response to remove the incorrect claims and incorporate the corrected statements. You can rewrite it to make it more natural.
    
    INPUT QUESTION
    ''' + question + '''

    OLD RESPONSE
    ''' + old_response + '''

    INCORRECT STATEMENTS 
    ''' + incorrect +'''
    
    CORRECTED STATEMENTS
    ''' + corrected + '''

    The rewritten response is: '''

    return [{"role": "user", "content": prompt}]


def query_gpt(prompt):
    """Request a gpt-4o chat completion for *prompt* and hand back the response as is.

    Uses the module-level ``client``.
    """
    create_completion = client.chat.completions.create
    response = create_completion(
        model="gpt-4o",     # set model version
        max_tokens=4096,
        messages=prompt,    # provide prompt in chat format
        temperature=0,      # set model temperature = 0
    )
    return response
    
client = OpenAI(api_key=apikey)


# new_responses[q]: the original response when nothing was corrected for
# question q, otherwise the model's rewritten response.
new_responses = list()

for question_idx, atom_facts in enumerate(new_atoms):
    corrected = all_corrections[question_idx]
    old_response = all_responses[question_idx]

    # No corrections were produced for this answer: keep it unchanged.
    if not corrected:
        new_responses.append(old_response)
        continue

    # The atoms come from answers to prostate_questions, in the same order.
    # (The previous lookup used an undefined name `questions`.)
    question = prostate_questions[question_idx]

    # The correction step rewrites every atom whose verdict is not exactly "TRUE",
    # so the same test is used here. This keeps `incorrect` aligned one-to-one with
    # `corrected` even when the model replied with something other than "FALSE".
    incorrect = [
        atom
        for atom, verdict in zip(atom_facts, all_verdicts[question_idx])
        if verdict != "TRUE"
    ]

    prompt = get_chat_prompt(question, old_response, str(incorrect), str(corrected))

    print("Question: ", question, "\n\n Initial response: ", old_response, "\n\n Incorrect atomic facts: ", str(incorrect), 
          "\n\n Corrected atomic facts: ", str(corrected), "\n ====\n\n" )

    response = query_gpt(prompt=prompt)
    result = response.choices[0].message.content.strip()
    new_responses.append(result)
    