## Question Decomposition

In [1]:
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
import json
import os
import hashlib
import pandas as pd

from utils.PICO.prompt import get_zero_shot_pipeline_prompt,get_few_shot_pipeline_prompt
from utils.PICO.few_shot import match_few_shot, create_example_selector
from utils.PICO.pfe import generate_experience,generate_answer,combine_examples_with_experience
from utils.PICO.base import create_dataset



# ---- Hyperparameters: replace every placeholder below before running ----

# Path of the JSON configuration file, e.g. 'config/config.json'.
YOUR_CONFIG_PATH = "YOUR_CONFIG_PATH"

# Directory that holds PICO_Information.json,
# e.g. 'data/2021ACR RA/Question_Decomposition'.
YOUR_QUESTION_DECOMPOSTITION_PATH = "YOUR_QUESTION_DECOMPOSTITION_PATH"

# Dataset to use. Choose from: 2021ACR RA, 2020EAN Dementia, 2024KDIGO CKD.
# To use your own dataset, modify the code accordingly.
dataset_name = "2021ACR RA"

# The clinical question to decompose. Example:
# "In RA patients who have achieved sustained remission for over one year
# with DMARD monotherapy, is drug discontinuation advisable?"
clinical_question = "YOUR_CLINICAL_QUETSION"

In [None]:

# Load the JSON configuration and build the chat model used for question decomposition.
# os.fspath keeps the result identical to the former single-argument os.path.join call.
config_path = os.fspath(YOUR_CONFIG_PATH)
with open(config_path, 'r', encoding="utf8") as file:
    config = json.load(file)
model_config = config['model']

# Every setting for this step lives under one sub-dictionary, so look it up once.
qd_model_config = model_config['question_decomposition_model']
# 'provider' is optional and defaults to 'OpenAI'; the client built below is
# always ChatOpenAI regardless of its value.
provider = qd_model_config.get('provider', 'OpenAI')
model_name = qd_model_config['model_name']
api_key = qd_model_config['API_KEY']
api_base_URL = qd_model_config['BASE_URL']
qd_model = ChatOpenAI(openai_api_key=api_key, base_url=api_base_URL, model=model_name)


In [5]:
# Quicker provides three question decomposition methods: zero-shot, few-shot,
# and self-reflection. Pick whichever one fits your needs.
# Build the pipeline.

# Zero-shot: the formatted prompt is fanned out to two parallel branches, so the
# chain result carries both the parsed model output ("generation_chain") and
# the prompt that was sent ("prompt_value").
pipeline_prompt = get_zero_shot_pipeline_prompt(dataset_name)
output_parser = StrOutputParser()
later_zero_shot_exp = qd_model | output_parser
zero_shot_branches = RunnableParallel(
    generation_chain=later_zero_shot_exp,
    prompt_value=RunnablePassthrough(),
)
local_zero_shot_chain = pipeline_prompt | zero_shot_branches

# NOTE: everything between the triple quotes below is a plain string literal, so
# none of it is executed. It keeps the setup for the two alternative methods
# (few-shot -> local_few_shot_chain, self-reflection -> local_pfe_chain) as a
# template: move the relevant statements out of the string to use them.
'''
# Additional hyperparameters needed for few-shot and self-reflection
shot_num = 5
model_embedding = OpenAIEmbeddings(
    api_key=config['model']['embeddings']['API_KEY'],
    base_url=config['model']['embeddings']['BASE_URL'],
    model=config['model']['embeddings']['model_name'],
)
# Following code is an example of creating a train dataset. Or you can specify your own train dataset (list) as examples. 
train_num = 20
train_dataset, train_dataset_df = create_dataset(
        dataset_name, 'train', train_num, return_df=True
    )

# For few-shot, you can use the following code:
few_shot_prompt = match_few_shot(
        shot_num, train_dataset, model_embedding, dataset_name
    )
pipeline_prompt = get_few_shot_pipeline_prompt(dataset_name, few_shot_prompt)
output_parser = StrOutputParser()
later_few_shot_exp = qd_model | output_parser
local_few_shot_chain = pipeline_prompt | RunnableParallel(
    generation_chain=later_few_shot_exp, prompt_value=RunnablePassthrough()
)


# Additional hyperparameters needed for self-reflection
pfe_save_path = "YOUR_PFE_SAVE_PATH" # The path to save the answer-experience pairs
regenerate = False # True if you want to regenerate the experience
# For self-reflection, you can use the following code:
data_path = os.path.join(
    pfe_save_path, f'train_data_with_exp_tn{shot_num}.csv'
)  
if os.path.exists(data_path):
    data_with_generation = pd.read_csv(data_path)
    if regenerate:
        # generate experience
        data_with_exp = data_with_generation.apply(
            lambda x: generate_experience(qd_model, x, dataset_name), axis=1
        )
        data_with_exp.to_csv(data_path, index=False)
    else:
        data_with_exp = data_with_generation

else:
    # generate answer
    example_selector = create_example_selector(
        shot_num + 1, train_dataset, model_embedding, dataset_name, 'pfe_fs'
    )
    data_with_generation = train_dataset_df.apply(
        lambda x: generate_answer(x, qd_model, example_selector, dataset_name), axis=1
    )
    data_with_generation.to_csv(data_path, index=False)
    # generate experience
    data_with_exp = data_with_generation.apply(
        lambda x: generate_experience(qd_model, x, dataset_name), axis=1
    )
    data_with_exp.to_csv(data_path, index=False)


# creater pfe_example_selector
train_dataset_with_exp = []
for i in range(len(data_with_exp)):
    train_dataset_with_exp.append(
        {
            "Question": data_with_exp['Question'].iloc[i],
            "Answer": data_with_exp['Answer'].iloc[i],
            "Experience": data_with_exp['Experience'].iloc[i],
        }
    )
pfe_example_selector = create_example_selector(
    shot_num, train_dataset_with_exp, model_embedding, dataset_name, 'pfe'
)


output_parser = StrOutputParser()
later_npe_exp = qd_model | output_parser
local_pfe_chain = RunnableLambda(
    lambda x: combine_examples_with_experience(
        pfe_example_selector=pfe_example_selector, dataset_name=dataset_name, **x
    )
) | RunnableParallel(
    generation_chain=later_npe_exp, prompt_value=RunnablePassthrough()
)
'''

  pipeline_prompt = PipelinePromptTemplate(


In [6]:
# Run the selected chain on the clinical question.
# Swap in a different chain here if you use one of the other two methods.
chain_input = {"Question": clinical_question}
answer_dict = local_zero_shot_chain.invoke(chain_input)

In [7]:
# Show the text produced by the chain's "generation_chain" branch.
decomposition_text = answer_dict['generation_chain']
print(decomposition_text)

```json
{
    "P": "RA patients in sustained remission for over one year with DMARD monotherapy"
}
```
```json
{
    "I": "Drug discontinuation"
}
```
```json
{
    "C": "Continued DMARD monotherapy"
}
```
```json
{
    "O": "Risk of disease flare or remission maintenance"
}
```


In [9]:
# Check that the output printed above is satisfactory, then record the reviewed
# values here. You can adapt this step to your needs (for example with a GUI);
# the assignments below are a simple example.

# Population (str)
population = "RA patients in sustained remission for over one year with DMARD monotherapy"

# Intervention (str)
intervention = "Drug discontinuation"

# Comparison (list)
comparison = [
    "Continued DMARD monotherapy",
]

In [10]:
# Record the reviewed PICO elements for this clinical question in
# PICO_Information.json. The path is built once and used for both read and write.
pico_file_path = os.path.join(YOUR_QUESTION_DECOMPOSTITION_PATH, 'PICO_Information.json')

# Read the existing entries; start from an empty list when the file has not
# been created yet, so the first question can be saved without a manual setup step.
try:
    with open(pico_file_path, 'r', encoding="utf8") as file:
        pico_list = json.load(file)
except FileNotFoundError:
    pico_list = []

# Build the entry for this question. The index is the first 8 hex digits of the
# SHA-256 digest of the question text.
pico_dict = {
    'Index': hashlib.sha256(clinical_question.encode('utf-8')).hexdigest()[:8],
    'Question': clinical_question,
    'P': population,
    'I': intervention,
    'C': comparison,
}

# Re-running this cell for the same question must not pile up duplicates that
# share one Index: drop any earlier entry for this exact question, then append.
pico_list = [
    entry
    for entry in pico_list
    if not (isinstance(entry, dict) and entry.get('Question') == clinical_question)
]
pico_list.append(pico_dict)

# Save to the PICO information file.
with open(pico_file_path, 'w', encoding="utf8") as file:
    json.dump(pico_list, file, indent=4, ensure_ascii=False)