# Automated Question-Answer Generation from Pharmaceutical Data

This notebook systematically generates question-answer pairs from pharmaceutical data sources including:
1. PubMed target passages
2. DrugBank tables
3. Related pharmaceutical data

Each QA pair will include:
- Question
- Answer
- Source text/passage
- Related table

In [45]:
# Import required libraries
import os
import pandas as pd
import json
from pathlib import Path
from openai import OpenAI
import csv
from tqdm import tqdm
from collections import defaultdict
import time
import re


In [29]:
# Configuration and paths
PUBMED_TARGETS_DIR = '../data/Pharma/pubmed-targets'
DRUGBANK_TABLES_DIR = '../data/Pharma/drugbank-tables'
MAPPING_FILE = '../data/Pharma/pubmed-drugbank-tables.gt'
OUTPUT_FILE = 'passage_output.gt'

# Initialize your LLM API key if needed
# os.environ["OPENAI_API_KEY"]="<KEY>"
client = OpenAI()


In [11]:
def load_passage_table_mapping():
    """Load the mapping between passages and their relevant tables"""
    mapping = defaultdict(list)
    with open(MAPPING_FILE, 'r') as f:
        for line in f:
            passage_id, table_name = line.strip().split(',')
            mapping[passage_id].append(table_name)
    # print("mapping", mapping)
    return mapping

def load_target_passages():
    """Load all target passages from the pubmed-targets directory"""
    passages = {}
    target_files = Path(PUBMED_TARGETS_DIR).glob('Target-*')
    
    for file_path in target_files:
        target_id = file_path.name
        with open(file_path, 'r') as f:
            passages[target_id] = f.read()
    # print("passages", passages)
    return passages

def load_drugbank_tables():
    """Load all relevant DrugBank tables"""
    tables = {}
    csv_files = Path(DRUGBANK_TABLES_DIR).glob('*.csv')
    
    for file_path in csv_files:
        table_name = file_path.stem
        tables[table_name] = pd.read_csv(file_path)
    
    # print("tables", tables)
    return tables

In [19]:
def get_relevant_table_content(tables, table_names, max_rows=5):
    """Extract relevant content from tables for context"""
    print("Debug - Available tables:", tables.keys())
    print("Debug - Looking for table_names:", table_names)

    table_content = {}
    for table_name in table_names:
        # Remove .csv extension if present
        base_table_name = table_name.replace('.csv', '')
        
        if base_table_name in tables:
            df = tables[base_table_name]
            table_content[base_table_name] = {
                'columns': list(df.columns),
                'sample': df.head(max_rows).to_dict('records')
            }
        else:
            print(f"Debug - Table '{base_table_name}' not found in available tables")
    return table_content

def generate_questions_for_passage(passage_id, passage_text, model="gpt-4o"):
    """Generate questions for a given passage and its relevant tables using LLM"""

    # print("tables", tables)
    # print("relevant_table_names", relevant_table_names)
    
    # Limit passage length if too long (e.g., first 1000 characters)
    passage_text = passage_text[:1000] + "..." if len(passage_text) > 1000 else passage_text
    
    # # Limit to maximum 3 relevant tables
    # relevant_table_names = relevant_table_names[:3]
    
    # Get relevant table content
    # table_content = get_relevant_table_content(tables, relevant_table_names)


    # have the system generate one question answer pair that is very difficult/numerical and then taken another passage and aask the LLM to generate another question answer pair, thne try and combine the two questions and answers
    prompt = f"""
    Given the following passage(s), generate 1 meaningful question-answer pair.
    IMPORTANT: Each question MUST be answerable using information from ONLY the passage.
    Only generate questions that require information from the passage.
    Focus on pharmaceutical and medical aspects. Try and make the question as difficult and technical as possible.
    
    
    Passage (ID: {passage_id}):
    {passage_text}
    
    Generate questions in the following format:
    1. question: [specific question about drug/treatment]
       answer: [detailed answer combining information from passage and tables]
       text: [passage ID if information from passage was used (e.g. Target-123456789)]
       table: [None]
    
    Ensure every question uses information from the passage.
    """
    
    # Call your LLM here with the prompt
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a medical and pharmaceutical expert tasked with generating detailed question-answer pairs about drugs and treatments."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7
    )

    print(response.choices[0].message.content)
    
    # Parse the response into structured QA pairs
    qa_pairs = parse_llm_response(response.choices[0].message.content, passage_id)
    return qa_pairs

def parse_llm_response(response_text, passage_id):
    """Parse the LLM response into structured QA pairs"""
    qa_pairs = []
    
    # Split the response into individual QA entries
    entries = response_text.strip().split('\n\n')
    
    for entry in entries:
        if not entry.strip():
            continue
            
        lines = entry.strip().split('\n')
        current_qa = {
            'question': '',
            'answer': '',
            'text': passage_id,
            'table': 'None'  # Default value
        }
        
        for line in lines:
            line = line.strip()
            # Skip empty lines and numbering
            if not line or line.replace('.', '').strip().isdigit():
                continue
                
            # Parse each field using more robust splitting
            if 'question:' in line:
                current_qa['question'] = line.split('question:', 1)[1].strip()
            elif 'answer:' in line:
                current_qa['answer'] = line.split('answer:', 1)[1].strip()
            elif 'text:' in line:
                current_qa['text'] = line.split('text:', 1)[1].strip()
            elif 'table:' in line:
                table_value = line.split('table:', 1)[1].strip()
                # Handle NA, N/A, None cases
                current_qa['table'] = 'None' if table_value.upper() in ['NA', 'N/A', 'NONE'] else table_value
        
        # Only add complete QA pairs that have both question and answer
        if current_qa['question'] and current_qa['answer']:
            qa_pairs.append(current_qa.copy())  # Use copy to avoid reference issues
    
    return qa_pairs

In [20]:
def main():
    # Load mappings and data
    # print("Loading passage-table mappings...")
    # passage_table_mapping = load_passage_table_mapping()
    
    print("Loading target passages...")
    passages = load_target_passages()
    
    print("Loading DrugBank tables...")
    tables = load_drugbank_tables()
    
    # Initialize output list
    qa_pairs = []
    
    # Process each passage with its relevant tables
    for passage_id, passage_text in tqdm(list(passages.items())[:20]):
        # if passage_id in passage_table_mapping:
        #     relevant_tables = passage_table_mapping[passage_id]
        #     # print(passage_id, passage_text, tables, relevant_tables)
            
        # Generate QA pairs using the passage and its relevant tables
        new_qa_pairs = generate_questions_for_passage(
            passage_id,
            passage_text
        )
        # new_qa_pairs = []
        print(new_qa_pairs)
        qa_pairs.extend(new_qa_pairs)
    
    # Save results
    with open(OUTPUT_FILE, 'w', newline='') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(['question', 'answer', 'text', 'table'])  # header
        for qa_pair in qa_pairs:
            writer.writerow([
                qa_pair['question'],
                qa_pair['answer'],
                qa_pair['text'],
                qa_pair['table']
            ])
        
    print(f"Generated {len(qa_pairs)} question-answer pairs")

In [21]:
if __name__ == "__main__":
    main()

Loading target passages...
Loading DrugBank tables...


  5%|▌         | 1/20 [00:05<01:36,  5.08s/it]

1. question: What potential mechanism might explain the liver injury caused by bosentan during clinical trials, and how does the co-administration of glyburide affect this outcome?
   answer: The liver injury caused by bosentan during clinical trials might be explained by the inhibition of the hepatocanalicular bile salt export pump (BSEP/ABCB11), leading to increased serum bile salt levels. The co-administration of glyburide enhances the cholestatic potency of bosentan, suggesting that glyburide may exacerbate the inhibition of BSEP, thereby increasing the risk of liver injury.
   text: Target-11309550
   table: None
[{'question': 'What potential mechanism might explain the liver injury caused by bosentan during clinical trials, and how does the co-administration of glyburide affect this outcome?', 'answer': 'The liver injury caused by bosentan during clinical trials might be explained by the inhibition of the hepatocanalicular bile salt export pump (BSEP/ABCB11), leading to increased

 10%|█         | 2/20 [00:10<01:34,  5.23s/it]

1. question: How does the metabolism of treprostinil diethanolamine influence its co-administration with bosentan in the treatment of pulmonary arterial hypertension?
   answer: Treprostinil diethanolamine is primarily metabolized by cytochrome P450 (CYP) 2C8, with a minor contribution from CYP2C9. When co-administered with bosentan, which is known to induce CYP2C9 and 3A4, there is a potential for drug interactions. However, the study mentioned in the passage evaluated the interaction and found that the geometric mean ratios for steady-state AUC(0-12) and C(max) of treprostinil when combined with bosentan were 0.92 and 0.96, respectively, indicating that the co-administration does not significantly alter the metabolism of treprostinil.
   text: Target-20133511
   table: None
[{'question': 'How does the metabolism of treprostinil diethanolamine influence its co-administration with bosentan in the treatment of pulmonary arterial hypertension?', 'answer': 'Treprostinil diethanolamine is 

 15%|█▌        | 3/20 [00:16<01:33,  5.51s/it]

1. question: How does the CYP 2D6 genotype influence the effectiveness of tamoxifen therapy in breast cancer patients, and what factors are considered in predicting endoxifen concentrations?
   answer: The CYP 2D6 genotype is crucial for the effectiveness of tamoxifen therapy because tamoxifen is a prodrug that needs to be converted into its active metabolite, endoxifen, through the action of the CYP 2D6 enzyme. Predicting endoxifen concentrations involves considering a patient's CYP 2D6 genotype to calculate an activity score, which is adjusted for co-medication use that can inhibit 2D6 activity. Additionally, demographic factors such as age, race/ethnicity, body mass index, and the use of other medications and herbals are also taken into account to accurately predict endoxifen concentrations.
   text: Target-22294487
   table: None
[{'question': 'How does the CYP 2D6 genotype influence the effectiveness of tamoxifen therapy in breast cancer patients, and what factors are considered i

 20%|██        | 4/20 [00:19<01:12,  4.52s/it]

1. question: How does KN-62 affect interleukin-18 release in the context of sepsis, and what is its mechanism of action?
   answer: KN-62 suppresses the release of interleukin-18 from peripheral blood mononuclear cells by inhibiting ATP-mediated cellular activation through the purinoreceptor subtype P(2x7). This pharmacological intervention is significant as it can potentially curb the overproduction of interleukin-18, which is a characteristic of inflammatory conditions like sepsis.
   text: Target-14660039
   table: None
[{'question': 'How does KN-62 affect interleukin-18 release in the context of sepsis, and what is its mechanism of action?', 'answer': 'KN-62 suppresses the release of interleukin-18 from peripheral blood mononuclear cells by inhibiting ATP-mediated cellular activation through the purinoreceptor subtype P(2x7). This pharmacological intervention is significant as it can potentially curb the overproduction of interleukin-18, which is a characteristic of inflammatory co

 25%|██▌       | 5/20 [00:21<00:55,  3.73s/it]

1. question: How does omeprazole induce CYP1A1 in HepG2 cells, and how does this mechanism differ from the induction by TCDD?
   answer: Omeprazole induces CYP1A1 in HepG2 cells by initiating a protein tyrosine kinase-mediated signal transduction pathway. This mechanism is different from the induction by TCDD, which is mediated through the Ah receptor. The passage indicates that while TCDD-induced CYP1A1 expression can be inhibited by an Ah receptor antagonist, the induction by omeprazole is not affected by this antagonist, highlighting the distinct pathways utilized by these compounds.
   text: Target-10445394
   table: None
[{'question': 'How does omeprazole induce CYP1A1 in HepG2 cells, and how does this mechanism differ from the induction by TCDD?', 'answer': 'Omeprazole induces CYP1A1 in HepG2 cells by initiating a protein tyrosine kinase-mediated signal transduction pathway. This mechanism is different from the induction by TCDD, which is mediated through the Ah receptor. The pas

 30%|███       | 6/20 [00:24<00:49,  3.51s/it]

1. question: How does sex influence the pharmacokinetics and pharmacodynamics of Zolpidem in the context of metabolic interactions involving CYP3A?  
   answer: Sex acts as a differentiating factor in the pharmacokinetics and pharmacodynamics of Zolpidem when metabolic interactions involve the enzyme CYP3A. This suggests that males and females may process Zolpidem differently due to variations in CYP3A activity, which could affect the drug's efficacy and safety profile as a hypnotic and sedative.  
   text: Target-20552178  
   table: None
[{'question': 'How does sex influence the pharmacokinetics and pharmacodynamics of Zolpidem in the context of metabolic interactions involving CYP3A?', 'answer': "Sex acts as a differentiating factor in the pharmacokinetics and pharmacodynamics of Zolpidem when metabolic interactions involve the enzyme CYP3A. This suggests that males and females may process Zolpidem differently due to variations in CYP3A activity, which could affect the drug's effica

 35%|███▌      | 7/20 [00:29<00:49,  3.80s/it]

1. question: What was the effect of dopamine compared to dobutamine on alveolar fluid clearance in the study involving anesthetized, ventilated rats?
   answer: In the study, dopamine, whether administered intra-alveolarly at a concentration of 10(-4) M or intravenously at a dose of 5-10 micrograms/kg/min, had no effect on alveolar fluid clearance. In contrast, dobutamine, both when given intra-alveolarly at 10(-4) M and intravenously at 5 micrograms/kg/min, increased alveolar liquid clearance. This suggests that dobutamine, which acts on both beta-1 and beta-2 receptors, enhances alveolar fluid clearance, whereas dopamine, a beta-1 agonist, does not.
   text: Target-9279221
   table: None
[{'question': 'What was the effect of dopamine compared to dobutamine on alveolar fluid clearance in the study involving anesthetized, ventilated rats?', 'answer': 'In the study, dopamine, whether administered intra-alveolarly at a concentration of 10(-4) M or intravenously at a dose of 5-10 microgra

 40%|████      | 8/20 [00:36<01:00,  5.07s/it]

1. question: How does the concomitant administration of erythromycin affect the pharmacokinetics and pharmacodynamics of argatroban in terms of AUC, Cmax, and aPTT values in healthy subjects?
   answer: The concomitant administration of erythromycin, a potent CYP3A4/5 inhibitor, does not significantly affect the pharmacokinetics of argatroban, as the mean values for argatroban area under the concentration-time curves (AUC0-inf), maximum concentration (Cmax), and half-life (t1/2) were similar whether argatroban was administered alone or with erythromycin. Additionally, the pharmacodynamics, as measured by mean activated partial thromboplastin time (aPTT) values, were also not significantly affected by the combination of argatroban and erythromycin.
   text: Target-10234600
   table: None
[{'question': 'How does the concomitant administration of erythromycin affect the pharmacokinetics and pharmacodynamics of argatroban in terms of AUC, Cmax, and aPTT values in healthy subjects?', 'answe

 45%|████▌     | 9/20 [00:39<00:45,  4.18s/it]

1. question: What are the FDA-approved indications for Metoprolol, and how does its role in hypertension management reflect current controversies in beta-blocker selection?
   answer: Metoprolol is FDA-approved for the treatment of angina, heart failure, myocardial infarction, atrial fibrillation/flutter, and hypertension. There is ongoing controversy regarding the selection of beta-blockers, including metoprolol, for the management of these conditions. Specifically, its role as initial therapy for hypertension, especially without compelling indications, has been questioned. This reflects a broader debate about the optimal selection of beta-blockers for specific diseases.
   text: Target-30422518
   table: None
[{'question': 'What are the FDA-approved indications for Metoprolol, and how does its role in hypertension management reflect current controversies in beta-blocker selection?', 'answer': 'Metoprolol is FDA-approved for the treatment of angina, heart failure, myocardial infarctio

 50%|█████     | 10/20 [00:41<00:35,  3.57s/it]

1. question: How does the addition of pentoxifylline to cardioplegic solutions potentially mitigate myocardial inflammatory reactions and ischemia/reperfusion injury during extracorporeal circulation?
   answer: The addition of pentoxifylline (Ptx) to cardioplegic solutions may mitigate myocardial inflammatory reactions and ischemia/reperfusion injury during extracorporeal circulation through the inhibition of 5'-nucleotidase (5'-NT), which decreases the necessity of cell energy and inflammatory reactions. This mechanism was investigated in a study involving 75 patients with various cardiac conditions, where Ptx was added to the cardioplegic solution at a concentration of 500 mg/L in the study group to assess its effects.
   text: Target-16426349
   table: None
[{'question': 'How does the addition of pentoxifylline to cardioplegic solutions potentially mitigate myocardial inflammatory reactions and ischemia/reperfusion injury during extracorporeal circulation?', 'answer': "The addition

 55%|█████▌    | 11/20 [00:43<00:29,  3.23s/it]

1. question: Which cytochrome P-450 isoforms have been identified as significantly involved in the metabolism of disopyramide enantiomers, and what evidence supports this involvement?
   answer: The study identified cytochrome P-450 isoforms, particularly those inhibited by SKF 525A and troleandomycin, as significantly involved in the metabolism of disopyramide enantiomers. These inhibitors potently suppressed the metabolism of both R(-)- and S(+)-disopyramide enantiomers with IC50 values of less than 7.3 and 18.9 microM, respectively. This potent inhibition contrasts with the weak inhibitory effects observed for other CYP isoform substrates, indicating a specific involvement of the isoforms targeted by SKF 525A and troleandomycin.
   text: Target-10901704
   table: None
[{'question': 'Which cytochrome P-450 isoforms have been identified as significantly involved in the metabolism of disopyramide enantiomers, and what evidence supports this involvement?', 'answer': 'The study identifie

 60%|██████    | 12/20 [00:46<00:23,  2.96s/it]

1. question: What were the findings of the study regarding the antinociceptive effects of droperidol and morphine when administered intrathecally in a rat model?
   answer: The study found that droperidol, when administered intrathecally, had no antinociceptive effect by itself or in combination with morphine in a rat model. Additionally, the combination of droperidol and morphine did not produce any histopathological effects on the rat spinal cord. This suggests a discrepancy between clinical findings and experimental pain studies, indicating different modes of action for droperidol under these conditions.
   text: Target-1549935
   table: None
[{'question': 'What were the findings of the study regarding the antinociceptive effects of droperidol and morphine when administered intrathecally in a rat model?', 'answer': 'The study found that droperidol, when administered intrathecally, had no antinociceptive effect by itself or in combination with morphine in a rat model. Additionally, t

 65%|██████▌   | 13/20 [00:48<00:20,  2.93s/it]

1. question: What role do the HM74 (GPR109B) and HM74A (GPR109A) genes play in the treatment of dyslipidemias and what challenges are associated with them?
   answer: The HM74 (GPR109B) and HM74A (GPR109A) genes code for Gi-G protein-coupled orphan receptors that are involved in the metabolic effects of niacin, a B vitamin used as an important agent in the treatment of dyslipidemias. However, the use of niacin is limited by side effects. The novel role of these genes in niacin metabolism suggests they could be new targets for drug development. Challenges associated with these genes include human genetic variations that have not been extensively studied, which may affect the response to agents targeting these receptors. Additionally, many nonsynonymous SNPs reported in public databases for HM74 and HM74A are artifacts due to the extensive homology between these genes, highlighting a neglected phenomenon in sequence reporting.
   text: Target-15580557
   table: None
[{'question': 'What r

 70%|███████   | 14/20 [00:52<00:18,  3.12s/it]

1. question: How does the cytochrome P450 2D6 (CYP2D6) enzyme affect the pharmacokinetics of tolterodine in poor metabolizers compared to extensive metabolizers, and what are the implications for systemic clearance?
   answer: The cytochrome P450 2D6 (CYP2D6) enzyme significantly affects the pharmacokinetics of tolterodine, as evidenced by the systemic clearance differences between poor and extensive metabolizers. Poor metabolizers of debrisoquin (a marker for CYP2D6 activity) exhibited a mean systemic clearance of tolterodine of 9.0 +/- 2.1 L/hr, which is significantly lower (p < 0.001) than the 44 +/- 13 L/hr observed in extensive metabolizers. This indicates that poor metabolizers have a reduced ability to clear tolterodine from the system, resulting in a longer duration of the drug's presence in the body.
   text: Target-9630826
   table: None
[{'question': 'How does the cytochrome P450 2D6 (CYP2D6) enzyme affect the pharmacokinetics of tolterodine in poor metabolizers compared to 

 75%|███████▌  | 15/20 [00:54<00:14,  2.82s/it]

1. question: How does the CYP2D6 phenotype affect the plasma concentration and H1-receptor occupancy of chlorpheniramine in individuals?
   answer: The CYP2D6 phenotype influences both the plasma concentration and H1-receptor occupancy of chlorpheniramine. Extensive metabolizers show more than 80% occupancy of H1-receptors for 12 hours after an 8 mg dose, whereas poor metabolizers maintain greater than 60% occupancy from 12 to 30 hours, even when plasma concentrations fall below levels expected to result in 50% receptor occupancy. This indicates that CYP2D6 may play a role in forming a potent active metabolite of chlorpheniramine, affecting its efficacy.
   text: Target-7648771
   table: None
[{'question': 'How does the CYP2D6 phenotype affect the plasma concentration and H1-receptor occupancy of chlorpheniramine in individuals?', 'answer': 'The CYP2D6 phenotype influences both the plasma concentration and H1-receptor occupancy of chlorpheniramine. Extensive metabolizers show more than

 80%|████████  | 16/20 [00:56<00:10,  2.64s/it]

1. question: What are the specific considerations for using atazanavir in treatment-experienced patients, and how does it differ from its use in treatment-naive patients?
   answer: In treatment-experienced patients, atazanavir should be pharmacologically boosted with ritonavir to enhance its efficacy. This is in contrast to treatment-naive patients, where atazanavir can be administered as 400 mg/day without the need for ritonavir boosting. The need for boosting in treatment-experienced patients arises due to potential resistance issues and interactions with other antiretrovirals such as tenofovir or efavirenz.
   text: Target-15585441
   table: None
[{'question': 'What are the specific considerations for using atazanavir in treatment-experienced patients, and how does it differ from its use in treatment-naive patients?', 'answer': 'In treatment-experienced patients, atazanavir should be pharmacologically boosted with ritonavir to enhance its efficacy. This is in contrast to treatment-

 85%|████████▌ | 17/20 [01:00<00:08,  2.90s/it]

1. question: How does warfarin administration affect the utilization of phylloquinone and menaquinone-9 in the liver, and what does this indicate about their relative efficiency as substrates for vitamin K-dependent gamma-glutamyl carboxylase?
   answer: Warfarin administration blocks the enzyme activity of microsomal vitamin K epoxide reductase, leading to the accumulation of vitamin K 2,3-epoxide, a co-product of the carboxylation reaction. In a rat model with equimolar amounts of phylloquinone and menaquinone-9 (MK-9) in the liver, four times as much phylloquinone epoxide as MK-9 epoxide was detected in the liver one hour after warfarin administration. This suggests that hepatic phylloquinone is utilized more efficiently than MK-9 as a substrate for the vitamin K-dependent gamma-glutamyl carboxylase.
   text: Target-7638250
   table: None
[{'question': 'How does warfarin administration affect the utilization of phylloquinone and menaquinone-9 in the liver, and what does this indicat

 90%|█████████ | 18/20 [01:04<00:06,  3.27s/it]

1. question: What are the differences in receptor binding affinities between amoxapine and loxapine, and how might these differences contribute to their psychopharmacological effects?
   answer: Amoxapine and loxapine both show high affinities for 5-HT2, D2, and alpha 1 receptors with Ki values less than 10(-7) mol/L. They have moderate affinity for the alpha 2 receptor (Ki less than 10(-6) mol/L) and low affinities for M and 5-HT1 receptors (Ki less than 10(-5) mol/L). Notably, amoxapine has low affinities for D1 and GABA receptors, while loxapine has a moderate affinity for the D1 receptor, nearly 20 times greater than amoxapine. Amoxapine exhibits more potent inhibitory effects on serotonin receptors and weaker inhibitory effects on dopamine receptors. Neither drug shows significant affinity for BZ and beta-adrenergic receptors. These differences in receptor affinities may contribute to their distinct psychopharmacological effects, potentially influencing their therapeutic and side 

 95%|█████████▌| 19/20 [01:06<00:02,  2.95s/it]

1. question: What are the challenges in predicting the response to Methotrexate (MTX) therapy in rheumatoid arthritis patients, and how does pharmacogenetics aim to address these challenges?
   answer: Predicting the response to Methotrexate (MTX) therapy in rheumatoid arthritis patients is challenging due to the variability in individual responses, with up to one-third of patients experiencing inefficacy or adverse events. Currently, it is not possible to accurately predict which patients will respond to MTX therapy. Pharmacogenetics, which studies the variability in drug response due to genetic differences, aims to address these challenges by understanding the complex intracellular metabolism of MTX and its interaction with key enzymes. Further work in MTX pharmacogenetics is needed to improve the accuracy of therapy response predictions.
   text: Target-17586865
   table: None
[{'question': 'What are the challenges in predicting the response to Methotrexate (MTX) therapy in rheumato

100%|██████████| 20/20 [01:09<00:00,  3.47s/it]

1. question: What are the mechanisms by which oxaprozin induces apoptosis in immune complex-activated monocytes, and how do these mechanisms compare to other NSAIDs?
   answer: Oxaprozin induces apoptosis in immune complex-activated monocytes by affecting specific signaling pathways. The study particularly examined the activity of caspase-3, the involvement of the IkappaB kinase (IKK)-nuclear factor kappaB (NF-kappaB) system, and the activity of X-linked mammalian inhibitor of apoptosis protein (XIAP). These pathways are distinct from the traditional cyclooxygenase (COX) inhibition typically associated with NSAIDs, suggesting that oxaprozin may have unique effects compared to other drugs in the same class by promoting apoptosis through mechanisms not solely reliant on COX inhibition or prostaglandin synthesis inhibition.
   text: Target-19338579
   table: None
[{'question': 'What are the mechanisms by which oxaprozin induces apoptosis in immune complex-activated monocytes, and how do t




In [46]:

def load_ground_truth(gt_file: str):
    """Load ground truth data from CSV file."""
    gt_data = []
    with open(gt_file, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            gt_data.append(row)
    return gt_data

def generate_llm_answer(question: str, model: str = "gpt-4o"):
    """
    Generate an answer to a question using an LLM.
    
    Args:
        question: The question to answer
        model: Model to use for generation
        
    Returns:
        The generated answer
    """
    try:
        # Create a simple prompt with just the question
        prompt = f"Answer this question with detailed information: {question}"
        print(f"Prompt: {prompt}")
        
        # Call OpenAI API
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.0,  # Use low temperature for more consistent answers
            max_tokens=500
        )
        
        # Extract the response text
        return response.choices[0].message.content.strip()
            
    except Exception as e:
        print(f"Error generating answer: {e}")
        time.sleep(2)  # Sleep to handle rate limits
        return ""

def calculate_llm_correctness(hypothesis: str, reference: str, question: str, model: str = "gpt-4o"):
    """
    Use an LLM to evaluate the correctness of the hypothesis compared to the reference.
    
    Args:
        hypothesis: The system-generated answer
        reference: The ground truth answer
        question: The original question
        model: Model to use for evaluation
        
    Returns:
        A score between 0 and 1 representing correctness (1 = fully correct, 0 = incorrect)
    """
    try:
        # Create prompt for the LLM
        prompt = f"""
You are an expert evaluator assessing the correctness of an answer to a question.

Question: {question}

Ground Truth Answer: {reference}

System Answer: {hypothesis}

Evaluate how correct the System Answer is compared to the Ground Truth Answer. Be very critical in your evaluation/analysis.
Give a score from 0 to 1 where:
- 1.0 means the System Answer is fully correct and contains all the information from the Ground Truth
- 0.0 means the System Answer is completely incorrect
- Values between 0 and 1 indicate partial correctness

Output a single line with just the score as a decimal between 0 and 1.
"""

        # Call OpenAI API
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.0,  # Use low temperature for more consistent evaluations
            max_tokens=300
        )
        
        # Extract the response text
        response_text = response.choices[0].message.content.strip()
        
        # Extract the score from the response - find the last number between 0 and 1
        score_matches = re.findall(r'(?:^|\s)(0(?:\.\d+)?|1(?:\.0+)?)(?:$|\s)', response_text)
        if score_matches:
            score = float(score_matches[-1])  # Take the last match as the final score
            return min(max(score, 0.0), 1.0)  # Ensure score is between 0 and 1
        else:
            print(f"Could not extract a score from LLM response: {response_text}")
            return 0.0
            
    except Exception as e:
        print(f"Error in LLM evaluation: {e}")
        time.sleep(2)  # Sleep to handle rate limits
        return 0.0

def filter_qa_pairs(gt_data,  llm_model: str = "gpt-4o", threshold: float = 0.5):
    """
    Filter question-answer pairs based on LLM correctness score.
    
    Args:
        gt_data: List of dictionaries containing ground truth data
        llm_model: Model to use for generation and evaluation
        threshold: Correctness threshold above which pairs are removed
        
    Returns:
        Filtered list of dictionaries
    """
    filtered_data = []
    
    for qa_pair in tqdm(gt_data, desc="Filtering QA pairs"):
        question = qa_pair['question']
        reference_answer = qa_pair['answer']
        
        # Generate an answer using just the question
        llm_answer = generate_llm_answer(question)
        
        # If we couldn't get an answer, keep the pair
        if not llm_answer:
            filtered_data.append(qa_pair)
            continue
        
        # Evaluate the correctness
        correctness_score = calculate_llm_correctness(
            llm_answer, reference_answer, question
        )
        
        print(f"Question: {question}")
        print(f"LLM Answer: {llm_answer[:100]}...")
        print(f"Correctness Score: {correctness_score}")
        
        # Keep pairs that the LLM couldn't answer correctly
        if correctness_score <= threshold:
            filtered_data.append(qa_pair)
    
    return filtered_data

def save_filtered_data(filtered_data, output_file):
    """Save filtered data to a CSV file."""
    if not filtered_data:
        print("No data to save!")
        return
    
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=filtered_data[0].keys(), quoting=csv.QUOTE_ALL)
        writer.writeheader()
        for row in filtered_data:
            writer.writerow(row)
    


In [47]:
# Load ground truth data
print(f"Loading ground truth data")
gt_data = load_ground_truth(OUTPUT_FILE)
print(f"Loaded {len(gt_data)} QA pairs")

# Filter pairs
filtered_data = filter_qa_pairs(gt_data, 0.5)
print(f"Filtered to {len(filtered_data)} QA pairs")

filtered_output_file = OUTPUT_FILE.replace(".gt", "_filtered.gt")
# Save filtered data
save_filtered_data(filtered_data, filtered_output_file)
print(f"Saved filtered data to {filtered_output_file}")

# generate_llm_answer("What is the main target of the drug?")

Loading ground truth data
Loaded 20 QA pairs


Filtering QA pairs:   0%|          | 0/20 [00:00<?, ?it/s]

Prompt: Answer this question with detailed information: What potential mechanism might explain the liver injury caused by bosentan during clinical trials, and how does the co-administration of glyburide affect this outcome?


Filtering QA pairs:   5%|▌         | 1/20 [00:08<02:37,  8.31s/it]

Question: What potential mechanism might explain the liver injury caused by bosentan during clinical trials, and how does the co-administration of glyburide affect this outcome?
LLM Answer: Bosentan is an endothelin receptor antagonist used primarily in the treatment of pulmonary arterial ...
Correctness Score: 0.8
Prompt: Answer this question with detailed information: How does the metabolism of treprostinil diethanolamine influence its co-administration with bosentan in the treatment of pulmonary arterial hypertension?


Filtering QA pairs:  10%|█         | 2/20 [00:17<02:36,  8.71s/it]

Question: How does the metabolism of treprostinil diethanolamine influence its co-administration with bosentan in the treatment of pulmonary arterial hypertension?
LLM Answer: Treprostinil diethanolamine and bosentan are both used in the treatment of pulmonary arterial hypert...
Correctness Score: 0.6
Prompt: Answer this question with detailed information: How does the CYP 2D6 genotype influence the effectiveness of tamoxifen therapy in breast cancer patients, and what factors are considered in predicting endoxifen concentrations?


Filtering QA pairs:  15%|█▌        | 3/20 [00:23<02:10,  7.67s/it]

Question: How does the CYP 2D6 genotype influence the effectiveness of tamoxifen therapy in breast cancer patients, and what factors are considered in predicting endoxifen concentrations?
LLM Answer: The CYP2D6 genotype plays a significant role in influencing the effectiveness of tamoxifen therapy i...
Correctness Score: 0.8
Prompt: Answer this question with detailed information: How does KN-62 affect interleukin-18 release in the context of sepsis, and what is its mechanism of action?


Filtering QA pairs:  20%|██        | 4/20 [00:30<02:00,  7.50s/it]

Question: How does KN-62 affect interleukin-18 release in the context of sepsis, and what is its mechanism of action?
LLM Answer: KN-62 is a well-known inhibitor of the P2X7 receptor, which is a type of purinergic receptor that pl...
Correctness Score: 0.9
Prompt: Answer this question with detailed information: How does omeprazole induce CYP1A1 in HepG2 cells, and how does this mechanism differ from the induction by TCDD?


Filtering QA pairs:  25%|██▌       | 5/20 [00:44<02:22,  9.50s/it]

Question: How does omeprazole induce CYP1A1 in HepG2 cells, and how does this mechanism differ from the induction by TCDD?
LLM Answer: Omeprazole is a proton pump inhibitor commonly used to treat conditions like gastroesophageal reflux...
Correctness Score: 0.5
Prompt: Answer this question with detailed information: How does sex influence the pharmacokinetics and pharmacodynamics of Zolpidem in the context of metabolic interactions involving CYP3A?


Filtering QA pairs:  30%|███       | 6/20 [00:51<02:01,  8.66s/it]

Question: How does sex influence the pharmacokinetics and pharmacodynamics of Zolpidem in the context of metabolic interactions involving CYP3A?
LLM Answer: Zolpidem is a non-benzodiazepine hypnotic agent commonly used for the short-term treatment of insomn...
Correctness Score: 0.9
Prompt: Answer this question with detailed information: What was the effect of dopamine compared to dobutamine on alveolar fluid clearance in the study involving anesthetized, ventilated rats?


Filtering QA pairs:  35%|███▌      | 7/20 [00:57<01:42,  7.85s/it]

Question: What was the effect of dopamine compared to dobutamine on alveolar fluid clearance in the study involving anesthetized, ventilated rats?
LLM Answer: In studies investigating the effects of dopamine and dobutamine on alveolar fluid clearance, particu...
Correctness Score: 0.2
Prompt: Answer this question with detailed information: How does the concomitant administration of erythromycin affect the pharmacokinetics and pharmacodynamics of argatroban in terms of AUC, Cmax, and aPTT values in healthy subjects?


Filtering QA pairs:  40%|████      | 8/20 [01:09<01:51,  9.29s/it]

Question: How does the concomitant administration of erythromycin affect the pharmacokinetics and pharmacodynamics of argatroban in terms of AUC, Cmax, and aPTT values in healthy subjects?
LLM Answer: The concomitant administration of erythromycin, a known inhibitor of the cytochrome P450 3A4 (CYP3A4...
Correctness Score: 0.3
Prompt: Answer this question with detailed information: What are the FDA-approved indications for Metoprolol, and how does its role in hypertension management reflect current controversies in beta-blocker selection?


Filtering QA pairs:  45%|████▌     | 9/20 [01:16<01:33,  8.47s/it]

Question: What are the FDA-approved indications for Metoprolol, and how does its role in hypertension management reflect current controversies in beta-blocker selection?
LLM Answer: Metoprolol is a beta-blocker that is commonly used in the management of various cardiovascular condi...
Correctness Score: 0.9
Prompt: Answer this question with detailed information: How does the addition of pentoxifylline to cardioplegic solutions potentially mitigate myocardial inflammatory reactions and ischemia/reperfusion injury during extracorporeal circulation?


Filtering QA pairs:  50%|█████     | 10/20 [01:22<01:18,  7.86s/it]

Question: How does the addition of pentoxifylline to cardioplegic solutions potentially mitigate myocardial inflammatory reactions and ischemia/reperfusion injury during extracorporeal circulation?
LLM Answer: The addition of pentoxifylline to cardioplegic solutions during extracorporeal circulation, such as ...
Correctness Score: 0.7
Prompt: Answer this question with detailed information: Which cytochrome P-450 isoforms have been identified as significantly involved in the metabolism of disopyramide enantiomers, and what evidence supports this involvement?


Filtering QA pairs:  55%|█████▌    | 11/20 [01:33<01:18,  8.67s/it]

Question: Which cytochrome P-450 isoforms have been identified as significantly involved in the metabolism of disopyramide enantiomers, and what evidence supports this involvement?
LLM Answer: Disopyramide is an antiarrhythmic medication used to treat certain types of irregular heartbeat. It ...
Correctness Score: 0.3
Prompt: Answer this question with detailed information: What were the findings of the study regarding the antinociceptive effects of droperidol and morphine when administered intrathecally in a rat model?


Filtering QA pairs:  60%|██████    | 12/20 [01:41<01:07,  8.46s/it]

Question: What were the findings of the study regarding the antinociceptive effects of droperidol and morphine when administered intrathecally in a rat model?
LLM Answer: The study investigating the antinociceptive effects of droperidol and morphine when administered int...
Correctness Score: 0.0
Prompt: Answer this question with detailed information: What role do the HM74 (GPR109B) and HM74A (GPR109A) genes play in the treatment of dyslipidemias and what challenges are associated with them?


Filtering QA pairs:  65%|██████▌   | 13/20 [01:47<00:54,  7.74s/it]

Question: What role do the HM74 (GPR109B) and HM74A (GPR109A) genes play in the treatment of dyslipidemias and what challenges are associated with them?
LLM Answer: HM74 (GPR109B) and HM74A (GPR109A) are genes that encode for G-protein-coupled receptors, which are ...
Correctness Score: 0.7
Prompt: Answer this question with detailed information: How does the cytochrome P450 2D6 (CYP2D6) enzyme affect the pharmacokinetics of tolterodine in poor metabolizers compared to extensive metabolizers, and what are the implications for systemic clearance?


Filtering QA pairs:  70%|███████   | 14/20 [01:56<00:48,  8.10s/it]

Question: How does the cytochrome P450 2D6 (CYP2D6) enzyme affect the pharmacokinetics of tolterodine in poor metabolizers compared to extensive metabolizers, and what are the implications for systemic clearance?
LLM Answer: Cytochrome P450 2D6 (CYP2D6) is a key enzyme in the metabolism of many drugs, including tolterodine,...
Correctness Score: 0.8
Prompt: Answer this question with detailed information: How does the CYP2D6 phenotype affect the plasma concentration and H1-receptor occupancy of chlorpheniramine in individuals?


Filtering QA pairs:  75%|███████▌  | 15/20 [02:02<00:37,  7.52s/it]

Question: How does the CYP2D6 phenotype affect the plasma concentration and H1-receptor occupancy of chlorpheniramine in individuals?
LLM Answer: The CYP2D6 enzyme, part of the cytochrome P450 family, plays a crucial role in the metabolism of man...
Correctness Score: 0.7
Prompt: Answer this question with detailed information: What are the specific considerations for using atazanavir in treatment-experienced patients, and how does it differ from its use in treatment-naive patients?


Filtering QA pairs:  80%|████████  | 16/20 [02:10<00:30,  7.54s/it]

Question: What are the specific considerations for using atazanavir in treatment-experienced patients, and how does it differ from its use in treatment-naive patients?
LLM Answer: Atazanavir is an antiretroviral medication used in the treatment of HIV-1 infection. It is a proteas...
Correctness Score: 0.7
Prompt: Answer this question with detailed information: How does warfarin administration affect the utilization of phylloquinone and menaquinone-9 in the liver, and what does this indicate about their relative efficiency as substrates for vitamin K-dependent gamma-glutamyl carboxylase?


Filtering QA pairs:  85%|████████▌ | 17/20 [02:16<00:21,  7.08s/it]

Question: How does warfarin administration affect the utilization of phylloquinone and menaquinone-9 in the liver, and what does this indicate about their relative efficiency as substrates for vitamin K-dependent gamma-glutamyl carboxylase?
LLM Answer: Warfarin is an anticoagulant medication that works by inhibiting the vitamin K epoxide reductase com...
Correctness Score: 0.5
Prompt: Answer this question with detailed information: What are the differences in receptor binding affinities between amoxapine and loxapine, and how might these differences contribute to their psychopharmacological effects?


Filtering QA pairs:  90%|█████████ | 18/20 [02:24<00:14,  7.34s/it]

Question: What are the differences in receptor binding affinities between amoxapine and loxapine, and how might these differences contribute to their psychopharmacological effects?
LLM Answer: Amoxapine and loxapine are both antipsychotic medications, but they have distinct pharmacological pr...
Correctness Score: 0.5
Prompt: Answer this question with detailed information: What are the challenges in predicting the response to Methotrexate (MTX) therapy in rheumatoid arthritis patients, and how does pharmacogenetics aim to address these challenges?


Filtering QA pairs:  95%|█████████▌| 19/20 [02:32<00:07,  7.69s/it]

Question: What are the challenges in predicting the response to Methotrexate (MTX) therapy in rheumatoid arthritis patients, and how does pharmacogenetics aim to address these challenges?
LLM Answer: Predicting the response to Methotrexate (MTX) therapy in rheumatoid arthritis (RA) patients presents...
Correctness Score: 0.9
Prompt: Answer this question with detailed information: What are the mechanisms by which oxaprozin induces apoptosis in immune complex-activated monocytes, and how do these mechanisms compare to other NSAIDs?


Filtering QA pairs: 100%|██████████| 20/20 [02:42<00:00,  8.11s/it]

Question: What are the mechanisms by which oxaprozin induces apoptosis in immune complex-activated monocytes, and how do these mechanisms compare to other NSAIDs?
LLM Answer: Oxaprozin is a nonsteroidal anti-inflammatory drug (NSAID) primarily used to relieve pain and inflam...
Correctness Score: 0.3
Filtered to 8 QA pairs
Saved filtered data to passage_output.gt



