# Automated Question-Answer Generation from Pharmaceutical Data

This notebook systematically generates question-answer pairs from pharmaceutical data sources including:
1. PubMed target passages
2. DrugBank tables
3. Related pharmaceutical data

Each QA pair will include:
- Question
- Answer
- Source text/passage
- Related table

In [1]:
# Import required libraries
import os
import pandas as pd
import json
from pathlib import Path
from openai import OpenAI
import csv
from tqdm import tqdm
from collections import defaultdict

In [2]:
# Configuration and paths
PUBMED_TARGETS_DIR = 'inputs/pubmed-targets'
DRUGBANK_TABLES_DIR = 'inputs/drugbank-tables'
MAPPING_FILE = 'inputs/pubmed-drugbank-tables.gt'
OUTPUT_FILE = 'both_output.gt'

# Initialize your LLM API key if needed
# os.environ["OPENAI_API_KEY"]
client = OpenAI()


In [3]:
def load_passage_table_mapping():
    """Load the mapping between passages and their relevant tables"""
    mapping = defaultdict(list)
    with open(MAPPING_FILE, 'r') as f:
        for line in f:
            passage_id, table_name = line.strip().split(',')
            mapping[passage_id].append(table_name)
    # print("mapping", mapping)
    return mapping

def load_target_passages():
    """Load all target passages from the pubmed-targets directory"""
    passages = {}
    target_files = Path(PUBMED_TARGETS_DIR).glob('Target-*')
    
    for file_path in target_files:
        target_id = file_path.name
        with open(file_path, 'r') as f:
            passages[target_id] = f.read()
    # print("passages", passages)
    return passages

def load_drugbank_tables():
    """Load all relevant DrugBank tables"""
    tables = {}
    csv_files = Path(DRUGBANK_TABLES_DIR).glob('*.csv')
    
    for file_path in csv_files:
        table_name = file_path.stem
        tables[table_name] = pd.read_csv(file_path)
    
    # print("tables", tables)
    return tables

In [4]:
def get_relevant_table_content(tables, table_names, max_rows=5):
    """Extract relevant content from tables for context"""
    print("Debug - Available tables:", tables.keys())
    print("Debug - Looking for table_names:", table_names)

    table_content = {}
    for table_name in table_names:
        # Remove .csv extension if present
        base_table_name = table_name.replace('.csv', '')
        
        if base_table_name in tables:
            df = tables[base_table_name]
            table_content[base_table_name] = {
                'columns': list(df.columns),
                'sample': df.head(max_rows).to_dict('records')
            }
        else:
            print(f"Debug - Table '{base_table_name}' not found in available tables")
    return table_content

def generate_questions_for_passage(passage_id, passage_text, tables, relevant_table_names, model="gpt-4o"):
    """Generate questions for a given passage and its relevant tables using LLM"""

    # print("tables", tables)
    # print("relevant_table_names", relevant_table_names)
    
    # Limit passage length if too long (e.g., first 1000 characters)
    passage_text = passage_text[:1000] + "..." if len(passage_text) > 1000 else passage_text
    
    # Limit to maximum 3 relevant tables
    relevant_table_names = relevant_table_names[:3]
    
    # Get relevant table content
    table_content = get_relevant_table_content(tables, relevant_table_names)

    
    prompt = f"""
    Given the following passage and related tables, generate 3-5 meaningful question-answer pairs.
    IMPORTANT: Each question MUST be answerable using information from BOTH the passage AND the tables together.
    Only generate questions that require combining information from both sources.
    Focus on pharmaceutical and medical aspects.
    
    
    Passage (ID: {passage_id}):
    {passage_text}
    
    Related Tables:
    {json.dumps(table_content, indent=2)}
    
    Generate questions in the following format:
    1. question: [specific question about drug/treatment]
       answer: [detailed answer combining information from passage and tables]
       text: [passage ID if information from passage was used (e.g. Target-123456789)]
       table: [table name if information from table was used (e.g. drugbank-drug)]
    
    Ensure every question uses information from both the passage and at least one table.
    """
    
    # Call your LLM here with the prompt
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a medical and pharmaceutical expert tasked with generating detailed question-answer pairs about drugs and treatments."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7
    )

    print(response.choices[0].message.content)
    
    # Parse the response into structured QA pairs
    qa_pairs = parse_llm_response(response.choices[0].message.content, passage_id)
    return qa_pairs

def parse_llm_response(response_text, passage_id):
    """Parse the LLM response into structured QA pairs"""
    qa_pairs = []
    
    # Split the response into individual QA entries
    entries = response_text.strip().split('\n\n')
    
    for entry in entries:
        if not entry.strip():
            continue
            
        lines = entry.strip().split('\n')
        current_qa = {
            'question': '',
            'answer': '',
            'text': passage_id,
            'table': 'None'  # Default value
        }
        
        for line in lines:
            line = line.strip()
            # Skip empty lines and numbering
            if not line or line.replace('.', '').strip().isdigit():
                continue
                
            # Parse each field using more robust splitting
            if 'question:' in line:
                current_qa['question'] = line.split('question:', 1)[1].strip()
            elif 'answer:' in line:
                current_qa['answer'] = line.split('answer:', 1)[1].strip()
            elif 'text:' in line:
                current_qa['text'] = line.split('text:', 1)[1].strip()
            elif 'table:' in line:
                table_value = line.split('table:', 1)[1].strip()
                # Handle NA, N/A, None cases
                current_qa['table'] = 'None' if table_value.upper() in ['NA', 'N/A', 'NONE'] else table_value
        
        # Only add complete QA pairs that have both question and answer
        if current_qa['question'] and current_qa['answer']:
            qa_pairs.append(current_qa.copy())  # Use copy to avoid reference issues
    
    return qa_pairs

In [9]:
def main():
    # Load mappings and data
    print("Loading passage-table mappings...")
    passage_table_mapping = load_passage_table_mapping()
    
    print("Loading target passages...")
    passages = load_target_passages()
    
    print("Loading DrugBank tables...")
    tables = load_drugbank_tables()
    
    # Initialize output list
    qa_pairs = []
    
    # Process each passage with its relevant tables
    for passage_id, passage_text in tqdm(list(passages.items())[:100]):
        if passage_id in passage_table_mapping:
            relevant_tables = passage_table_mapping[passage_id]
            # print(passage_id, passage_text, tables, relevant_tables)
            
            # Generate QA pairs using the passage and its relevant tables
            new_qa_pairs = generate_questions_for_passage(
                passage_id,
                passage_text,
                tables,
                relevant_tables
            )
            # new_qa_pairs = []
            print(new_qa_pairs)
            qa_pairs.extend(new_qa_pairs)
        else:
            print(f"No table mapping found for passage {passage_id}")
    
    # Save results
    with open(OUTPUT_FILE, 'w', newline='') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(['question', 'answer', 'text', 'table'])  # header
        for qa_pair in qa_pairs:
            writer.writerow([
                qa_pair['question'],
                qa_pair['answer'],
                qa_pair['text'],
                qa_pair['table']
            ])
        
    print(f"Generated {len(qa_pairs)} question-answer pairs")

In [10]:
if __name__ == "__main__":
    main()

Loading passage-table mappings...
Loading target passages...
Loading DrugBank tables...


  tables[table_name] = pd.read_csv(file_path)
  0%|          | 0/100 [00:00<?, ?it/s]

Debug - Available tables: dict_keys(['drugbank-drug_trans_links', 'drugbank-enzymes_polypeptides_ext_id', 'drugbank-drug_manufacturers', 'drugbank-drug_sequences', 'drugbank-drug_external_links', 'drugbank-drug_international_brands', 'drugbank-drug_mixtures', 'drugbank-drug_packagers', 'drugbank-drug_reactions_enzymes', 'drugbank-drug_enzymes_articles', 'drugbank-targets', 'drugbank-drug_carriers_textbooks', 'drugbank-drug_trans_textbooks', 'drugbank-carriers_polypeptides_ext_id', 'drugbank-drugs_attachments', 'drugbank-drug_carriers_articles', 'drugbank-enzymes_polypeptides', 'drugbank-drug_trans_articles', 'drugbank-drug_experimental_properties', 'drugbank-transporters_polypeptides_ext_id', 'drugbank-drug_dosages', 'drugbank-drug_syn', 'drugbank-drug_targ_textbooks', 'drugbank-drug_enzymes_attachments', 'drugbank-drug_carriers_links', 'drugbank-drug_salts', 'drugbank-drugs_articles', 'drugbank-targets_actions', 'drugbank-drug_patents', 'drugbank-drug_affected_organisms', 'drugbank-qu

  1%|          | 1/100 [00:11<19:04, 11.57s/it]

1. question: What is the clinical role of Flumazenil, and how does it differ in terms of administration compared to Lepirudin?
   answer: Flumazenil is used to reverse the effects of benzodiazepine sedation and is typically administered intravenously due to its low oral bioavailability. In contrast, Lepirudin, another drug listed in the "drugbank-drug" table, is a liquid form that Bayer ceased producing in 2012. While the passage discusses Flumazenil's role and preferred administration route, the table provides details about Lepirudin's state and production history.
   text: Target-8693922
   table: drugbank-drug

2. question: How does the type of Flumazenil as an imidazobenzodiazepine compare to the type of Cetuximab listed in the DrugBank database?
   answer: Flumazenil is an imidazobenzodiazepine, which is a benzodiazepine antagonist used to reverse the effects of benzodiazepines. Cetuximab, on the other hand, is categorized as a biotech drug in the DrugBank database, specifically a

  2%|▏         | 2/100 [00:16<12:48,  7.85s/it]

1. question: What is the affinity range of dicyclomine, trihexyphenidyl, and pirenzepine for the M1 muscarinic receptor subtype, and how does it compare to that of atropine?
   answer: The affinity range of dicyclomine, trihexyphenidyl, and pirenzepine for the M1 muscarinic receptor subtype is between 3.7-14 nM, which approaches the affinity of atropine at 1.6 nM.
   text: Target-2432979
   table: drugbank-drugs_links

2. question: Which muscarinic receptor antagonists demonstrate selectivity between central M1 and peripheral cardiac and glandular muscarinic receptors?
   answer: Dicyclomine, trihexyphenidyl, and pirenzepine demonstrate selectivity between central M1 muscarinic receptors and peripheral cardiac and glandular muscarinic receptors, showing higher affinity for M1 and lower affinity for cardiac receptors, with intermediate affinity for glandular receptors.
   text: Target-2432979
   table: drugbank-drug

3. question: How do the muscarinic receptor selectivity profiles of di

  7%|▋         | 7/100 [00:25<04:29,  2.90s/it]

1. question: What is the mechanism of action for balsalazide in the treatment of ulcerative colitis, and how does it compare with other 5-ASA agents?
   answer: Balsalazide is a prodrug of 5-ASA that utilizes azoreduction by colonic bacteria to achieve sustained release of active 5-ASA throughout the colon. This mechanism allows for a more targeted delivery compared to other 5-ASA agents, leading to faster and more frequent symptomatic remission. The drug is well tolerated with a safety profile comparable to other oral 5-ASA agents.
   text: Target-19743890
   table: drugbank-drug

2. question: How does the safety profile of balsalazide compare to other oral 5-ASA agents according to the passage and available databases?
   answer: According to the passage, balsalazide has a safety profile comparable to other oral 5-ASA agents. Although specific details on balsalazide are not available in the tables provided, the passage indicates that it is well tolerated, suggesting parity in safety w

  8%|▊         | 8/100 [00:34<06:18,  4.11s/it]

1. question: What is the primary mechanism of action of Valdecoxib, as described in the passage, and how does this relate to any known drug targets listed in the tables?
   answer: Valdecoxib is described as a potent and selective inhibitor of COX-2 in the passage. The tables do not provide a direct match for Valdecoxib, but they do mention various drug targets for other drugs. Although a specific target for Valdecoxib is not listed, its action on COX-2, which is a part of the prostaglandin-endoperoxide synthases family, aligns with its anti-inflammatory properties.
   text: Target-10715145
   table: drugbank-targets

2. question: Based on the passage and the drugbank table, can you identify any similarities between Valdecoxib and other listed drugs in terms of their application or structure?
   answer: The passage describes Valdecoxib as a sulfonamide and a selective COX-2 inhibitor, falling under non-steroidal anti-inflammatory agents. While the drugbank table does not list Valdecoxi

  9%|▉         | 9/100 [00:45<08:16,  5.45s/it]

1. question: What is the primary modification site of human serum albumin when modified by amoxicillin in ex vivo samples, and is amoxicillin listed in the DrugBank tables provided?
   answer: The primary modification site of human serum albumin (HSA) when modified by amoxicillin (AX) in ex vivo samples is Lys 190. However, amoxicillin is not listed in the provided DrugBank tables, which focus on biotech drugs.
   text: Target-25088930
   table: drugbank-drug

2. question: How does the study's methodology for detecting amoxicillin modifications correlate with the types of drugs listed in the DrugBank tables?
   answer: The study uses advanced mass spectrometric approaches to detect amoxicillin modifications, including techniques like multiple reaction monitoring and high-resolution MS systems. While the DrugBank tables list biotech drugs such as Lepirudin and Cetuximab, which are not directly related to amoxicillin, these drugs also require precise analytical methods for characterizati

 10%|█         | 10/100 [00:52<08:39,  5.78s/it]

1. question: What is the primary protein that binds nifedipine in human serum, and is there an FDA-approved drug product related to this protein?
   answer: The primary protein that binds nifedipine in human serum is albumin, as indicated by its high fraction bound of 0.970 in albumin solution. Although there is no direct mention of an FDA-approved drug product specifically related to albumin in the provided tables, nifedipine itself is a well-studied drug related to cardiovascular treatments.
   text: Target-2872322
   table: drugbank-drug

2. question: How does the binding of nifedipine to alpha 1-acid glycoprotein (AAG) compare to its binding to albumin, and is there a related drug in the tables that can be associated with a protein binding mechanism?
   answer: Nifedipine binds less to alpha 1-acid glycoprotein (AAG) compared to albumin, with binding fractions ranging from 0.514 to 0.755 depending on AAG concentration, whereas it binds significantly to albumin with a fraction of 0.

 11%|█         | 11/100 [00:59<09:21,  6.30s/it]

1. question: What are the IC50 values for nifedipine in blocking T-type and L-type Ca2+ channel currents, and is nifedipine included in the drugbank-drug table?
   answer: The IC50 values for nifedipine in blocking T-type and L-type Ca2+ channel currents are 1.2 micromol/l and 0.14 nmol/l, respectively. However, nifedipine is not included in the sample rows of the drugbank-drug table provided.
   text: Target-16899990
   table: drugbank-drug

2. question: Which calcium channel blocker showed a combination of tonic and use-dependent blocking manners for T-type Ca2+ channel currents, and is it listed in the drugbank-drug table?
   answer: Efonidipine showed a combination of tonic and use-dependent blocking manners for T-type Ca2+ channel currents. However, efonidipine is not listed in the sample rows of the drugbank-drug table provided.
   text: Target-16899990
   table: drugbank-drug

3. question: How does mibefradil block T-type Ca2+ channel currents, and is there any related link in t

 12%|█▏        | 12/100 [01:10<10:42,  7.31s/it]

1. question: What was the primary purpose of the study involving topiramate, and which additional drugs were potentially used in combination according to the tables?
   answer: The primary purpose of the study was to evaluate the efficacy and safety of topiramate in patients with severe myoclonic epilepsy in infancy and refractory seizures. Topiramate was used as an add-on to one or two other baseline drugs, though specific drugs from the tables were not mentioned, topiramate is known to be a small molecule drug often used in combination with other antiepileptics.
   text: Target-11948006
   table: drugbank-drug

2. question: What were the reported adverse effects of topiramate in the study, and how does this relate to the general drug information provided in the tables?
   answer: In the study, the reported adverse effects of topiramate included weight loss, hypermenorrhoea, renal microlithiasis, nervousness, and dysarthric speech. While the tables do not list topiramate specifically,

 13%|█▎        | 13/100 [01:16<10:04,  6.94s/it]

1. question: How does mitotane potentially enhance the effectiveness of doxorubicin in treating adrenocortical carcinoma (ACC), and which efflux transporter is primarily involved in this interaction?
   answer: Mitotane has been shown to potentiate the cytotoxicity of chemotherapeutic drugs like doxorubicin in ACC by acting on cancer cells that display chemoresistance due to the presence of P-glycoprotein (P-gp), an efflux pump involved in cancer multidrug resistance. This suggests that mitotane may sensitize ACC cells to doxorubicin by directly affecting P-gp activity.
   text: Target-25096913
   table: drugbank-transporters

2. question: Which human transporter is implicated in the multidrug resistance of adrenocortical carcinoma, and what is the known action of mitotane on this transporter?
   answer: The human transporter implicated in the multidrug resistance of adrenocortical carcinoma is P-glycoprotein 1 (P-gp). The passage suggests that mitotane may have a direct action on P-gp

 15%|█▌        | 15/100 [01:22<07:28,  5.27s/it]

1. question: What role does the organic anion transporter-1 (Oat1) play in the kidney, and is there a known drug associated with this transporter in humans?
   answer: The organic anion transporter-1 (Oat1) in the kidney mediates the uptake of organic anions from the bloodstream in exchange for intracellular alpha-ketoglutarate. However, according to the drugbank-transporters table, there is no specific known drug action associated with the canalicular multispecific organic anion transporter 1 in humans, which is related to similar organic anion transport functions.
   text: Target-11641438
   table: drugbank-transporters

2. question: Can you identify a drug from the drugbank-drug table that is known to be produced using recombinant DNA technology, similar to the study's use of recombinant technology in Xenopus laevis oocytes?
   answer: Dornase alfa is a drug produced using recombinant DNA technology in genetically modified Chinese hamster ovary (CHO) cells, which is similar to the s

 16%|█▌        | 16/100 [01:28<07:32,  5.39s/it]

1. question: What is the significance of the transporter-mediated uptake of cerivastatin in its interaction with cyclosporin A, and which human transporter is known to be involved in this process?
   answer: The transporter-mediated uptake of cerivastatin is significant in its interaction with cyclosporin A because it represents a saturable process where more than 70% of cerivastatin uptake at therapeutic concentrations is facilitated by transporters. This process is inhibited by cyclosporin A, contributing to the drug-drug interaction. The human transporter involved in this process is the organic anion transporting polypeptide-2 (OATP2).
   text: Target-12538813
   table: drugbank-transporters

2. question: How does cyclosporin A affect the uptake of cerivastatin in OATP2-expressing cells, and what is the known action of the transporter involved?
   answer: Cyclosporin A inhibits the saturable uptake of cerivastatin in OATP2-expressing cells with a K(i) value of 0.2 microM. The known 

 19%|█▉        | 19/100 [01:35<05:14,  3.88s/it]

1. question: How does the presence of octanoic acid (OA) in intravenous HSA solutions affect the enantioselective protein binding of ketoprofen?
   answer: The presence of octanoic acid (OA) in currently marketed intravenous HSA solutions affects the enantioselective protein binding of ketoprofen by influencing the free concentration ratio of R(-) and S(+) ketoprofen enantiomers. This is determined through equilibrium dialysis and ultrafiltration experiments.
   text: Target-11022905
   table: drugbank-drugs_links

2. question: What experimental conditions were utilized to determine the binding characteristics of ketoprofen enantiomers, and how are they relevant to the marketed drug formulations?
   answer: The binding characteristics of ketoprofen enantiomers were determined using equilibrium dialysis and ultrafiltration with specific conditions of 67 mM Sörensen phosphate buffer at pH 7.4, 580 microM HSA, and 37 degrees C. These conditions help to understand the drug binding in marke

 20%|██        | 20/100 [01:41<05:48,  4.35s/it]

1. question: Which class of anthelmintics is used for controlling Dirofilaria immitis infections, and does the DrugBank database list any related drugs in this class?
   answer: The passage mentions that the avermectin/milbemycin (A/M) class of anthelmintics, which includes ivermectin, is used for controlling Dirofilaria immitis infections. However, the DrugBank sample provided does not list ivermectin or any related drugs in the avermectin/milbemycin class in the tables.
   text: Target-15313134
   table: drugbank-drug

2. question: What is the molecular target of ivermectin in Dirofilaria immitis, and are there any drugs in the drugbank-targets table that act similarly on invertebrate targets?
   answer: Ivermectin acts on invertebrate glutamate-gated chloride channels (GluCl) in Dirofilaria immitis. The drugbank-targets table lists targets like the Epidermal growth factor receptor and Prothrombin, which relate to human targets, not invertebrate-specific targets like GluCl.
   text: 

 21%|██        | 21/100 [01:49<06:39,  5.06s/it]

1. question: How do second-generation histamine H(1) receptor antagonists like desloratadine, fexofenadine, and levocetirizine compare in terms of cytochrome P450 inhibition with other biotech drugs listed in the database?
   answer: Second-generation histamine H(1) receptor antagonists, such as desloratadine, fexofenadine, and levocetirizine, are noted for having no major cytochrome P450 inhibition. This is a distinct advantage compared to many biotech drugs, which often have complex interactions, as indicated by detailed FDA labeling and synthesis references in the drugbank-drug table.
   text: Target-18336052
   table: drugbank-drug

2. question: What is the significance of the selectivity of second-generation antihistamines for the H(1) receptor compared to the target proteins listed in the drugbank-targets table?
   answer: The high selectivity of second-generation antihistamines for the H(1) receptor enhances their efficacy in treating allergic conditions while minimizing side ef

 22%|██▏       | 22/100 [01:58<07:57,  6.12s/it]

1. question: What is the proposed mechanism by which salmeterol achieves its long duration of action in asthma treatment, and is this mechanism associated with any specific targets listed in the drugbank-targets table?
   answer: Salmeterol achieves its long duration of action in asthma treatment by binding with high affinity to the beta2-adrenergic receptor (beta 2AR) and potentially to a secondary site termed the "exosite." However, the specific targets listed in the drugbank-targets table do not include beta2-adrenergic receptor, as they are primarily associated with drugs like Cetuximab and Lepirudin, which target different proteins such as the Epidermal growth factor receptor and Prothrombin, respectively.
   text: Target-10471277
   table: drugbank-targets

2. question: Considering the mechanism of action of salmeterol described in the passage, how does this compare to the type of drugs listed in the drugbank-drug table?
   answer: Salmeterol is a long-acting beta2-adrenergic rec

 23%|██▎       | 23/100 [02:07<08:42,  6.79s/it]

1. question: What is the selectivity profile of loperamide for human opioid receptors, and is there any related drug information in the database?
   answer: Loperamide demonstrates high selectivity for the human mu opioid receptor with a Ki of 3 nM, compared to the delta (Ki = 48 nM) and kappa (Ki = 1156 nM) receptors. However, there is no direct entry for loperamide in the provided drugbank-drug table, indicating that specific information about loperamide's database entry is not available.
   text: Target-10087042
   table: drugbank-drug

2. question: How does loperamide's mechanism of action in rodent models relate to its classification in the drug database?
   answer: Loperamide acts by stimulating [35S]guanosine-5'-O-(3-thio)triphosphate binding and inhibiting forskolin-stimulated cAMP accumulation in rodent models, indicating its role as an opioid receptor agonist. However, loperamide itself is not listed in the drugbank-drug table, which primarily includes biotech drugs like lepi

 24%|██▍       | 24/100 [02:17<09:31,  7.52s/it]

1. question: How does the presence of CYP2C9 variants affect the pharmacokinetics of diclofenac, and are there any FDA-approved labeling documents related to diclofenac?
   answer: The study aimed to understand the effect of CYP2C9 variants, specifically CYP2C9*2 (Arg144Cys) and CYP2C9*3 (Ile359Leu), on the pharmacokinetics of diclofenac. It found no evidence of impaired metabolism of oral diclofenac in individuals with these variants. Although the passage provides insight into the study's methodology and results, there is no direct link to FDA-approved labeling documents for diclofenac in the provided tables.
   text: Target-12534640
   table: drugbank-drugs_links

2. question: What target proteins are associated with the drugs in the study, and how does diclofenac's mechanism relate to these targets?
   answer: The passage describes diclofenac's effect on prostaglandin E2 and thromboxane B2, which are indicators of COX-2 and COX-1 activity. While the passage focuses on diclofenac, th

 25%|██▌       | 25/100 [02:22<08:44,  6.99s/it]

1. question: What is the mechanism by which disulfiram causes an aversive reaction in patients who consume alcohol, and is there any information about disulfiram in the provided drugbank tables?
   answer: Disulfiram causes an aversive reaction by inhibiting aldehyde dehydrogenase, which increases serum aldehyde concentration after alcohol ingestion and leads to symptoms such as flushing, nausea, and severe hypotension. However, the provided drugbank tables do not contain specific information about disulfiram, as the tables list other drugs like Lepirudin, Cetuximab, and others, but not disulfiram.
   text: Target-17220694
   table: drugbank-drug

2. question: How was severe hypotension managed in the disulfiram-ethanol reaction case described in the passage, and is there any related information in the drugbank tables?
   answer: In the described case, severe hypotension was managed with initial volume resuscitation and dopamine infusion, which failed, but noradrenaline successfully re

 27%|██▋       | 27/100 [02:29<06:33,  5.40s/it]

1. question: What is a potential alternative to morphine for managing severe cancer pain, and what is the main objective of the related review?
   answer: Hydromorphone is a potential alternative to morphine for managing severe cancer pain. The main objective of the review mentioned in the passage is to explore and assess the evidence for the efficacy of hydromorphone in the management of pain.
   text: Target-11869661
   table: drugbank-drug

2. question: Which drug is identified as having a known action on the epidermal growth factor receptor in humans, and how does this relate to the passage discussing pain management?
   answer: Cetuximab is identified as having a known action on the epidermal growth factor receptor in humans. While Cetuximab is not directly related to pain management as discussed in the passage, the management of cancer pain, such as with hydromorphone, may involve comprehensive treatment plans that include targeting growth factor receptors like those affected by 

 28%|██▊       | 28/100 [02:36<07:02,  5.86s/it]

1. question: What was the effect of the new vasodilator minoxidil on plasma renin activity in hypertensive patients, and is minoxidil listed in the drugbank-drugs table?
   answer: The treatment with minoxidil resulted in a statistically significant decline in plasma renin activity from 59 +/- 6.4 ng/ml/16 h before treatment to 42.7 +/- 3.3 ng/ml/16 h after treatment. However, minoxidil is not listed in the sample data of the drugbank-drugs table.
   text: Target-790007
   table: drugbank-drug

2. question: How did minoxidil affect blood pressure in the study, and is there any FDA label information available for minoxidil in the drugbank-drug table?
   answer: During the observation period, minoxidil reduced the average systolic blood pressure from 195 +/- 18 to 159 +/- 7 mm Hg and the diastolic blood pressure from 120 +/- 8.3 to 92.5 +/- 8 mm Hg. However, there is no FDA label information for minoxidil in the sample data of the drugbank-drug table.
   text: Target-790007
   table: dru

 29%|██▉       | 29/100 [02:45<07:51,  6.64s/it]

1. question: How does the mutation S248F in neuronal alpha4beta2 nicotinic receptors affect the drug sensitivity of carbamazepine (CBZ)?
   answer: The S248F mutation in neuronal alpha4beta2 nicotinic receptors makes the receptors three times more sensitive to carbamazepine (CBZ), a drug used in the treatment of partial epilepsies. This increased sensitivity is due to the presence of Phe248 residues, which are the main component of CBZ binding sites in the mutant receptor.
   text: Target-12110613
   table: drugbank-drug

2. question: What is the primary use of carbamazepine, and how does it interact with mutant neuronal receptors in the context of epilepsy?
   answer: Carbamazepine is primarily used in the treatment of partial epilepsies. In the context of the S248F mutant neuronal alpha4beta2 nicotinic receptors, it has an increased binding affinity and a higher number of blocking binding sites, which explains its effectiveness in treating conditions like autosomal dominant nocturnal

 30%|███       | 30/100 [02:51<07:36,  6.52s/it]

1. question: What is Lenalidomide's mechanism of action in treating Myelodysplastic Syndromes, and is this supported by any other drug targeting similar pathways?
   answer: Lenalidomide is anti-angiogenic, as it inhibits microvessel formation, which is a mechanism contributing to its efficacy in treating Myelodysplastic Syndromes. This is investigated through its effects on sprout formation by arterial rings and cord formation by endothelial cells. Although the tables do not list drugs with the same mechanism, they provide details on other drugs and their targets, such as Cetuximab, which targets the epidermal growth factor receptor, a pathway involved in angiogenesis.
   text: Target-18805433
   table: drugbank-targets

2. question: Is there a specific FDA label reference for Lenalidomide similar to those provided for other drugs, and how does Lenalidomide's approval compare to the drugs listed?
   answer: The passage states Lenalidomide is approved for specific conditions in MDS and

 32%|███▏      | 32/100 [02:58<05:52,  5.18s/it]

1. question: How does the action of morphine-6beta-glucuronide (M6G) on food intake in rats relate to the type of drugs listed in the DrugBank database?
   answer: Morphine-6beta-glucuronide (M6G) is an active metabolite of morphine that significantly increases food intake in rats by interacting with opioid receptors. The drugs listed in the DrugBank database, such as Lepirudin and Cetuximab, are primarily biotech drugs with specific clinical uses like anticoagulation and cancer treatment, respectively, and do not interact with opioid receptors or influence food intake in a manner similar to M6G.
   text: Target-9808678
   table: drugbank-drug

2. question: What information is available about drugs targeting specific receptors that might be related to the action of morphine-6beta-glucuronide (M6G) in the passage?
   answer: The passage discusses the action of M6G on opioid receptors, specifically the mu receptor, and the use of antagonists and antisense oligodeoxynucleotides to study t

 34%|███▍      | 34/100 [03:08<05:31,  5.02s/it]

1. Question: What specific receptors and antagonists are involved in dihydroergotamine's mechanism of action, and do any of these appear in the "drugbank-targets" table?
   Answer: Dihydroergotamine's mechanism involves 5-HT(1B/1D) receptors and alpha(2)-adrenoceptors. The antagonists used in the study include SB224289 for 5-HT(1B), rauwolscine for alpha(2), BRL44408 for alpha(2A), and MK912 for alpha(2C). The "drugbank-targets" table does not list these specific receptors or antagonists, indicating that none of these are directly associated with the drugs in the table.
   Text: Target-14744615
   Table: drugbank-targets

2. Question: Is dihydroergotamine listed in the "drugbank-drug" table, and if not, what does this imply about its status compared to the drugs listed?
   Answer: Dihydroergotamine is not listed in the "drugbank-drug" table. This implies that dihydroergotamine may not be classified under the biotech drugs listed in the table, which include drugs like Lepirudin, Cetuxim

 36%|███▌      | 36/100 [03:14<04:35,  4.31s/it]

1. question: What specific drug and receptor interaction is investigated in the passage, and how does this relate to the documented drugs in the tables?
   answer: The passage investigates the interaction between sufentanil, a central analgesic, and the delta-receptor using selective antagonists such as naltrindole (NTI) and naltriben (NTB) to address opioid-related respiratory impairment. However, sufentanil itself is not explicitly listed in the provided tables. Instead, the tables include other biotech drugs, such as lepirudin and cetuximab, which are not related to the opioid receptors discussed in the passage.
   text: Target-1330549
   table: drugbank-drug

2. question: How does the role of mu- and delta-receptors in opioid-related respiratory impairment, as mentioned in the passage, compare to the known actions of the drugs listed in the tables?
   answer: The passage suggests that it is still unclear if the mu- or delta-receptors are primarily responsible for mediating opioid-r

 40%|████      | 40/100 [03:21<03:01,  3.03s/it]

1. question: How does the mechanism of action of dofetilide relate to its classification as a class III antiarrhythmic agent?
   answer: Dofetilide is classified as a class III antiarrhythmic agent because it selectively blocks the rapid component of the cardiac ion channel delayed rectifier current, leading to an increase in the action potential duration and effective refractory period of the myocyte. This mechanism helps terminate reentrant tachyarrhythmias and prevent their re-induction, which is characteristic of class III antiarrhythmics.
   text: Target-17187453
   table: drugbank-drugs_links

2. question: What potential serious side effect of dofetilide is minimized by specific dosage adjustments, and how is this related to its recommended initial treatment setting?
   answer: The potential serious side effect of dofetilide is torsades de pointes, which can be minimized by adjusting the dosage according to creatinine clearance and QT(c) interval. To further mitigate this risk, t

 41%|████      | 41/100 [03:30<03:56,  4.01s/it]

1. question: How is Clopidogrel related to the P2Y12 receptor in terms of its pharmaceutical classification?
   answer: Clopidogrel is classified as a purinergic P2Y receptor antagonist, specifically targeting the P2Y12 receptor, which plays a role in inhibiting platelet aggregation. This interaction is clinically relevant for its use as a platelet aggregation inhibitor.
   text: Target-14668029
   table: drugbank-targets

2. question: What common feature does Clopidogrel share with Ticlopidine regarding their function on blood platelets?
   answer: Both Clopidogrel and Ticlopidine are classified as platelet aggregation inhibitors, which function by interacting with the P2Y12 receptor on blood platelets to prevent platelet aggregation.
   text: Target-14668029
   table: drugbank-targets

3. question: What is the significance of the P2Y12 receptor in the mechanism of action of Clopidogrel according to the passage, and is there a corresponding drug entry for Clopidogrel in the provided t

 43%|████▎     | 43/100 [03:39<03:53,  4.09s/it]

1. question: How is the binding orientation of mitoxantrone to DNA described in the passage, and what can we infer about its antitumor properties based on its molecular interactions?
   answer: The passage describes the binding orientation of mitoxantrone to DNA as having its anthraquinone chromophore perpendicular to the direction of inter-base hydrogen bonds, with side-chains fitting into the minor groove. This orientation is energetically favorable, suggesting a strong intercalative binding that may enhance its antitumor properties. This aligns with its classification as an antineoplastic agent, as seen in the passage. Additionally, the "drugbank-drug" table provides further context about its antitumor classification, although mitoxantrone is not directly listed in the provided table sample.
   text: Target-9701490
   table: drugbank-drug

2. question: What are the possible implications of the molecular dynamics study findings on the development of antitumor drugs like ametantrone a

 44%|████▍     | 44/100 [03:46<04:22,  4.68s/it]

1. question: What testing method was used to measure histamine H1-receptor occupancy (H1RO) in the study comparing fexofenadine and cetirizine, and how does it relate to the drug information available in the tables?
   answer: The study used positron emission tomography (PET) with (11)C-doxepin to measure histamine H1-receptor occupancy (H1RO) in the brain. This testing method relates to the drug information in the tables by demonstrating the importance of understanding receptor occupancy in assessing the efficacy and side effects of drugs, such as antihistamines like fexofenadine and cetirizine, which are not listed in the provided drugbank table samples but are relevant to the study's context.
   text: Target-15286093
   table: drugbank-drug

2. question: How does cetirizine's classification as a second-generation antihistamine relate to the type and classification of drugs in the drugbank-drug table?
   answer: Cetirizine is classified as a second-generation antihistamine in the pas

 46%|████▌     | 46/100 [03:53<03:51,  4.29s/it]

1. question: Which enzymes were detected in the tissue analysis of Spirometra mansonoides spargana that utilize coenzyme forms of vitamin B12, and what is the known action of the drug targeting the epidermal growth factor receptor in humans?
   answer: The enzyme methylmalonyl CoA mutase, which requires adenosylcobalamin, was detected in the tissue analysis of Spirometra mansonoides spargana. Cetuximab targets the epidermal growth factor receptor in humans, and its known action is to bind to this receptor.
   text: Target-18565
   table: drugbank-targets

2. question: What is the biological half-life of vitamin B12 taken up by spargana, and what is the primary key for the drug that targets prothrombin in humans?
   answer: The biological half-life of vitamin B12 taken up by spargana is about 7 weeks. The primary key for the drug that targets prothrombin in humans is DB00001.
   text: Target-18565
   table: drugbank-targets

3. question: Is methylcobalamin detected in the tissue of Spir

 47%|████▋     | 47/100 [04:04<04:53,  5.54s/it]

1. question: How is enalapril transported in cells, and is there any information in the tables about transporters that could be related to this mechanism?
   answer: Enalapril is transported in cells expressing oatp1, an organic anion transport polypeptide, as demonstrated in the passage. However, the tables do not list oatp1 specifically but do mention other transporters like P-glycoprotein 1 and Canalicular multispecific organic anion transporter 1, which are known to transport various substances in humans. This suggests that similar transporters might also be involved in enalapril's movement.
   text: Target-9794920
   table: drugbank-transporters

2. question: What is the relationship between the transport of enalapril in the study and the drug information provided in the tables?
   answer: The passage describes enalapril as being transported by oatp1 in a study using HeLa cells. The tables provide detailed information about other drugs, such as their types and biological origins, 

 48%|████▊     | 48/100 [04:11<04:59,  5.76s/it]

1. question: What is the selective target of solifenacin in the treatment of overactive bladder, and how is this information supported by the drugbank-targets table?
   answer: Solifenacin is a selective antagonist of the M(3) muscarinic receptor, as indicated in the passage. The drugbank-targets table lists specific targets for various drugs, but solifenacin itself is not directly listed in the sample, suggesting the need to refer to additional resources for comprehensive target information. The passage confirms solifenacin's selectivity towards the M(3) receptor, which is consistent with its therapeutic use for overactive bladder.
   text: Target-19446545
   table: drugbank-targets

2. question: How does the in vivo selectivity of solifenacin compare to oxybutynin in the treatment of overactive bladder according to the passage and the available data from drugbank-drug?
   answer: According to the passage, solifenacin exhibits greater activity in binding muscarinic receptors in vivo c

 49%|████▉     | 49/100 [04:20<05:35,  6.59s/it]

1. question: What is the primary function of almotriptan, and is it listed in the provided drugbank-drug table?
   answer: Almotriptan is a 5-HT1B/1D-receptor agonist used to treat migraines. However, it is not listed in the provided drugbank-drug table.
   text: Target-10824628
   table: drugbank-drug

2. question: What safety measures were used in the study assessing the interaction between almotriptan and verapamil, and does the drugbank-drug table provide any safety-related labels for the drugs listed?
   answer: The safety measures in the study included blood pressure and pulse measurements, electrocardiography, and adverse event monitoring. The drugbank-drug table provides FDA labels for some drugs, such as Lepirudin and Cetuximab, which may contain safety-related information.
   text: Target-10824628
   table: drugbank-drug

3. question: Are there any links to FDA-approved drug products related to the drugs studied (almotriptan and verapamil) in the drugbank-drugs_links table?
 

 50%|█████     | 50/100 [04:29<06:00,  7.21s/it]

1. question: Which fluoroquinolone was found to not produce resistant mutants in the study of Neisseria gonorrhoeae, and is this drug listed in the drugbank-drug table?
   answer: Gemifloxacin was the only fluoroquinolone in the study that did not produce resistant mutants of Neisseria gonorrhoeae. However, it is not listed in the provided entries of the drugbank-drug table.
   text: Target-11581235
   table: drugbank-drug

2. question: Was trovafloxacin, a drug mentioned in the study, associated with any stable increase in MIC without novel mutations, and does the drugbank-drug table provide any information about trovafloxacin?
   answer: Trovafloxacin was associated with a stable increase in MIC in one mutant without novel mutations. The drugbank-drug table does not provide specific information about trovafloxacin in the provided entries.
   text: Target-11581235
   table: drugbank-drug

3. question: Which fluoroquinolone mentioned in the passage was not subject to mutation frequency

 52%|█████▏    | 52/100 [04:37<04:43,  5.91s/it]

1. question: How does thyroid hormone regulation potentially affect the pharmacokinetics of drugs that are substrates of P-glycoprotein 1?
   answer: Thyroid hormones may regulate P-glycoprotein and influence the disposition of P-glycoprotein substrates such as digoxin and talinolol. The passage suggests this regulation is due to increased expression of P-glycoprotein 1, a transporter in humans, which was observed when thyroid function was altered. This increased expression could affect the pharmacokinetics of drugs like talinolol by altering their absorption and bioavailability.
   text: Target-12235446
   table: drugbank-transporters

2. question: Which drug studied in the passage is a substrate for P-glycoprotein, and how might its pharmacokinetics be influenced by changes in thyroid function?
   answer: Talinolol, a P-glycoprotein substrate, was studied in the passage. Changes in thyroid function, as hypothesized, may influence its pharmacokinetics by regulating the expression of P

 54%|█████▍    | 54/100 [04:43<03:38,  4.76s/it]

1. question: What are the primary receptors that eletriptan targets, and is there a related drug listed in the drugbank-drug table?
   answer: Eletriptan is a second-generation triptan that targets the 5-HT(1B/1D) receptors, which are associated with its efficacy in treating migraines. However, the drugbank-drug table provided does not list eletriptan specifically but includes various biotech drugs like Lepirudin, Cetuximab, and others.
   text: Target-11050304
   table: drugbank-drug

2. question: What potential side effect was observed with eletriptan on a specific artery, and does the drugbank-drug table provide information on similar drugs?
   answer: Eletriptan was observed to cause relaxation of the middle meningeal artery at a high concentration (100 microM), indicating a potential side effect. The drugbank-drug table, however, does not list eletriptan or similar triptans, focusing instead on biotech drugs such as Lepirudin and Etanercept.
   text: Target-11050304
   table: drug

 55%|█████▌    | 55/100 [04:47<03:32,  4.73s/it]

1. question: What is the significance of fenofibrate's half-life and how does it relate to patients with renal failure?
   answer: Fenofibrate has a plasma half-life of approximately 30 hours in normal persons at steady state with usual doses of 100 mg three times daily. However, because it is not dialyzable, fenofibrate has a markedly prolonged half-life in patients with renal failure and should not be used in such cases.
   text: Target-3318449
   table: drugbank-drug

2. question: How does the absorption of fenofibrate depend on food intake, and what is its mechanism of transport in the bloodstream?
   answer: Fenofibrate is extensively absorbed only in the presence of food, which enhances its absorption efficiency. Once absorbed, it is transported through the bloodstream by albumin.
   text: Target-3318449
   table: drugbank-drug

3. question: Why might fenofibrate not accumulate in the plasma or tissues, and what is its primary route of excretion?
   answer: Due to its short half-

 56%|█████▌    | 56/100 [04:56<04:03,  5.54s/it]

1. question: How does tramadol affect norepinephrine transporter (NET) function, and is there a related FDA label available for this drug?
   answer: Tramadol inhibits the desipramine-sensitive uptake of [3H]-norepinephrine (NE) by competing for the norepinephrine transporter, as indicated by the increase in the apparent Michaelis constant without changing the maximal velocity. However, there is no specific FDA label for tramadol provided in the "drugbank-drug" table, which lists FDA labels for other drugs.
   text: Target-11916794
   table: drugbank-drug

2. question: What is the inhibition constant (K(i)) of tramadol for NET, and does the DrugBank provide a URL for the FDA-approved label for tramadol?
   answer: The inhibition constant (K(i)) of tramadol for the norepinephrine transporter (NET) is 13.7 microM, as determined by competition assays. The "drugbank-drugs_links" table does not provide a specific URL for an FDA-approved label for tramadol, as it lists URLs for other drugs.


 57%|█████▋    | 57/100 [05:07<04:59,  6.97s/it]

1. question: Which K+ channel blockers were studied for their effect on fentanyl's antinociception in the passage, and are any of these blockers linked to the drug profiles in the provided tables?
   answer: The passage examined the effect of K+ channel blockers such as glibenclamide, tolbutamide, charybdotoxin (ChTX), apamin, tetraethylammonium chloride (TEA), 4-aminopyridine (4-AP), and cesium on fentanyl's antinociception. However, none of these specific blockers are listed in the drug profiles within the provided tables.
   text: Target-15665994
   table: drugbank-drug

2. question: What is the primary function of fentanyl as described in the passage, and how does this relate to any drug targets listed in the tables?
   answer: In the passage, fentanyl is described as a selective micro-opioid receptor agonist used to promote peripheral antinociception. This function relates to the concept of drug targeting specific receptors, similar to how Cetuximab targets the epidermal growth fa

 59%|█████▉    | 59/100 [05:12<03:32,  5.19s/it]

1. question: What is the affinity of amoxapine for the 5-HT2 receptor, and how does this relate to its psychopharmacological effects?
   answer: Amoxapine exhibits a high affinity for the 5-HT2 receptor, with a Ki value of less than 10(-7) mol/L, suggesting potent inhibitory effects on serotonin receptors which contribute to its psychopharmacological effects. However, specific information about amoxapine, such as its FDA approval and its detailed description, is not available in the provided tables.
   text: Target-1966571
   table: drugbank-drug

2. question: How do the affinities of amoxapine and loxapine for D2 receptors compare, and what might this imply about their use in treatment?
   answer: Both amoxapine and loxapine show high affinity for D2 receptors, with Ki values less than 10(-7) mol/L, suggesting they might be effective in treating conditions related to dopamine dysregulation. However, the specific drug information such as synthesis and FDA approval for these drugs is no

 61%|██████    | 61/100 [05:18<02:47,  4.30s/it]

1. question: What is the potential therapeutic use of cabergoline in Parkinson's disease, and what class of drug does it belong to?
   answer: Cabergoline, a specific dopamine D2 receptor agonist, has shown neuroprotective effects in SH-SY5Y human neuroblastoma cells by reducing oxidative stress-induced neuronal death, suggesting its potential use in Parkinson's disease therapy. It belongs to the class of drugs known as dopamine agonists, specifically targeting the dopamine D2 receptor.
   text: Target-12464354
   table: drugbank-drug

2. question: How does cabergoline's mechanism of action differ from haloperidol in terms of receptor interaction, and what evidence supports its neuroprotective effects?
   answer: Cabergoline acts as a dopamine D2 receptor agonist, whereas haloperidol is known to be a dopamine D2 receptor antagonist. The study showed that cabergoline provided neuroprotective effects against oxidative stress-induced neuronal death in a concentration-dependent manner, whi

 63%|██████▎   | 63/100 [05:29<02:55,  4.74s/it]

1. question: What is the primary plasma protein binding site for acitretin, and is acitretin listed among the drugs in the provided tables?
   answer: Acitretin primarily binds to albumin in plasma, accounting for 91% of its binding. However, acitretin is not explicitly listed among the drugs in the provided tables.
   text: Target-1449542
   table: drugbank-drug

2. question: How does the binding constant of acitretin to albumin compare to the drugs listed in the tables, and is acitretin found in any drug mixtures?
   answer: Acitretin has a binding constant of 0.7 x 10(6) M-1 to albumin. None of the drug mixtures listed in the tables mention acitretin as an ingredient.
   text: Target-1449542
   table: drugbank-drug_mixtures

3. question: Which drug described in the passage has a higher erythrocyte partitioning percentage, and can it be found in the drugbank-drug table?
   answer: Etretinate has a higher erythrocyte partitioning percentage of 14.5% compared to acitretin's 8.1%. Howev

 64%|██████▍   | 64/100 [05:35<02:57,  4.94s/it]

1. question: How does the administration of glucocorticoids affect memory performance in subjects with PTSD compared to nonpsychiatric controls, and is there any related drug that targets the epidermal growth factor receptor?
   answer: The administration of glucocorticoids is hypothesized to impair verbal declarative memory and working memory more in subjects with PTSD compared to nonpsychiatric controls. Cetuximab, a drug that targets the epidermal growth factor receptor, is also used in medical treatments but is not directly related to glucocorticoid administration for PTSD memory studies. 
   text: Target-16891588
   table: drugbank-targets

2. question: Which drug is associated with prothrombin as a target, and what is its relevance to the study of glucocorticoid sensitivity in PTSD?
   answer: Lepirudin is associated with prothrombin as a target. While Lepirudin itself is not directly relevant to the study of glucocorticoid sensitivity in PTSD, understanding drug mechanisms and t

 66%|██████▌   | 66/100 [05:44<02:42,  4.78s/it]

1. question: How does the protein binding of 5-Fluorouracil (FU) compare to antineoplastic drugs listed in the DrugBank database?
   answer: 5-Fluorouracil (FU) is carried in the serum by plasma proteins and binds specifically to human serum albumin (HSA), with at least one secondary binding site in the coumarin binding area. In comparison, the biotech antineoplastic drugs listed in the DrugBank database, such as Lepirudin, Cetuximab, Dornase alfa, Denileukin diftitox, and Etanercept, are primarily described in terms of their production and specific biochemical compositions, rather than their serum protein binding characteristics.
   text: Target-8573632
   table: drugbank-drug

2. question: What are the effects of aspirin on the binding affinity of 5-Fluorouracil (FU) and is there a similar interaction mentioned for the drugs in the DrugBank database?
   answer: Aspirin induces chemical acetylation of Lys199, which results in a decreased apparent binding affinity constant (nK) for 5-F

 69%|██████▉   | 69/100 [05:54<02:11,  4.24s/it]

1. question: How does the expression of P-glycoprotein 1 (P-gp) relate to the effects of darunavir on lymphocytes?
   answer: The passage indicates that exposure to darunavir, a second-generation protease inhibitor, causes a moderate, dose-dependent increase in P-gp expression in total, T, and T helper lymphocytes. P-glycoprotein 1 (P-gp) is a transporter protein found in humans, as listed in the "drugbank-transporters" table. This suggests that darunavir potentially affects the pharmacokinetics of antiretrovirals by influencing P-gp expression on lymphocytes.
   text: Target-23707228
   table: drugbank-transporters

2. question: What role does P-glycoprotein 1 play in the uptake of antiretroviral drugs like darunavir and raltegravir?
   answer: P-glycoprotein 1 (P-gp) can significantly affect the cellular uptake of antiretroviral drugs by hampering their access to HIV-1 replication sites. The passage notes that darunavir increases P-gp expression, which could alter drug distribution. 

 72%|███████▏  | 72/100 [06:07<01:57,  4.20s/it]

1. question: What is the potential for drug-drug interactions of Memantine (MEM) at the blood-brain barrier (BBB), and is there any known action of transporters involved in MEM's brain uptake?
   answer: Memantine (MEM) has a potential for drug-drug interactions at the blood-brain barrier (BBB) due to its saturable brain uptake and interaction with cationic transporter inhibitors. However, the passage states that little is known about MEM’s mechanism of transport across the BBB, and it aims to identify the putative transporter involved. In the related tables, the "drugbank-transporters" table lists transporters with unknown actions, suggesting limited information about specific transport mechanisms.
   text: Target-24098999
   table: drugbank-transporters

2. question: How does the study on Memantine (MEM) in Swiss Outbred mice relate to the knowledge about transporters in humans for other drugs?
   answer: The study on Memantine (MEM) in Swiss Outbred mice indicates saturable brain up

 73%|███████▎  | 73/100 [06:18<02:23,  5.32s/it]

1. question: How has the role of nicotine in treating Alzheimer's disease changed in recent years, and is there a specific drug from the tables that targets a related receptor?
   answer: The role of nicotine in treating Alzheimer's disease has gained attention because the disease is associated with down-regulation of cholinergic activity in the brain. A variety of nicotine derivatives are being tested and developed for this purpose. Cetuximab, found in the drugbank-drug table with the primary key DB00002, is an epidermal growth factor receptor (EGFr) binding FAB and can be related to the study of neuronal nicotinic ACh receptors as both involve receptor interaction mechanisms.
   text: Target-11006350
   table: drugbank-drug

2. question: Considering the environmental impacts of nicotine mentioned in the passage, what is the significance of lepirudin's production method from the table?
   answer: The passage highlights the increasing recognition of nicotine's significance from environ

 74%|███████▍  | 74/100 [06:26<02:28,  5.73s/it]

1. question: What are the specific effects of 4-chloropropofol on neuronal sodium channels, and is there any linkage to FDA-approved drugs for these effects?
   answer: 4-Chloropropofol was shown in the passage to suppress depolarization-induced whole-cell sodium inward currents in neuronal sodium channels (Na(V)1.2) with an IC(50) of 29.2 microM. However, there is no direct linkage to FDA-approved drugs for these specific effects in the provided tables, as the tables primarily focus on other drugs like Lepirudin and Cetuximab that do not relate to 4-chloropropofol.
   text: Target-18574460
   table: drugbank-drug

2. question: Are any of the drugs related to the effects on sodium channels mentioned in the passage listed in the drugbank-drug table?
   answer: The passage describes halogenated analogues of propofol acting on sodium channels, but the drugbank-drug table lists drugs such as Lepirudin, Cetuximab, Dornase alfa, Denileukin diftitox, and Etanercept, which are unrelated to the

 75%|███████▌  | 75/100 [06:33<02:32,  6.08s/it]

1. question: What is the mechanism of action of Galantamine in treating Alzheimer's disease, and are there any FDA-approved drugs with similar mechanisms in the provided data?
   answer: Galantamine treats Alzheimer's disease by combining allosteric modulation of nicotinic acetylcholine receptors and reversible, competitive inhibition of acetylcholinesterase. However, the provided tables do not include any FDA-approved drugs with a similar mechanism of action.
   text: Target-10971048
   table: drugbank-drug

2. question: How many patients were involved in the Phase III clinical trial program for Galantamine, and is there a similar large-scale study referenced in the drugbank-drug table?
   answer: Over 3,000 patients with mild-to-moderate Alzheimer's disease were enrolled in the Phase III clinical trial program for Galantamine. The drugbank-drug table does not reference a similarly large-scale study for any of the drugs listed.
   text: Target-10971048
   table: drugbank-drug

3. ques

 76%|███████▌  | 76/100 [06:43<02:46,  6.93s/it]

1. question: How does the presence of human serum affect the antifungal action of terbinafine, and is there any related drug in the database that shares a similar mechanism?
   answer: The presence of human serum antagonizes the antifungal action of terbinafine by non-specifically binding to major serum components like albumin and lipoproteins, thereby reducing its bioavailability. Naftifine and SDZ 87-469 are related allylamine compounds that also experience similar antagonism due to their lipophilicity. In the provided database, there is no direct mention of terbinafine, naftifine, or SDZ 87-469.
   text: Target-1287164
   table: drugbank-drugs_links

2. question: What is the mechanism of action of terbinafine, and is there any drug in the table that involves a similar biological production method?
   answer: Terbinafine acts by inhibiting ergosterol biosynthesis at the level of squalene epoxidase. While the provided tables do not list terbinafine specifically, they mention several b

 77%|███████▋  | 77/100 [06:50<02:37,  6.86s/it]

1. question: How does the experimental use of Xenopus laevis oocytes help in studying the transport mechanisms of nucleoside analogues like ribavirin, and what transporters are involved?
   answer: The experimental use of Xenopus laevis oocytes expressing both human concentrative nucleoside transporter 3 and human equilibrative transporter 1 allows for the study of nucleoside analogues like ribavirin by examining how these specific transporters facilitate drug uptake across cell membranes. The passage explains that these transporters are crucial for understanding the complex transport dynamics of anti-viral drugs through biological experiments.
   text: Target-20096989
   table: drugbank-transporters

2. question: What is the significance of human concentrative and equilibrative transporters in the transport of ribavirin, and are these transporters listed in the provided tables?
   answer: Human concentrative and equilibrative transporters play a crucial role in the transport of ribavi

 79%|███████▉  | 79/100 [06:57<01:56,  5.57s/it]

1. question: How does the interaction of risperidone with P-glycoprotein influence its distribution across the blood-brain barrier, and what is the known action of P-glycoprotein in humans?
   answer: Risperidone is a substrate of P-glycoprotein (P-gp), which actively extrudes drugs into the neural capillaries, thus influencing its distribution across the blood-brain barrier. The V(max)/K(m) ratio of 2.7 for risperidone indicates it is a better substrate for P-gp compared to its metabolite. Additionally, the known action of P-glycoprotein in humans is not specified, as indicated by the "unknown" action in the drugbank-transporters table.
   text: Target-16118767
   table: drugbank-transporters

2. question: What experimental approach was used to determine the interaction of risperidone and its metabolite with P-glycoprotein, and is this method linked to any known drug in the drugbank-drug table?
   answer: The interaction of risperidone and its metabolite with P-glycoprotein was determ

 80%|████████  | 80/100 [07:05<02:03,  6.16s/it]

1. question: What is the primary clinical use of dexrazoxane, and is there any FDA-related documentation available for it in the tables?
   answer: Dexrazoxane is primarily used for reducing the incidence and severity of cardiomyopathy associated with doxorubicin administration in women with metastatic breast cancer. However, the provided tables do not include any specific FDA documentation or links related to dexrazoxane.
   text: Target-9549639
   table: drugbank-drugs_links

2. question: Which chelator mentioned in the passage is associated with cardiovascular disease treatment, and does it appear in the drugbank-drugs table with details such as its state or synthesis method?
   answer: Ethylenediaminetetraacetic acid (EDTA) is the chelator associated with cardiovascular disease treatment mentioned in the passage. However, it does not appear in the sample entries of the drugbank-drug table provided, hence no details such as its state or synthesis method are available in the tables.


 81%|████████  | 81/100 [07:14<02:06,  6.66s/it]

1. question: What evidence suggests that bumetanide is not transported by the Ntcp and oatp carriers in liver parenchymal cells, and does the drugbank-transporters table support the existence of alternative transporters?
   answer: The passage describes an experiment where the expression of Ntcp and oatp in Xenopus laevis oocytes did not increase [3H]bumetanide uptake, suggesting that bumetanide is not transported by these carriers. Instead, it is taken up by a different organic anion transport system. The drugbank-transporters table lists transporters like the Canalicular multispecific organic anion transporter 1, which could potentially represent the alternative transport system for bumetanide in humans.
   text: Target-8652636
   table: drugbank-transporters

2. question: Considering the passage's findings on bumetanide uptake, which transporter listed in the drugbank-transporters table might be involved in the process, and what is its known action?
   answer: The passage indicates 

 82%|████████▏ | 82/100 [07:23<02:11,  7.29s/it]

1. question: What was the cumulative dose and method of administration for etoposide in the treatment of the patient who developed therapy-related acute myeloblastic leukemia (t-AML)?
   answer: The patient received a cumulative dose of 5500 mg of etoposide, with 1500 mg administered intravenously and 4000 mg given orally.
   text: Target-9631585
   table: drugbank-drug

2. question: Which FDA-approved drugs for intramuscular and oral use are linked to the parent key DB00007 in the provided data?
   answer: The FDA-approved drugs linked to the parent key DB00007 include Lupaneta Pack for intramuscular and oral use, Lupron Depot (leuprolide acetate) depot suspension, Lupron Depot-Ped (leuprolide acetate) depot suspension, and Fensolvi (leuprolide acetate) suspension.
   text: Target-9631585
   table: drugbank-drugs_links

3. question: What are the chromosomal abnormalities detected in the patient, and how do they relate to the drugs administered as indicated in the passage?
   answer: T

 84%|████████▍ | 84/100 [07:30<01:30,  5.66s/it]

1. question: What is the bioavailability of hydroxychloroquine and chloroquine, and is there any link to FDA labels in the provided tables?
   answer: Hydroxychloroquine and chloroquine have a bioavailability of 0.7-0.8 when administered orally. However, the provided tables do not contain specific FDA labels or links related to hydroxychloroquine or chloroquine, but they do mention FDA labels for other drugs such as Lepirudin and Cetuximab.
   text: Target-8803904
   table: drugbank-drug

2. question: How does the excretion of hydroxychloroquine and chloroquine compare to the types of drugs listed in the tables?
   answer: Hydroxychloroquine and chloroquine are excreted renally with 40-50% of the drug being excreted unchanged. The drugs listed in the tables, such as Lepirudin and Cetuximab, are biotech drugs typically administered differently and are not directly compared in excretion methods in the tables.
   text: Target-8803904
   table: drugbank-drug

3. question: Are there any ste

 86%|████████▌ | 86/100 [07:34<01:00,  4.35s/it]

1. question: What is the role of P-glycoprotein 1 in the elimination of digoxin, and is it associated with any specific drug in the drugbank-transporters table?
   answer: P-glycoprotein 1 acts as a digoxin pump at the apical membrane of the nephron, facilitating the elimination of digoxin from the circulation by the kidney. In the drugbank-transporters table, P-glycoprotein 1 is listed with the id "BE0001032" and is associated with several drugs, including one with the parent_key "DB00027."
   text: Target-14993604
   table: drugbank-transporters

2. question: How does the human organic anion transporter OATP4C1 interact with digoxin, and is there any entry related to digoxin in the drugbank-drug table?
   answer: Human OATP4C1 transports digoxin in a sodium-independent manner with a K(m) of 7.8 microM. However, there is no specific entry for digoxin in the provided sample of the drugbank-drug table.
   text: Target-14993604
   table: drugbank-drug

3. question: What information is av

 88%|████████▊ | 88/100 [07:42<00:51,  4.33s/it]

1. question: Which drug was introduced to monitor influenza resistance and is associated with a known action on Prothrombin in humans?
   answer: The drug zanamivir, a neuraminidase inhibitor introduced to monitor influenza resistance, is associated with a known action on Prothrombin in humans. This information is derived from the passage discussing the introduction of neuraminidase inhibitors and the table associating Prothrombin with the drug Lepirudin (DB00001), which is included in discussions of influenza treatments.
   text: Target-12821478
   table: drugbank-targets

2. question: Which influenza treatment has a similar mechanism to Cetuximab, targeting a component in humans, and how is it described in the table?
   answer: The influenza treatment zanamivir, similar to Cetuximab which targets the epidermal growth factor receptor in humans, functions as a neuraminidase inhibitor. Cetuximab is described in the table as a biotech drug targeting the epidermal growth factor receptor w

 91%|█████████ | 91/100 [07:48<00:28,  3.19s/it]

1. question: How do chlorhexidine and alexidine potentially contribute to the neutralization of bacterial cell wall components, and are they listed in the drugbank-drug table?
   answer: Chlorhexidine and alexidine are amphipathic bisbiguanides capable of binding to lipopolysaccharide (LPS) and lipoteichoic acid (LTA) from bacteria, which could help neutralize these components and prevent inflammation. Despite their potential in neutralizing bacterial membrane components, they are not listed in the provided sample from the drugbank-drug table.
   text: Target-18635521
   table: drugbank-drug

2. question: Are there any drugs listed in the drugbank-targets table that have a mechanism involving Toll-like receptors similar to the action of chlorhexidine and alexidine?
   answer: The passage discusses chlorhexidine and alexidine potentially neutralizing bacterial components that stimulate Toll-like receptors (TLRs). However, the drugbank-targets table does not list any drugs specifically t

 92%|█████████▏| 92/100 [08:02<00:40,  5.04s/it]

1. question: How does omeprazole treatment affect MRP3 expression in human livers, and is omeprazole listed in the drugbank-drugs table?
   answer: Omeprazole treatment significantly increases MRP3 expression in human livers, showing a 4.8-fold higher MRP3 protein expression compared to those not treated with omeprazole. However, omeprazole is not listed in the provided sample from the drugbank-drugs table.
   text: Target-12538803
   table: drugbank-drug

2. question: Is there any information about transporters related to bile salts in the drugbank-transporters table, and how do they relate to MRP3 function?
   answer: The drugbank-transporters table lists the "Bile salt export pump" and "Ileal sodium/bile acid cotransporter" as transporters related to bile salts. These transporters, like MRP3, are involved in the transport of bile salts, although MRP3 specifically transports bile salts and conjugated xenobiotics from cells into the blood.
   text: Target-12538803
   table: drugbank-t

 93%|█████████▎| 93/100 [08:08<00:36,  5.25s/it]

1. question: What is the mechanism of action of thiazide-like diuretics in treating arterial hypertension, and which drug from the provided tables is known for a similar mechanism involving membrane potentials?
   answer: Thiazide-like diuretics treat arterial hypertension by their chronic vasodilatory effect, which is linked to the activation of large conductance voltage- and calcium-activated potassium channels affecting membrane potentials. While the provided tables do not list a thiazide-like diuretic specifically, they include drugs like Lepirudin, which do not share this mechanism but are used for other therapeutic purposes in humans, as indicated in the drugbank-targets table.
   text: Target-29188803
   table: drugbank-targets

2. question: Which drug from the table is associated with a human protein target, and how does this relate to the treatment of hypertension as described in the passage?
   answer: Cetuximab, listed in the tables, targets the epidermal growth factor recep

 94%|█████████▍| 94/100 [08:15<00:33,  5.65s/it]

1. question: What is the role of dopamine in alveolar epithelial sodium and fluid transport, and how is it categorized in the drugbank database?
   answer: The passage indicates that dopamine, a beta-1 adrenergic agonist, was tested for its effect on alveolar epithelial sodium and fluid transport but showed no effect whether administered intra-alveolar or intravenously. However, the tables do not list dopamine specifically, so its categorization in the drugbank database isn't provided in the dataset.
   text: Target-9279221
   table: drugbank-drug

2. question: How does dobutamine influence alveolar liquid clearance, and what type of drug is it classified as in the drugbank database?
   answer: According to the passage, dobutamine, which acts as both a beta-1 and beta-2 adrenergic agonist, increases alveolar liquid clearance when administered either intra-alveolar or intravenously. The information from the tables does not provide a direct entry for dobutamine, so its classification in 

 96%|█████████▌| 96/100 [08:22<00:19,  4.79s/it]

1. question: What is the competitive inhibition constant (Ki) of fusidate for the modified CATIII enzyme with substitutions Q92C/N146F/Y168F/I172V, and how does this relate to the data provided in the drugbank-drug table?
   answer: The competitive inhibition constant (Ki) of fusidate for the modified CATIII enzyme with substitutions Q92C/N146F/Y168F/I172V is 5.4 microM, as identified through site-directed mutagenesis and kinetic characterization. However, fusidic acid itself is not listed in the drugbank-drug table, which focuses on biotech drugs such as lepirudin and cetuximab. This suggests that fusidate, while a potent inhibitor, might not be categorized under the drugs listed in the provided tables.
   text: Target-7500366
   table: drugbank-drug

2. question: How does the structural analysis of fusidate binding to CATI and CATIII variants relate to the drug types listed in the drugbank-drug table?
   answer: The structural analysis of fusidate binding to CATI and CATIII variants 

 97%|█████████▋| 97/100 [08:30<00:16,  5.51s/it]

1. question: What is the significance of bioretrosynthesis in the engineering of biosynthetic pathways, specifically for the antiretroviral drug didanosine, and is it associated with any existing FDA-approved drugs in the database?
   answer: Bioretrosynthesis is significant as it allows for the construction of biosynthetic pathways using the retrograde evolution hypothesis, as demonstrated in the engineered pathway for the antiretroviral nucleoside analog didanosine. Although didanosine's engineered pathway is an application of bioretrosynthesis, it does not appear directly related to the FDA-approved drugs listed in the database, which include leuprolide acetate products and other biotech drugs like Lepirudin, Cetuximab, Dornase alfa, Denileukin diftitox, and Etanercept.
   text: Target-24657930
   table: drugbank-drugs_links

2. question: How does the engineering process described in the passage for didanosine synthesis relate to the biotechnology drugs listed in the database?
   an

 99%|█████████▉| 99/100 [08:54<00:08,  8.11s/it]

1. **Question**: How does glyburide affect the mitoKATP channel under different experimental conditions, and is it included in the DrugBank dataset?
   **Answer**: Glyburide is completely insensitive to the mitoKATP channel when the channel's open state is induced by the absence of ATP and Mg2+, but becomes highly sensitive when the open state is induced by Mg2+, ATP, a physiological opener like GTP, or a pharmacological opener like diazoxide. However, there is no specific entry for glyburide in the provided DrugBank dataset sample.
   **Text**: Target-9593694
   **Table**: drugbank-drug

2. **Question**: Are there any FDA-approved products related to the drugs discussed in the passage, specifically focusing on the effects of glyburide and 5-HD on mitochondria?
   **Answer**: The passage discusses the effects of glyburide and 5-HD on the mitoKATP channel, but the provided tables do not list any FDA-approved products specifically related to these drugs. The tables mainly list other drug

100%|██████████| 100/100 [09:03<00:00,  5.43s/it]

1. question: How is the clinical efficacy of tamoxifen potentially influenced, and which specific genes are investigated for their role in tamoxifen metabolism?
   answer: The clinical efficacy of tamoxifen is potentially influenced by the activity of drug-metabolizing enzymes and transporters involved in its formation, metabolism, and elimination. The study specifically investigates allelic variants of the CYP2D6 enzyme and haplotype-tagging SNPs of transporter genes ABCB1, ABCC2, and ABCG2 for their role in tamoxifen metabolism.
   text: Target-20124171
   table: drugbank-transporters

2. question: Which transporters are relevant to the study of tamoxifen's clinical outcomes, and what organisms are they associated with?
   answer: The transporters relevant to the study of tamoxifen's clinical outcomes are P-glycoprotein 1 (associated with ABCB1), and Canalicular multispecific organic anion transporter 1 (associated with ABCC2), and they are both associated with humans.
   text: Targe




In [11]:
def paraphrase_questions(input_csv, output_csv, model="gpt-4o"):
    """
    Paraphrase questions to remove references to specific tables or passages
    
    Args:
        input_csv: Path to input CSV file with original questions
        output_csv: Path to output CSV file for paraphrased questions
        model: LLM model to use for paraphrasing
    """
    # Read the input CSV
    df = pd.read_csv(input_csv, quoting=csv.QUOTE_ALL)
    
    # New list to hold the updated rows
    paraphrased_data = []
    
    # Process each question
    for _, row in tqdm(list(df.iterrows()), desc="Paraphrasing questions"):
        question = row['question']
        
        # Create prompt for the LLM
        prompt = f"""
        Your task is to paraphrase the following question to remove references to specific tables, datasets, or passages.
        If the question is solely about dataset structure or tables (e.g., "Does the dataset provide any links..."), return "REMOVE" to indicate it should be filtered out.
        Otherwise, paraphrase the question to make it more general while preserving its core content.

        Examples:
        - "Does the data set provide any links or references for Flumazenil in the drugbank-drugs_links table?" → "REMOVE"
        - "Which muscarinic receptor antagonist examined in the passage has affinity values closest to that of atropine and is also a biotech drug listed in the drugbank-drug table?" → "Which muscarinic receptor antagonist has affinity values closest to that of atropine and is also a biotech drug?"
        - "What is the significance of identifying Lys 190 as the modification site of HSA in the study, and how does it contrast with any protein-related drugs in the tables?" → "What is the significance of identifying Lys 190 as the modification site of HSA, and how does it contrast with any protein-related drugs?"

        Original question: {question}
        Paraphrased question:
        """
        
        # Call the LLM for paraphrasing
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an expert in reformulating pharmaceutical questions to make them more general and independent of specific data sources."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3
        )
        
        paraphrased = response.choices[0].message.content.strip()
        
        # Skip questions that should be removed
        if paraphrased == "REMOVE":
            continue
            
        # Add the paraphrased question and original answer/text/table to the new data
        paraphrased_data.append({
            'question': paraphrased,
            'answer': row['answer'],
            'text': row['text'],
            'table': row['table']
        })
    
    # Write the paraphrased data to the output CSV
    with open(output_csv, 'w', newline='') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(['question', 'answer', 'text', 'table'])  # header
        for item in paraphrased_data:
            writer.writerow([
                item['question'],
                item['answer'],
                item['text'],
                item['table']
            ])
    
    print(f"Processed {len(df)} questions, kept {len(paraphrased_data)} after paraphrasing and filtering")
    return paraphrased_data

In [None]:
# paraphrased_data = paraphrase_questions('both_output.csv', 'paraphrased_output.csv')

In [12]:
def verify_qa_uses_both_sources(input_csv, output_csv, model="gpt-4o"):
    """
    Verify that each QA pair in the CSV actually uses information from both the passage and table
    
    Args:
        input_csv: Path to input CSV file with QA pairs
        output_csv: Path to output CSV file for verified QA pairs
        model: LLM model to use for verification
    """
    # Read the input CSV
    df = pd.read_csv(input_csv, quoting=csv.QUOTE_ALL)
    
    # Load all passages and tables (caching to avoid repeated loading)
    passages = load_target_passages()
    tables = load_drugbank_tables()
    
    # New list to hold the verified rows
    verified_data = []
    
    # Process each QA pair
    for _, row in tqdm(list(df.iterrows()), desc="Verifying QA pairs"):
        question = row['question']
        answer = row['answer']
        passage_id = row['text']
        table_name = row['table']
        
        # Get passage content
        passage_content = passages.get(passage_id, "Passage content not available")
        
        # Get table content (sample)
        table_content = {}
        base_table_name = table_name.replace('.csv', '')
        if base_table_name in tables:
            df_table = tables[base_table_name]
            table_content = {
                'columns': list(df_table.columns),
                'sample': df_table.head(5).to_dict('records')
            }
        else:
            table_content = {"error": f"Table {base_table_name} not found"}
        
        # Create prompt for the LLM
        prompt = f"""
        Your task is to determine if the provided answer to a question uses information from BOTH the passage AND the table.
        
        Question: {question}
        Answer: {answer}
        
        Passage:
        {passage_content[:2000]}  # Limiting to first 2000 chars for token constraints
        
        Table ({table_name}):
        {json.dumps(table_content, indent=2)}
        
        Does the answer draw specific information from BOTH the passage AND the table? Reply with just "YES" or "NO".
        If the answer only uses information from one source or could be answered using only the passage or only the table, reply "NO".
        """
        
        # Call the LLM for verification
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a verification expert who determines if answers draw from multiple data sources."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1
        )
        
        verification = response.choices[0].message.content.strip().upper()
        
        # Only keep pairs that use both sources
        if "YES" in verification:
            verified_data.append({
                'question': question,
                'answer': answer,
                'text': passage_id,
                'table': table_name
            })
    
    # Write the verified data to the output CSV
    with open(output_csv, 'w', newline='') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(['question', 'answer', 'text', 'table'])  # header
        for item in verified_data:
            writer.writerow([
                item['question'],
                item['answer'],
                item['text'],
                item['table']
            ])
    
    print(f"Processed {len(df)} QA pairs, kept {len(verified_data)} that use both passage and table")
    return verified_data

# def load_target_passages():
#     """Load all target passages from the pubmed-targets directory"""
#     passages = {}
#     target_files = Path('inputs/pubmed-targets').glob('Target-*')
    
#     for file_path in target_files:
#         target_id = file_path.name
#         with open(file_path, 'r') as f:
#             passages[target_id] = f.read()
#     return passages

# def load_drugbank_tables():
#     """Load all relevant DrugBank tables"""
#     tables = {}
#     csv_files = Path('inputs/drugbank-tables').glob('*.csv')
    
#     for file_path in csv_files:
#         table_name = file_path.stem
#         tables[table_name] = pd.read_csv(file_path)
    
#     return tables

# After running this, you can apply the paraphrasing function to the verified QA pairs
def process_qa_pipeline(input_csv):
    """Complete pipeline to process QA pairs - verify and paraphrase"""
    verified_csv = 'verified_output.csv'
    final_csv = 'paraphrased_output.csv'
    
    # Step 1: Verify QA pairs use both sources
    verify_qa_uses_both_sources(input_csv, verified_csv)
    
    # Step 2: Paraphrase verified questions to remove data source references
    paraphrase_questions(verified_csv, final_csv)
    
    print(f"Process complete. Final output saved to {final_csv}")

In [14]:
process_qa_pipeline('both_output.csv')

  tables[table_name] = pd.read_csv(file_path)
Verifying QA pairs: 100%|██████████| 244/244 [02:31<00:00,  1.61it/s]


Processed 244 QA pairs, kept 77 that use both passage and table


Paraphrasing questions: 100%|██████████| 77/77 [01:49<00:00,  1.42s/it]

Processed 77 questions, kept 77 after paraphrasing and filtering
Process complete. Final output saved to paraphrased_output.csv



