In [None]:
!pip install openai pandas xlrd



In [None]:
import openai
import pandas as pd
import time

# Read the API key from file (adjust the path as needed)
with open('/content/drive/MyDrive/KG/openai_api.txt', 'r') as f:
    api_key = f.read().strip()

# Initialize the client (using the OpenAI class)
from openai import OpenAI
client = OpenAI(api_key=api_key)

# Set the API calling mode here. Options: "chat", "chat_web", "response", "response_web"
mode = "response_web"  # change this to "chat", "response", or "response_web" as needed

def construct_prompt(row):
    """
    Constructs the prompt for a given row without including model rank.
    """
    prompt = (
        "You are an expert biomedical researcher. For the following drug–disease entry, please verify whether the drug is effective against the target disease based on its mechanism, known pharmacodynamics, and available literature.\n\n"
        f"Input Data:\n"
        f"- Disease: {row['Disease_name']} (ID: {row['Disease_ID']})\n"
        f"- Drug: {row['Drug_name']} (ID: {row['Drug_ID']})\n"
        f"- Description: {row['description']}\n"
        f"- Mechanism: {row['mechanism_of_action']}\n"
        f"- protein_binding: {row['protein_binding']}\n"
        f"- category: {row['category']}\n"
        f"- which model predicted the drug: {row['Model']}\n"
        f"- Pharmacodynamics: {row['pharmacodynamics']}\n\n"
        "Start with Yes/No/Not sure, please provide a concise validation explanation in no more than 50 words that states whether the drug is likely effective against the disease, based on the above data.\n\n"
        "Output format:\n"
        "Validation: [short explanation]."
    )
    return prompt

def call_gpt_mode(prompt, mode):
    """
    Calls the GPT-4o API based on the selected mode.
    Modes:
      - chat: uses client.chat.completions.create without web search
      - response: uses client.responses.create without web search
      - response_web: uses client.responses.create with web search tool enabled
    """
    try:
        if mode == "chat":
            completion = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2,
                #max_tokens=150
            )
            return completion.choices[0].message.content.strip()
        elif mode == "response":
            response = client.responses.create(
                model="gpt-4o",
                input=prompt,
                temperature=0.2,
                #max_tokens=150
            )
            for item in response.output:
                if hasattr(item, "content"):
                    for content_item in item.content:
                        if hasattr(content_item, "text"):
                            return content_item.text.strip()
            return "No output found in response structure."
        elif mode == "response_web":
            response = client.responses.create(
                model="gpt-4o",
                tools=[{"type": "web_search_preview"}],
                input=prompt,
                temperature=0.2,
                #max_tokens=150
            )
            for item in response.output:
                if hasattr(item, "content"):
                    for content_item in item.content:
                        if hasattr(content_item, "text"):
                            return content_item.text.strip()
            return "No output found in response structure."
        else:
            raise ValueError("Invalid mode selected.")
    except Exception as e:
        print("Error during API call:", e)
        return "API call error."

# Load the Excel file (Version 1 - simple dataset)
df = pd.read_excel("/content/drive/MyDrive/KG/drugs_for_llm_validation_updated.xlsx", sheet_name=0)

# (Optional) Apply filtering if desired. For example:
#df = df[df['Disease_name'].str.contains("cardiomyopathy due to anthracyclines", case=False)]

results = []

# Process each row one by one
for idx, row in df.iterrows():
    prompt = construct_prompt(row)
    print(f"Processing row {idx+1}: {row['Drug_name']} for {row['Disease_name']}")
    validation_output = call_gpt_mode(prompt, mode)
    results.append({
        "row_index": idx,
        "Disease_ID": row["Disease_ID"],
        "Disease_name": row["Disease_name"],
        "Drug_ID": row["Drug_ID"],
        "Drug_name": row["Drug_name"],
        "Model": row["Model"],
        "Validation_Output": validation_output
    })
    time.sleep(1)  # pause to respect rate limits

# Save the results to CSV
results_df = pd.DataFrame(results)
results_df.to_csv("/content/drive/MyDrive/KG/validation_results_simple4t2.csv", index=False)



print("Validation (Version 1) completed and saved.")



Processing row 1: Caffeine for cardiomyopathy due to anthracyclines
Processing row 2: Acetylcysteine for cardiomyopathy due to anthracyclines
Processing row 3: Cysteine for cardiomyopathy due to anthracyclines
Processing row 4: Vincristine for cardiomyopathy due to anthracyclines
Processing row 5: Clomifene for cardiomyopathy due to anthracyclines
Processing row 6: Histamine for cardiomyopathy due to anthracyclines
Processing row 7: Paclitaxel for cardiomyopathy due to anthracyclines
Processing row 8: Alprostadil for cardiomyopathy due to anthracyclines
Processing row 9: Methotrexate for cardiomyopathy due to anthracyclines
Processing row 10: Levothyroxine for cardiomyopathy due to anthracyclines
Processing row 11: Glycine for cardiomyopathy due to anthracyclines
Processing row 12: Hydrochlorothiazide for cardiomyopathy due to anthracyclines
Processing row 13: Erythromycin for cardiomyopathy due to anthracyclines
Processing row 14: Erythropoietin for cardiomyopathy due to anthracycline

In [None]:
import pandas as pd
import time
from openai import OpenAI

# Set your OpenAI API key by reading it from your Google Drive file
with open('/content/drive/MyDrive/KG/openai_api.txt', 'r') as f:
    api_key = f.read().strip()

client = OpenAI(api_key=api_key)

# Function to construct the prompt for a given row in the complex file
def construct_complex_prompt(row):
    # Collect a few drug synonyms if available (concatenate non-null values)
    synonym_cols = [col for col in row.index if col.startswith("drug_synonym") and pd.notnull(row[col])]
    synonyms = ", ".join([str(row[col]) for col in synonym_cols])

    # Similarly, select the first two abstracts if available
    abstract_cols = ["abstract_1", "abstract_2"]
    abstracts = "; ".join([str(row[col]) for col in abstract_cols if pd.notnull(row[col])])

    prompt = (
        "You are an expert biomedical researcher. For the following complex drug–disease entry with synonyms and multiple abstract references, "
        "please verify the efficacy of the drug for the target disease using its mechanism, available abstracts, synonyms, and known literature.\n\n"
        f"Input Data:\n- Disease: {row['Disease_name']} (ID: {row['Disease_ID']})\n"
        f"- Drug: {row['Drug_name']} (ID: {row['Drug_ID']})\n"
        f"- Rank: {row['rank']}\n"
        f"- Description: {row['description']}\n"
        f"- Mechanism: {row['mechanism_of_action']}\n"
        f"- Drug Synonyms: {synonyms}\n"
        f"- Abstracts (first two): {abstracts}\n\n"
        "Please provide a concise validation explanation in no more than 50 words, addressing both the drug’s mechanism and the evidence from the abstracts. "
        "Then, summarize the agreement between your literature-based ranking and the model’s predicted rank for this disease.\n\n"
        "Output format:\nValidation: [short explanation]. Ranking summary: [short summary]."
    )
    return prompt

# Function to call the ChatGPT API for a single prompt using the new client
def call_gpt(prompt):
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",  # Use a valid model name (adjust if needed)
            messages=[{"role": "user", "content": prompt}],
            #temperature=0.3,
            #max_tokens=200
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        print("Error:", e)
        return None

# Load the complex CSV file from your Google Drive
df_complex = pd.read_csv("/content/drive/MyDrive/KG/drug_disease_with_numbered_abstracts_20250325_190703.csv")

# Optionally, filter by a specific drug, disease, or model:
# For example: df_complex = df_complex[df_complex['Drug_name'].str.contains("Caffeine", case=False)]

# Prepare a list to hold the results
complex_results = []

# Process each row one by one
for idx, row in df_complex.iterrows():
    prompt = construct_complex_prompt(row)
    print(f"Processing row {idx+1}: {row['Drug_name']} for {row['Disease_name']}")
    validation_output = call_gpt(prompt)
    # Pause briefly to respect rate limits
    time.sleep(1)
    complex_results.append({
        "row_index": idx,
        "Disease_ID": row["Disease_ID"],
        "Disease_name": row["Disease_name"],
        "Drug_ID": row["Drug_ID"],
        "Drug_name": row["Drug_name"],
        "Model_rank": row["rank"],
        "Validation_Output": validation_output
    })

# Create a DataFrame for the complex results and save to CSV in your Drive
complex_results_df = pd.DataFrame(complex_results)
complex_results_df.to_csv("/content/drive/MyDrive/KG/validation_results_complex.csv", index=False)

# Summarize the ranking comparison by disease
complex_ranking_summary = complex_results_df.groupby("Disease_name")["Model_rank"].describe()
print("Complex Ranking Summary by Disease:")
print(complex_ranking_summary)
complex_ranking_summary.to_csv("/content/drive/MyDrive/KG/ranking_summary_by_disease_complex.csv")


Processing row 1: Caffeine for cardiomyopathy due to anthracyclines
Processing row 2: Acetylcysteine for cardiomyopathy due to anthracyclines
Processing row 3: Cysteine for cardiomyopathy due to anthracyclines
Processing row 4: Vincristine for cardiomyopathy due to anthracyclines
Processing row 5: Clomifene for cardiomyopathy due to anthracyclines
Processing row 6: Histamine for cardiomyopathy due to anthracyclines
Processing row 7: Paclitaxel for cardiomyopathy due to anthracyclines
Processing row 8: Alprostadil for cardiomyopathy due to anthracyclines
Processing row 9: Methotrexate for cardiomyopathy due to anthracyclines
Processing row 10: Levothyroxine for cardiomyopathy due to anthracyclines
Processing row 11: Glycine for cardiomyopathy due to anthracyclines
Processing row 12: Hydrochlorothiazide for cardiomyopathy due to anthracyclines
Processing row 13: Erythromycin for cardiomyopathy due to anthracyclines
Processing row 14: Erythropoietin for cardiomyopathy due to anthracycline

In [None]:
import pandas as pd
import time
import random
from openai import OpenAI
from openai import RateLimitError, APIError

# Set your OpenAI API key
with open('/content/drive/MyDrive/KG/openai_api.txt', 'r') as f:
    api_key = f.read().strip()

client = OpenAI(api_key=api_key)

# Set the API calling mode here. Options: "chat", "response", "response_web"
mode = "response_web"  # change as needed

def construct_comprehensive_prompt(row, max_abstracts=50):
    """
    Constructs a prompt with limited abstracts to avoid token limits.
    """
    # Start with base prompt
    prompt = (
        "You are an expert biomedical researcher. For the following drug–disease entry, "
        "please verify whether the drug is effective against the target disease based on "
        "its mechanism, known pharmacodynamics, and available literature.\n\n"
        "Input Data:\n"
    )

    # Only exclude these specific columns that you don't want
    excluded_cols = ['rank', 'is_fda_approved', 'pubmed_ids']
    excluded_prefixes = ['pubmed_id_', 'abstract_', 'title_']

    # Basic essential fields
    essential_fields = ['Disease_ID', 'Disease_name', 'Drug_ID', 'Drug_name',
                      'description', 'mechanism_of_action', 'pharmacodynamics', 'category']

    # Add essential fields first
    for field in essential_fields:
        if field in row and pd.notnull(row.get(field, pd.NA)):
            prompt += f"- {field}: {row[field]}\n"

    # Include some drug and disease synonyms (limited number)
    # Drug synonyms (up to 10)
    drug_synonyms = []
    for i in range(1, 11):  # Limit to 10 synonyms
        syn_col = f"drug_synonym{i}"
        if syn_col in row and pd.notnull(row.get(syn_col, pd.NA)):
            drug_synonyms.append(str(row[syn_col]))

    if drug_synonyms:
        prompt += f"- Drug Synonyms: {', '.join(drug_synonyms)}\n"

    # Disease synonyms (up to 10)
    disease_synonyms = []
    for i in range(1, 11):  # Limit to 10 synonyms
        syn_col = f"disease_synonym{i}"
        if syn_col in row and pd.notnull(row.get(syn_col, pd.NA)):
            disease_synonyms.append(str(row[syn_col]))

    if disease_synonyms:
        prompt += f"- Disease Synonyms: {', '.join(disease_synonyms)}\n"

    # Add a limited number of abstracts
    abstracts = []
    abstract_count = min(int(row.get('pubmed_id_count', 0)) if pd.notnull(row.get('pubmed_id_count', pd.NA)) else 0, max_abstracts)

    for i in range(1, abstract_count + 1):
        title_col = f"title_{i}"
        abstract_col = f"abstract_{i}"
        pubmed_id_col = f"pubmed_id_{i}"

        if (abstract_col in row.index and pubmed_id_col in row.index and
            pd.notnull(row.get(pubmed_id_col, pd.NA)) and pd.notnull(row.get(abstract_col, pd.NA))):

            title = row.get(title_col, '') if pd.notnull(row.get(title_col, pd.NA)) else 'No title'
            # Limit abstract length if needed
            abstract_text = row[abstract_col]
            if len(abstract_text) > 1500:  # Limit long abstracts
                abstract_text = abstract_text[:1500] + "..."

            abstracts.append(f"PubMed ID {row[pubmed_id_col]} - '{title}': {abstract_text}")

    if abstracts:
        prompt += "\nAbstracts:\n" + "\n\n".join(abstracts) + "\n"

    # Add conclusion request
    prompt += (
        "\nPlease provide your assessment in the following format:\n\n"
        "Result: [Yes/No/Not sure] (Choose only one)\n"
        "Validation: [short explanation, max 50 words]"
    )

    return prompt

def call_gpt_mode_with_retry(prompt, mode, max_retries=5):
    """
    Calls the GPT-4o API with exponential backoff retry logic for rate limits.
    """
    for attempt in range(max_retries):
        try:
            if mode == "chat":
                completion = client.chat.completions.create(
                    model="gpt-4o",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.2,
                )
                return completion.choices[0].message.content.strip()
            elif mode == "response":
                response = client.responses.create(
                    model="gpt-4o",
                    input=prompt,
                    temperature=0.2,
                )
                # Extract text from the output
                for item in response.output:
                    if hasattr(item, "type") and item.type == "message":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                return "No output found in response structure."
            elif mode == "response_web":
                response = client.responses.create(
                    model="gpt-4o",
                    tools=[{"type": "web_search_preview"}],
                    input=prompt,
                    temperature=0.2,
                )
                # Extract text from the output
                for item in response.output:
                    if hasattr(item, "type") and item.type == "message":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                return "No output found in response structure."
            else:
                raise ValueError("Invalid mode selected.")

        except Exception as e:
            print(f"Error during API call (attempt {attempt+1}/{max_retries}): {e}")

            # If we've reached the max retries, just return the error
            if attempt == max_retries - 1:
                return f"API call error after {max_retries} attempts: {str(e)}"

            # Exponential backoff with jitter
            wait_time = (2 ** attempt) + random.uniform(0, 1) + 5
            print(f"Waiting {wait_time:.2f} seconds before retrying...")
            time.sleep(wait_time)

    return "Failed after maximum retry attempts"

def parse_validation_output(output):
    """
    Parse the validation output to extract Result and Validation separately.
    """
    result = "Unknown"
    validation = ""

    if output and not output.startswith("API call error"):
        # Try to find Result section
        result_match = pd.Series([line for line in output.split('\n') if line.startswith('Result:')]).str.extract(r'Result:\s*(Yes|No|Not sure)')
        if not result_match.empty and pd.notna(result_match.iloc[0, 0]):
            result = result_match.iloc[0, 0]

        # Try to find Validation section
        validation_lines = [line for line in output.split('\n') if line.startswith('Validation:')]
        if validation_lines:
            validation = validation_lines[0].replace('Validation:', '').strip()

    # If parsing failed, return the whole output as validation
    if result == "Unknown" and not validation:
        validation = output

    return result, validation

def run_validation(df, mode="response", filter_disease=None, row_range=None, batch_size=5, max_abstracts=50):
    """
    Run validation on selected rows of the dataset with batching.

    Parameters:
    - df: DataFrame containing drug-disease data
    - mode: API calling mode ("chat", "response", "response_web")
    - filter_disease: Filter by disease name (string or list of strings)
    - row_range: Tuple of (start_row, end_row) to process specific rows
    - batch_size: Number of rows to process before taking a longer pause
    - max_abstracts: Maximum number of abstracts to include per drug-disease pair

    Returns:
    - DataFrame with validation results
    """
    # Apply disease filter if specified
    if filter_disease:
        if isinstance(filter_disease, str):
            df_filtered = df[df['Disease_name'].str.contains(filter_disease, case=False)]
        elif isinstance(filter_disease, list):
            df_filtered = df[df['Disease_name'].isin(filter_disease)]
        else:
            raise ValueError("filter_disease must be a string or list of strings")
    else:
        df_filtered = df

    # Apply row range filter if specified
    if row_range and isinstance(row_range, tuple) and len(row_range) == 2:
        start_row, end_row = row_range
        df_filtered = df_filtered.iloc[start_row:end_row]

    print(f"Processing {len(df_filtered)} rows after filtering")

    results = []

    # Process in batches
    for batch_idx, batch_df in enumerate(np.array_split(df_filtered, max(1, len(df_filtered) // batch_size))):
        print(f"Processing batch {batch_idx+1}...")

        for idx, row in batch_df.iterrows():
            prompt = construct_comprehensive_prompt(row, max_abstracts=max_abstracts)
            print(f"Processing row {idx}: {row['Drug_name']} for {row['Disease_name']}")

            # Debug the first two prompts only
            if len(results) < 2:
                print("\n===== SAMPLE PROMPT (first 1500 chars) =====")
                print(prompt[:15000] + "..." if len(prompt) > 15000 else prompt)
                print("=========================================\n")

            # Call the API with retry logic
            validation_output = call_gpt_mode_with_retry(prompt, mode)

            # Debug the first two responses
            if len(results) < 2:
                print("\n===== SAMPLE RESPONSE =====")
                print(validation_output[:500] + "..." if len(validation_output) > 500 else validation_output)
                print("===========================\n")

            # Parse the validation output
            result, validation = parse_validation_output(validation_output)

            # Store results
            results.append({
                "row_index": idx,
                "Disease_ID": row["Disease_ID"],
                "Disease_name": row["Disease_name"],
                "Drug_ID": row["Drug_ID"],
                "Drug_name": row["Drug_name"],
                "Result": result,
                "Validation": validation,
                "Raw_Output": validation_output
            })

            # Short pause between API calls within a batch
            time.sleep(10)  # 2 seconds between calls

        # Longer pause between batches to avoid rate limits
        if batch_idx < len(np.array_split(df_filtered, max(1, len(df_filtered) // batch_size))) - 1:
            pause_time = 30  # 30 seconds between batches
            print(f"Batch complete. Pausing for {pause_time} seconds before next batch...")
            time.sleep(pause_time)

    # Create and return results DataFrame
    return pd.DataFrame(results)

# Main execution code
if __name__ == "__main__":
    # Import numpy for array_split
    import numpy as np

    # Load the data file
    file_path = "/content/drive/MyDrive/KG/drug_disease_with_numbered_abstracts_20250325_190703.csv"
    df = pd.read_csv(file_path)

    # Example usage with reduced abstracts and batching:
    results_df = run_validation(
        df,
        mode="response",
        #filter_disease="cardiomyopathy",
        batch_size=2,        # Process 5 rows per batch
        max_abstracts=50      # Include at most 2 abstracts per drug-disease pair
    )

    # Save results
    results_df.to_csv("/content/drive/MyDrive/KG/validation_results_full_r_t02.csv", index=False)
    print("Validation completed and saved.")

Processing 416 rows after filtering
Processing batch 1...
Processing row 0: Caffeine for cardiomyopathy due to anthracyclines

===== SAMPLE PROMPT (first 1500 chars) =====
You are an expert biomedical researcher. For the following drug–disease entry, please verify whether the drug is effective against the target disease based on its mechanism, known pharmacodynamics, and available literature.

Input Data:
- Disease_ID: 99214
- Disease_name: cardiomyopathy due to anthracyclines
- Drug_ID: 14783.0
- Drug_name: Caffeine
- description: Caffeine is a drug of the methylxanthine class used for a variety of purposes, including certain respiratory conditions of the premature newborn, pain relief, and to combat drowsiness. Caffeine is similar in chemical structure to and. It can be sourced from coffee beans, but also occurs naturally in various teas and cacao beans, which are different than coffee beans. Caffeine is also used in a variety of cosmetic products and can be administered topically, o

  return bound(*args, **kwds)



===== SAMPLE RESPONSE =====
Result: No

Validation: Caffeine's primary mechanism as a CNS stimulant and its effects on cardiac muscle contraction do not address the underlying pathophysiology of anthracycline-induced cardiomyopathy, which involves oxidative stress and mitochondrial damage. Available literature does not support caffeine as an effective treatment for this condition.

Processing row 1: Acetylcysteine for cardiomyopathy due to anthracyclines

===== SAMPLE PROMPT (first 1500 chars) =====
You are an expert biomedical researcher. For the following drug–disease entry, please verify whether the drug is effective against the target disease based on its mechanism, known pharmacodynamics, and available literature.

Input Data:
- Disease_ID: 99214
- Disease_name: cardiomyopathy due to anthracyclines
- Drug_ID: 16036.0
- Drug_name: Acetylcysteine
- description: Acetylcysteine (also known as N-acetylcysteine or N-acetyl-L-cysteine or NAC) is primarily used as a mucolytic agent and i

  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 2...
Processing row 2: Cysteine for cardiomyopathy due to anthracyclines
Processing row 3: Vincristine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 3...
Processing row 4: Clomifene for cardiomyopathy due to anthracyclines
Processing row 5: Histamine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 4...
Processing row 6: Paclitaxel for cardiomyopathy due to anthracyclines
Processing row 7: Alprostadil for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 5...
Processing row 8: Methotrexate for cardiomyopathy due to anthracyclines
Processing row 9: Levothyroxine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 6...
Processing row 10: Glycine for cardiomyopathy due to anthracyclines
Processing row 11: Hydrochlorothiazide for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 7...
Processing row 12: Erythromycin for cardiomyopathy due to anthracyclines
Processing row 13: Erythropoietin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 8...
Processing row 14: Aminobenzoic acid for cardiomyopathy due to anthracyclines
Processing row 15: Pravastatin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 9...
Processing row 16: Cyclosporine for cardiomyopathy due to anthracyclines
Processing row 17: Phenobarbital for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 10...
Processing row 18: Phenytoin for cardiomyopathy due to anthracyclines
Processing row 19: Lidocaine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 11...
Processing row 20: Dasatinib for cardiomyopathy due to anthracyclines
Processing row 21: Theophylline for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 12...
Processing row 22: Vardenafil for cardiomyopathy due to anthracyclines
Processing row 23: Fusidic acid for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 13...
Processing row 24: Penicillamine for cardiomyopathy due to anthracyclines
Processing row 25: Cisplatin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 14...
Processing row 26: Ascorbic acid for cardiomyopathy due to anthracyclines
Processing row 27: Morphine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 15...
Processing row 28: Ganciclovir for cardiomyopathy due to anthracyclines
Processing row 29: Thiotepa for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 16...
Processing row 30: Nicotinyl alcohol for cardiomyopathy due to anthracyclines
Processing row 31: Carbamazepine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 17...
Processing row 32: Choline for cardiomyopathy due to anthracyclines
Processing row 33: Pamidronic acid for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 18...
Processing row 34: Nizatidine for cardiomyopathy due to anthracyclines
Processing row 35: Digoxin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 19...
Processing row 36: Spironolactone for cardiomyopathy due to anthracyclines
Processing row 37: Etidronic acid for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 20...
Processing row 38: Ketoconazole for cardiomyopathy due to anthracyclines
Processing row 39: Cytarabine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 21...
Processing row 40: Tretinoin for cardiomyopathy due to anthracyclines
Processing row 41: Imatinib for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 22...
Processing row 42: Valproic acid for cardiomyopathy due to anthracyclines
Processing row 43: Ritonavir for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 23...
Processing row 44: Ferrous fumarate for cardiomyopathy due to anthracyclines
Processing row 45: Risedronic acid for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 24...
Processing row 46: Pazopanib for cardiomyopathy due to anthracyclines
Processing row 47: Peginesatide for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 25...
Processing row 48: Dexamethasone for cardiomyopathy due to anthracyclines
Processing row 49: Amiodarone for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 26...
Processing row 50: Acetylcysteine for doxorubicin induced cardiomyopathy
Processing row 51: Caffeine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 27...
Processing row 52: Cysteine for doxorubicin induced cardiomyopathy
Processing row 53: Histamine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 28...
Processing row 54: Clomifene for doxorubicin induced cardiomyopathy
Processing row 55: Paclitaxel for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 29...
Processing row 56: Alprostadil for doxorubicin induced cardiomyopathy
Processing row 57: Vincristine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 30...
Processing row 58: Methotrexate for doxorubicin induced cardiomyopathy
Processing row 59: Aminobenzoic acid for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 31...
Processing row 60: Glycine for doxorubicin induced cardiomyopathy
Processing row 61: Hydrochlorothiazide for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 32...
Processing row 62: Erythromycin for doxorubicin induced cardiomyopathy
Processing row 63: Fusidic acid for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 33...
Processing row 64: Penicillamine for doxorubicin induced cardiomyopathy
Processing row 65: Levothyroxine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 34...
Processing row 66: Pravastatin for doxorubicin induced cardiomyopathy
Processing row 67: Erythropoietin for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 35...
Processing row 68: Cisplatin for doxorubicin induced cardiomyopathy
Processing row 69: Pamidronic acid for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 36...
Processing row 70: Ganciclovir for doxorubicin induced cardiomyopathy
Processing row 71: Cyclosporine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 37...
Processing row 72: Cytarabine for doxorubicin induced cardiomyopathy
Processing row 73: Ascorbic acid for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 38...
Processing row 74: Theophylline for doxorubicin induced cardiomyopathy
Processing row 75: Calcium chloride for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 39...
Processing row 76: Etidronic acid for doxorubicin induced cardiomyopathy
Processing row 77: Thiotepa for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 40...
Processing row 78: Lidocaine for doxorubicin induced cardiomyopathy
Processing row 79: Dasatinib for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 41...
Processing row 80: Phenytoin for doxorubicin induced cardiomyopathy
Processing row 81: Peginesatide for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 42...
Processing row 82: Phenobarbital for doxorubicin induced cardiomyopathy
Processing row 83: Daunorubicin for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 43...
Processing row 84: Tretinoin for doxorubicin induced cardiomyopathy
Processing row 85: Potassium chloride for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 44...
Processing row 86: Nicotinyl alcohol for doxorubicin induced cardiomyopathy
Processing row 87: Spironolactone for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 45...
Processing row 88: Nizatidine for doxorubicin induced cardiomyopathy
Processing row 89: Chlorothiazide for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 46...
Processing row 90: Carbamazepine for doxorubicin induced cardiomyopathy
Processing row 91: Ergocalciferol for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 47...
Processing row 92: Digoxin for doxorubicin induced cardiomyopathy
Processing row 93: Imatinib for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 48...
Processing row 94: Dexamethasone for doxorubicin induced cardiomyopathy
Processing row 95: Ferrous fumarate for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 49...
Processing row 96: Carmustine for doxorubicin induced cardiomyopathy
Processing row 97: Choline for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 50...
Processing row 98: Vardenafil for doxorubicin induced cardiomyopathy
Processing row 99: Ketoconazole for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 51...
Processing row 100: Acefylline for cardiomyopathy due to anthracyclines
Processing row 101: Ropeginterferon alfa-2b for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 52...
Processing row 102: Bitolterol for cardiomyopathy due to anthracyclines
Processing row 103: Zileuton for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 53...
Processing row 104: Tulobuterol for cardiomyopathy due to anthracyclines
Processing row 105: Vilanterol for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 54...
Processing row 106: Reproterol for cardiomyopathy due to anthracyclines
Processing row 107: Zafirlukast for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 55...
Processing row 108: Arformoterol for cardiomyopathy due to anthracyclines
Processing row 109: Interferon alfa-2b for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 56...
Processing row 110: Peginterferon alfa-2a for cardiomyopathy due to anthracyclines
Processing row 111: Formestane for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 57...
Processing row 112: Butylphthalide for cardiomyopathy due to anthracyclines
Processing row 113: Olodaterol for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 58...
Processing row 114: Bencyclane for cardiomyopathy due to anthracyclines
Processing row 115: Benzyl alcohol for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 59...
Processing row 116: Neltenexine for cardiomyopathy due to anthracyclines
Processing row 117: Garenoxacin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 60...
Processing row 118: Azidocillin for cardiomyopathy due to anthracyclines
Processing row 119: Pirbuterol for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 61...
Processing row 120: Selexipag for cardiomyopathy due to anthracyclines
Processing row 121: Norepinephrine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 62...
Processing row 122: Tranilast for cardiomyopathy due to anthracyclines
Processing row 123: Pranlukast for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 63...
Processing row 124: Rucaparib for cardiomyopathy due to anthracyclines
Processing row 125: Interferon beta-1a for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 64...
Processing row 126: Cilostazol for cardiomyopathy due to anthracyclines
Processing row 127: Motretinide for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 65...
Processing row 128: Orciprenaline for cardiomyopathy due to anthracyclines
Processing row 129: Lomefloxacin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 66...
Processing row 130: Interferon beta-1b for cardiomyopathy due to anthracyclines
Processing row 131: Enasidenib for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 67...
Processing row 132: Ramatroban for cardiomyopathy due to anthracyclines
Processing row 133: Phenprocoumon for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 68...
Processing row 134: Rimegepant for cardiomyopathy due to anthracyclines
Processing row 135: Formoterol for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 69...
Processing row 136: Midostaurin for cardiomyopathy due to anthracyclines
Processing row 137: Ibudilast for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 70...
Processing row 138: Teriflunomide for cardiomyopathy due to anthracyclines
Processing row 139: Dibromotyrosine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 71...
Processing row 140: Levosalbutamol for cardiomyopathy due to anthracyclines
Processing row 141: Besifloxacin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 72...
Processing row 142: Salmeterol for cardiomyopathy due to anthracyclines
Processing row 143: Ivosidenib for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 73...
Processing row 144: Bifemelane for cardiomyopathy due to anthracyclines
Processing row 145: Naftidrofuryl for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 74...
Processing row 146: Eribulin for cardiomyopathy due to anthracyclines
Processing row 147: Ozagrel for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 75...
Processing row 148: Ticagrelor for cardiomyopathy due to anthracyclines
Processing row 149: Theophylline for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 76...
Processing row 150: Acefylline for doxorubicin induced cardiomyopathy
Processing row 151: Ropeginterferon alfa-2b for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 77...
Processing row 152: Bitolterol for doxorubicin induced cardiomyopathy
Processing row 153: Zileuton for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 78...
Processing row 154: Tulobuterol for doxorubicin induced cardiomyopathy
Processing row 155: Vilanterol for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 79...
Processing row 156: Reproterol for doxorubicin induced cardiomyopathy
Processing row 157: Zafirlukast for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 80...
Processing row 158: Arformoterol for doxorubicin induced cardiomyopathy
Processing row 159: Interferon alfa-2b for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 81...
Processing row 160: Peginterferon alfa-2a for doxorubicin induced cardiomyopathy
Processing row 161: Formestane for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 82...
Processing row 162: Butylphthalide for doxorubicin induced cardiomyopathy
Processing row 163: Olodaterol for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 83...
Processing row 164: Bencyclane for doxorubicin induced cardiomyopathy
Processing row 165: Benzyl alcohol for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 84...
Processing row 166: Neltenexine for doxorubicin induced cardiomyopathy
Processing row 167: Garenoxacin for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 85...
Processing row 168: Azidocillin for doxorubicin induced cardiomyopathy
Processing row 169: Pirbuterol for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 86...
Processing row 170: Selexipag for doxorubicin induced cardiomyopathy
Processing row 171: Norepinephrine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 87...
Processing row 172: Tranilast for doxorubicin induced cardiomyopathy
Processing row 173: Pranlukast for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 88...
Processing row 174: Rucaparib for doxorubicin induced cardiomyopathy
Processing row 175: Interferon beta-1a for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 89...
Processing row 176: Cilostazol for doxorubicin induced cardiomyopathy
Processing row 177: Motretinide for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 90...
Processing row 178: Orciprenaline for doxorubicin induced cardiomyopathy
Processing row 179: Lomefloxacin for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 91...
Processing row 180: Interferon beta-1b for doxorubicin induced cardiomyopathy
Processing row 181: Enasidenib for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 92...
Processing row 182: Ramatroban for doxorubicin induced cardiomyopathy
Processing row 183: Phenprocoumon for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 93...
Processing row 184: Rimegepant for doxorubicin induced cardiomyopathy
Processing row 185: Formoterol for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 94...
Processing row 186: Midostaurin for doxorubicin induced cardiomyopathy
Processing row 187: Ibudilast for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 95...
Processing row 188: Teriflunomide for doxorubicin induced cardiomyopathy
Processing row 189: Dibromotyrosine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 96...
Processing row 190: Levosalbutamol for doxorubicin induced cardiomyopathy
Processing row 191: Besifloxacin for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 97...
Processing row 192: Salmeterol for doxorubicin induced cardiomyopathy
Processing row 193: Ivosidenib for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 98...
Processing row 194: Bifemelane for doxorubicin induced cardiomyopathy
Processing row 195: Naftidrofuryl for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 99...
Processing row 196: Eribulin for doxorubicin induced cardiomyopathy
Processing row 197: Ozagrel for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 100...
Processing row 198: Ticagrelor for doxorubicin induced cardiomyopathy
Processing row 199: Theophylline for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 101...
Processing row 200: Pyridostigmine for Alzheimer disease
Processing row 201: Prochlorperazine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 102...
Processing row 202: Tretamine for Alzheimer disease
Processing row 203: Phenobarbital for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 103...
Processing row 204: Thiotepa for Alzheimer disease
Processing row 205: Mechlorethamine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 104...
Processing row 206: Perphenazine for Alzheimer disease
Processing row 207: Ropinirole for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 105...
Processing row 208: Loxapine for Alzheimer disease
Processing row 209: Alimemazine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 106...
Processing row 210: Neostigmine for Alzheimer disease
Processing row 211: Carbamazepine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 107...
Processing row 212: Pimozide for Alzheimer disease
Processing row 213: Memantine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 108...
Processing row 214: Paliperidone for Alzheimer disease
Processing row 215: Haloperidol for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 109...
Processing row 216: Procainamide for Alzheimer disease
Processing row 217: Riluzole for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 110...
Processing row 218: Dextromethorphan for Alzheimer disease
Processing row 219: Minocycline for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 111...
Processing row 220: Zolpidem for Alzheimer disease
Processing row 221: Nizatidine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 112...
Processing row 222: Phenytoin for Alzheimer disease
Processing row 223: Acetophenazine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 113...
Processing row 224: Aprotinin for Alzheimer disease
Processing row 225: Perazine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 114...
Processing row 226: Diethylcarbamazine for Alzheimer disease
Processing row 227: Fluphenazine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 115...
Processing row 228: Primidone for Alzheimer disease
Processing row 229: Magnesium for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 116...
Processing row 230: Nifurtimox for Alzheimer disease
Processing row 231: Deutetrabenazine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 117...
Processing row 232: Levomethadone for Alzheimer disease
Processing row 233: Tetrabenazine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 118...
Processing row 234: Adenosine phosphate  for Alzheimer disease
Processing row 235: Istradefylline  for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 119...
Processing row 236: Piribedil for Alzheimer disease
Processing row 237: Safinamide  for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 120...
Processing row 238: Valbenazine for Alzheimer disease
Processing row 239: Chlorpromazine  for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 121...
Processing row 240: Opicapone  for Alzheimer disease
Processing row 241: Cenegermin  for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 122...
Processing row 242: Pimavanserin  for Alzheimer disease
Processing row 243: Vinpocetine  for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 123...
Processing row 244: Arformoterol  for Alzheimer disease
Processing row 245: Alirocumab for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 124...
Processing row 246: Dihydroergocornine for Alzheimer disease
Processing row 247: Succinimide for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 125...
Processing row 248: Droxidopa for Alzheimer disease
Processing row 249: Apomorphine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 126...
Processing row 250: Levacetylmethadol for Alzheimer disease
Processing row 251: Dornase alfa for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 127...
Processing row 252: Elexacaftor for Alzheimer disease
Processing row 253: Triflupromazine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 128...
Processing row 254: Dalfampridine for Alzheimer disease
Processing row 255: Tolcapone for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 129...
Processing row 256: Clortermine for Alzheimer disease
Processing row 257: Pirfenidone for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 130...
Processing row 258: Sivelestat for Alzheimer disease
Processing row 259: Polidocanol for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 131...
Processing row 260: Hydrocortisone for Alzheimer disease
Processing row 261: Cortisone acetate for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 132...
Processing row 262: Hydrocortisone acetate for Alzheimer disease
Processing row 263: Dexamethasone for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 133...
Processing row 264: Prednisolone for Alzheimer disease
Processing row 265: Doxorubicin for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 134...
Processing row 266: Triamcinolone for Alzheimer disease
Processing row 267: Betamethasone for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 135...
Processing row 268: Prednisone for Alzheimer disease
Processing row 269: Methylprednisolone for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 136...
Processing row 270: Thiotepa for Alzheimer disease
Processing row 271: Fusidic acid for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 137...
Processing row 272: Carmustine* for Alzheimer disease
Processing row 273: Belotecan for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 138...
Processing row 274: Benzylpenicillin for Alzheimer disease
Processing row 275: Mechlorethamine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 139...
Processing row 276: Norfloxacin for Alzheimer disease
Processing row 277: Olaparib for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 140...
Processing row 278: Promazine for Alzheimer disease
Processing row 279: Tretamine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 141...
Processing row 280: Testosterone for Alzheimer disease
Processing row 281: Midostaurin for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 142...
Processing row 282: Naproxen for Alzheimer disease
Processing row 283: Ofloxacin for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 143...
Processing row 284: Doxycycline for Alzheimer disease
Processing row 285: Adalimumab for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 144...
Processing row 286: Histamine for Alzheimer disease
Processing row 287: Benzatropine for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 145...
Processing row 288: Procyclidine for Alzheimer disease
Processing row 289: Paclitaxel for Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 146...
Processing row 290: Dexrazoxane for doxorubicin induced cardiomyopathy
Processing row 291: Dexrazoxane for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 147...
Processing row 292: Minoxidil for cardiomyopathy due to anthracyclines
Processing row 293: Minoxidil for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 148...
Processing row 294: Carbamazepine for doxorubicin induced cardiomyopathy
Processing row 295: Carbamazepine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 149...
Processing row 296: Metformin for doxorubicin induced cardiomyopathy
Processing row 297: Metformin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 150...
Processing row 298: Loperamide for cardiomyopathy due to anthracyclines
Processing row 299: Loperamide for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 151...
Processing row 300: Pravastatin for doxorubicin induced cardiomyopathy
Processing row 301: Pravastatin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 152...
Processing row 302: Quercetin for cardiomyopathy due to anthracyclines
Processing row 303: Quercetin for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 153...
Processing row 304: Luteolin for cardiomyopathy due to anthracyclines
Processing row 305: Luteolin for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 154...
Processing row 306: Strophanthidin for cardiomyopathy due to anthracyclines
Processing row 307: Strophanthidin for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 155...
Processing row 308: Verapamil for doxorubicin induced cardiomyopathy
Processing row 309: Verapamil for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 156...
Processing row 310: Ouabain for doxorubicin induced cardiomyopathy
Processing row 311: Ouabain for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 157...
Processing row 312: Clonidine for cardiomyopathy due to anthracyclines
Processing row 313: Clonidine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 158...
Processing row 314: Lanatoside C for doxorubicin induced cardiomyopathy
Processing row 315: Lanatoside C for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 159...
Processing row 316: Lycorine for doxorubicin induced cardiomyopathy
Processing row 317: Lycorine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 160...
Processing row 318: Rilmenidine for cardiomyopathy due to anthracyclines
Processing row 319: Rilmenidine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 161...
Processing row 320: Valproic acid for cardiomyopathy due to anthracyclines
Processing row 321: Valproic acid for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 162...
Processing row 322: Anisomycin for doxorubicin induced cardiomyopathy
Processing row 323: Anisomycin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 163...
Processing row 324: Temozolomide for doxorubicin induced cardiomyopathy
Processing row 325: Temozolomide for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 164...
Processing row 326: Digoxin for doxorubicin induced cardiomyopathy
Processing row 327: Digoxin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 165...
Processing row 328: Digitoxigenin for doxorubicin induced cardiomyopathy
Processing row 329: Digitoxigenin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 166...
Processing row 330: Nitrendipine for cardiomyopathy due to anthracyclines
Processing row 331: Nitrendipine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 167...
Processing row 332: Trifluoperazine for cardiomyopathy due to anthracyclines
Processing row 333: Trifluoperazine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 168...
Processing row 334: Amiodarone for cardiomyopathy due to anthracyclines
Processing row 335: Amiodarone for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 169...
Processing row 336: Parthenolide for cardiomyopathy due to anthracyclines
Processing row 337: Parthenolide for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 170...
Processing row 338: Spironolactone for doxorubicin induced cardiomyopathy
Processing row 339: Spironolactone for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 171...
Processing row 340: Dasatinib for cardiomyopathy due to anthracyclines
Processing row 341: Dasatinib for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 172...
Processing row 342: Puromycin for doxorubicin induced cardiomyopathy
Processing row 343: Puromycin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 173...
Processing row 344: Kaempferol for doxorubicin induced cardiomyopathy
Processing row 345: Kaempferol for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 174...
Processing row 346: Nimodipine for cardiomyopathy due to anthracyclines
Processing row 347: Nimodipine for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 175...
Processing row 348: Sirolimus for cardiomyopathy due to anthracyclines
Processing row 349: Sirolimus for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 176...
Processing row 350: Verapamil for doxorubicin induced cardiomyopathy
Processing row 351: Verapamil for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 177...
Processing row 352: Tat-beclin 1 for cardiomyopathy due to anthracyclines
Processing row 353: Tat-beclin 1 for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 178...
Processing row 354: Cycloheximide for cardiomyopathy due to anthracyclines
Processing row 355: Cycloheximide for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 179...
Processing row 356: Amrinone for doxorubicin induced cardiomyopathy
Processing row 357: Amrinone for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 180...
Processing row 358: Emetine for doxorubicin induced cardiomyopathy
Processing row 359: Emetine for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 181...
Processing row 360: Proscillaridin for doxorubicin induced cardiomyopathy
Processing row 361: Proscillaridin for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 182...
Processing row 362: Helveticoside for doxorubicin induced cardiomyopathy
Processing row 363: Helveticoside for cardiomyopathy due to anthracyclines


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 183...
Processing row 364: Lithium for cardiomyopathy due to anthracyclines
Processing row 365: Lithium for doxorubicin induced cardiomyopathy


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 184...
Processing row 366: Cyclosporine for heart failure and Alzheimer disease
Processing row 367: Rivastigmine for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 185...
Processing row 368: Digoxin for heart failure and Alzheimer disease
Processing row 369: Paclitaxel for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 186...
Processing row 370: Methotrexate for heart failure and Alzheimer disease
Processing row 371: Dexamethasone for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 187...
Processing row 372: Pregabalin for heart failure and Alzheimer disease
Processing row 373: Phenobarbital for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 188...
Processing row 374: Vincristine for heart failure and Alzheimer disease
Processing row 375: Carbamazepine for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 189...
Processing row 376: Epicriptine for heart failure and Alzheimer disease
Processing row 377: Ibandronate for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 190...
Processing row 378: Chlorothiazide for heart failure and Alzheimer disease
Processing row 379: Paliperidone for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 191...
Processing row 380: Mechlorethamine for heart failure and Alzheimer disease
Processing row 381: Tacrolimus for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 192...
Processing row 382: Bortezomib for heart failure and Alzheimer disease
Processing row 383: Thiotepa for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 193...
Processing row 384: Ritonavir for heart failure and Alzheimer disease
Processing row 385: Prochlorperazine for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 194...
Processing row 386: Sirolimus for heart failure and Alzheimer disease
Processing row 387: Pyridostigmine for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 195...
Processing row 388: Cytarabine for heart failure and Alzheimer disease
Processing row 389: Pazopanib for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 196...
Processing row 390: Donepezil for heart failure and Alzheimer disease
Processing row 391: Procainamide for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 197...
Processing row 392: Everolimus for heart failure and Alzheimer disease
Processing row 393: Doxorubicin for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 198...
Processing row 394: Acetylcarnitine for heart failure and Alzheimer disease
Processing row 395: Phenytoin for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 199...
Processing row 396: Pramipexole for heart failure and Alzheimer disease
Processing row 397: Procarbazine for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 200...
Processing row 398: Tacrine for heart failure and Alzheimer disease
Processing row 399: Memantine for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 201...
Processing row 400: Haloperidol for heart failure and Alzheimer disease
Processing row 401: Hydrochlorothiazide for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 202...
Processing row 402: Ropinirole for heart failure and Alzheimer disease
Processing row 403: Nilotinib for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 203...
Processing row 404: Zoledronic acid for heart failure and Alzheimer disease
Processing row 405: Metformin for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 204...
Processing row 406: Sulfasalazine for heart failure and Alzheimer disease
Processing row 407: Amantadine for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 205...
Processing row 408: Propranolol for heart failure and Alzheimer disease
Processing row 409: Carmustine for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 206...
Processing row 410: Triamcinolone for heart failure and Alzheimer disease
Processing row 411: Bromocriptine for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 207...
Processing row 412: Etidronic acid for heart failure and Alzheimer disease
Processing row 413: Amiodarone for heart failure and Alzheimer disease


  return bound(*args, **kwds)


Batch complete. Pausing for 30 seconds before next batch...
Processing batch 208...
Processing row 414: Magnesium for heart failure and Alzheimer disease
Processing row 415: Tretinoin for heart failure and Alzheimer disease
Validation completed and saved.


  return bound(*args, **kwds)


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import time
import random
import numpy as np
from openai import OpenAI
from openai import RateLimitError, APIError

# Set your OpenAI API key
with open('/content/drive/MyDrive/KG/openai_api.txt', 'r') as f:
    api_key = f.read().strip()

client = OpenAI(api_key=api_key)

# Set the API calling mode here. Options: "chat", "response", "response_web"
mode = "response_web"  # change as needed

def construct_comprehensive_prompt(row, max_abstracts=50):
    """
    Constructs a comprehensive prompt with limited abstracts to avoid token limits.
    """
    # Start with base prompt
    prompt = (
        "You are an expert biomedical researcher. For the following drug–disease entry, "
        "please verify whether the drug is effective against the target disease based on "
        "its mechanism, known pharmacodynamics, and available literature.\n\n"
        "Input Data:\n"
    )

    # Only exclude these specific columns that you don't want
    excluded_cols = ['rank', 'is_fda_approved', 'pubmed_ids']
    excluded_prefixes = ['pubmed_id_', 'abstract_', 'title_']

    # Basic essential fields first
    essential_fields = ['Disease_ID', 'Disease_name', 'Drug_ID', 'Drug_name',
                      'description', 'mechanism_of_action', 'pharmacodynamics', 'category']

    # Add essential fields first
    for field in essential_fields:
        if field in row and pd.notnull(row.get(field, pd.NA)):
            prompt += f"- {field}: {row[field]}\n"

    # Include other non-excluded fields
    for col in row.index:
        # Skip already added essential fields
        if col in essential_fields:
            continue

        # Skip excluded columns
        if col in excluded_cols:
            continue

        # Skip abstract, title, and pubmed_id columns
        if any(col.startswith(prefix) for prefix in excluded_prefixes):
            continue

        # Include other columns with values (limited to reasonable length)
        if pd.notnull(row.get(col, pd.NA)):
            value = str(row[col])
            if len(value) > 1000:  # Limit very long field values
                value = value[:1000] + "..."
            prompt += f"- {col}: {value}\n"

    # Add a limited number of abstracts
    abstracts = []
    abstract_count = int(row.get('pubmed_id_count', 0)) if pd.notnull(row.get('pubmed_id_count', pd.NA)) else 0

    # Limit to max_abstracts (default 50)
    abstract_count = min(abstract_count, max_abstracts)

    for i in range(1, abstract_count + 1):
        title_col = f"title_{i}"
        abstract_col = f"abstract_{i}"
        pubmed_id_col = f"pubmed_id_{i}"

        # Check if these columns exist and have values
        if (abstract_col in row.index and pubmed_id_col in row.index and
            pd.notnull(row.get(pubmed_id_col, pd.NA)) and pd.notnull(row.get(abstract_col, pd.NA))):

            title = row.get(title_col, '') if pd.notnull(row.get(title_col, pd.NA)) else 'No title'

            # Limit abstract length to reduce token usage
            abstract_text = row[abstract_col]
            if len(abstract_text) > 1500:  # Limit long abstracts
                abstract_text = abstract_text[:1500] + "..."

            abstracts.append(f"PubMed ID {row[pubmed_id_col]} - '{title}': {abstract_text}")

    if abstracts:
        prompt += "\nAbstracts:\n" + "\n\n".join(abstracts) + "\n"

    # Add conclusion request with specific format for Yes/No/Not sure + explanation
    prompt += (
        "\nPlease provide your assessment in the following format:\n\n"
        "Result: [Yes/No/Not sure] (Choose only one)\n"
        "Validation: [short explanation, max 50 words]"
    )

    return prompt

def call_gpt_mode_with_retry(prompt, mode, max_retries=5):
    """
    Calls the GPT-4o API with exponential backoff retry logic for rate limits.
    """
    for attempt in range(max_retries):
        try:
            if mode == "chat":
                completion = client.chat.completions.create(
                    model="gpt-4o",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.2,  # Lower temperature for more consistent results
                )
                return completion.choices[0].message.content.strip()
            elif mode == "response":
                response = client.responses.create(
                    model="gpt-4o",
                    input=prompt,
                    temperature=0.2,
                )
                # Extract text from the output
                for item in response.output:
                    if hasattr(item, "role") and item.role == "assistant":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                    # Also check for message type
                    if hasattr(item, "type") and item.type == "message":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                return "No output found in response structure."
            elif mode == "response_web":
                response = client.responses.create(
                    model="gpt-4o",
                    tools=[{"type": "web_search_preview"}],
                    input=prompt,
                    temperature=0.2,
                )
                # Extract text from the output
                for item in response.output:
                    if hasattr(item, "role") and item.role == "assistant":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                    # Also check for message type
                    if hasattr(item, "type") and item.type == "message":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                return "No output found in response structure."
            else:
                raise ValueError("Invalid mode selected.")

        except (RateLimitError, APIError) as e:
            print(f"API error during call (attempt {attempt+1}/{max_retries}): {e}")

            # If we've reached the max retries, just return the error
            if attempt == max_retries - 1:
                return f"API call error after {max_retries} attempts: {str(e)}"

            # Exponential backoff with jitter
            wait_time = (2 ** attempt) + random.uniform(0, 1) + 5
            print(f"Waiting {wait_time:.2f} seconds before retrying...")
            time.sleep(wait_time)
        except Exception as e:
            print(f"Unexpected error (attempt {attempt+1}/{max_retries}): {e}")

            # If we've reached the max retries, just return the error
            if attempt == max_retries - 1:
                return f"API call error after {max_retries} attempts: {str(e)}"

            # Shorter wait for non-rate-limit errors
            wait_time = (2 ** attempt) + random.uniform(0, 1)
            print(f"Waiting {wait_time:.2f} seconds before retrying...")
            time.sleep(wait_time)

    return "Failed after maximum retry attempts"

def parse_validation_output(output):
    """
    Parse the validation output to extract Result and Validation separately.
    Expected format:
    Result: [Yes/No/Not sure]
    Validation: [explanation]
    """
    result = "Unknown"
    validation = ""

    if output and not output.startswith("API call error"):
        # Try to find Result section
        result_match = pd.Series([line for line in output.split('\n') if line.startswith('Result:')]).str.extract(r'Result:\s*(Yes|No|Not sure)')
        if not result_match.empty and pd.notna(result_match.iloc[0, 0]):
            result = result_match.iloc[0, 0]

        # Try to find Validation section
        validation_lines = [line for line in output.split('\n') if line.startswith('Validation:')]
        if validation_lines:
            validation = validation_lines[0].replace('Validation:', '').strip()

    # If parsing failed, return the whole output as validation
    if result == "Unknown" and not validation:
        validation = output

    return result, validation

def run_validation(df, mode="response_web", filter_disease=None, row_range=None, batch_size=5, max_abstracts=50):
    """
    Run validation on selected rows of the dataset with batching.

    Parameters:
    - df: DataFrame containing drug-disease data
    - mode: API calling mode ("chat", "response", "response_web")
    - filter_disease: Filter by disease name (string or list of strings)
    - row_range: Tuple of (start_row, end_row) to process specific rows
    - batch_size: Number of rows to process before taking a longer pause
    - max_abstracts: Maximum number of abstracts to include per drug-disease pair

    Returns:
    - DataFrame with validation results
    """
    # Apply disease filter if specified
    if filter_disease:
        if isinstance(filter_disease, str):
            df_filtered = df[df['Disease_name'].str.contains(filter_disease, case=False)]
        elif isinstance(filter_disease, list):
            df_filtered = df[df['Disease_name'].isin(filter_disease)]
        else:
            raise ValueError("filter_disease must be a string or list of strings")
    else:
        df_filtered = df

    # Apply row range filter if specified
    if row_range and isinstance(row_range, tuple) and len(row_range) == 2:
        start_row, end_row = row_range
        df_filtered = df_filtered.iloc[start_row:end_row]

    print(f"Processing {len(df_filtered)} rows after filtering")

    results = []

    # Process in batches
    for batch_idx, batch_df in enumerate(np.array_split(df_filtered, max(1, len(df_filtered) // batch_size))):
        print(f"Processing batch {batch_idx+1}...")

        for idx, row in batch_df.iterrows():
            prompt = construct_comprehensive_prompt(row, max_abstracts=max_abstracts)
            print(f"Processing row {idx}: {row['Drug_name']} for {row['Disease_name']}")

            # Debug the first few prompts
            if len(results) < 2:
                print("\n===== SAMPLE PROMPT (first 500 chars) =====")
                print(prompt[:500] + "..." if len(prompt) > 500 else prompt)
                print("=========================================\n")

            # Call the API with retry logic
            validation_output = call_gpt_mode_with_retry(prompt, mode)

            # Debug the first few responses
            if len(results) < 2:
                print("\n===== SAMPLE RESPONSE =====")
                print(validation_output[:500] + "..." if len(validation_output) > 500 else validation_output)
                print("===========================\n")

            # Parse the validation output
            result, validation = parse_validation_output(validation_output)

            # Store results
            results.append({
                "row_index": idx,
                "Disease_ID": row["Disease_ID"],
                "Disease_name": row["Disease_name"],
                "Drug_ID": row["Drug_ID"],
                "Drug_name": row["Drug_name"],
                "Result": result,
                "Validation": validation,
                "Raw_Output": validation_output
            })

            # Short pause between API calls within a batch
            time.sleep(2)  # 2 seconds between calls

        # Longer pause between batches to avoid rate limits
        if batch_idx < len(np.array_split(df_filtered, max(1, len(df_filtered) // batch_size))) - 1:
            pause_time = 30  # 30 seconds between batches
            print(f"Batch complete. Pausing for {pause_time} seconds before next batch...")
            time.sleep(pause_time)

    # Create and return results DataFrame
    return pd.DataFrame(results)

# Main execution code
if __name__ == "__main__":
    # Load the data file
    file_path = "/content/drive/MyDrive/KG/drug_disease_with_numbered_abstracts_20250325_190703.csv"
    df = pd.read_csv(file_path)

    # Example usage:
    # 1. Run for a specific disease
    results_df = run_validation(
        df,
        mode="response_web",
        filter_disease="cardiomyopathy",
        batch_size=5,         # Process 5 rows per batch
        max_abstracts=20      # Include at most 20 abstracts per drug-disease pair
    )
    results_df.to_csv("/content/drive/MyDrive/KG/validation_results_cardiomyopathy.csv", index=False)

    # 2. Run for a specific row range (first 10 rows)
    # results_df = run_validation(
    #     df,
    #     mode="chat",
    #     row_range=(0, 10),
    #     batch_size=5,
    #     max_abstracts=20
    # )
    # results_df.to_csv("/content/drive/MyDrive/KG/validation_results_first10.csv", index=False)

    # 3. Run for multiple specific diseases
    # results_df = run_validation(
    #     df,
    #     mode="response",
    #     filter_disease=["asthma", "diabetes"],
    #     batch_size=5,
    #     max_abstracts=20
    # )
    # results_df.to_csv("/content/drive/MyDrive/KG/validation_results_asthma_diabetes.csv", index=False)

    print("Validation completed and saved.")

Processing 276 rows after filtering
Processing batch 1...
Processing row 0: Caffeine for cardiomyopathy due to anthracyclines

===== SAMPLE PROMPT (first 500 chars) =====
You are an expert biomedical researcher. For the following drug–disease entry, please verify whether the drug is effective against the target disease based on its mechanism, known pharmacodynamics, and available literature.

Input Data:
- Disease_ID: 99214
- Disease_name: cardiomyopathy due to anthracyclines
- Drug_ID: 14783.0
- Drug_name: Caffeine
- description: Caffeine is a drug of the methylxanthine class used for a variety of purposes, including certain respiratory conditions of the prematu...

Unexpected error (attempt 1/5): 'OpenAI' object has no attribute 'responses'
Waiting 1.33 seconds before retrying...


  return bound(*args, **kwds)


Unexpected error (attempt 2/5): 'OpenAI' object has no attribute 'responses'
Waiting 2.57 seconds before retrying...
Unexpected error (attempt 3/5): 'OpenAI' object has no attribute 'responses'
Waiting 4.52 seconds before retrying...


KeyboardInterrupt: 

In [None]:
!pip install --upgrade openai

Collecting openai
  Downloading openai-1.75.0-py3-none-any.whl.metadata (25 kB)
Downloading openai-1.75.0-py3-none-any.whl (646 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.0/647.0 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.61.1
    Uninstalling openai-1.61.1:
      Successfully uninstalled openai-1.61.1
Successfully installed openai-1.75.0


In [None]:
import pandas as pd
import time
import random
import numpy as np
from openai import OpenAI
from openai import RateLimitError, APIError
from collections import Counter

# Set your OpenAI API key
with open('/content/drive/MyDrive/KG/openai_api.txt', 'r') as f:
    api_key = f.read().strip()

client = OpenAI(api_key=api_key)

# Set the API calling mode here. Options: "chat", "response", "response_web"
mode = "response"  # Using regular response mode since we're focusing on abstract analysis

def construct_comprehensive_prompt(row, abstract):
    """
    Constructs a comprehensive prompt including requested drug and disease information,
    with focus on analyzing the abstract to determine drug effectiveness.

    Parameters:
    - row: DataFrame row containing drug and disease information
    - abstract: PubMed abstract text

    Returns:
    - Formatted prompt string
    """
    drug_name = row['Drug_name']
    disease_name = row['Disease_name']

    # Start with base prompt
    prompt = (
        f"You are an expert biomedical researcher. Determine whether {drug_name} is effective "
        f"against {disease_name} based on the provided information, with primary focus on the abstract content.\n\n"
        f"DRUG INFORMATION:\n"
        f"Drug: {drug_name}\n"
    )

    # Add key drug information fields
    key_fields = ['description', 'mechanism_of_action', 'protein_binding', 'pharmacodynamics', 'category']
    for field in key_fields:
        if field in row and pd.notnull(row[field]) and row[field]:
            value = str(row[field])
            # # Limit length of very long text
            # if len(value) > 300:
            #     value = value[:300] + "..."
            prompt += f"{field.replace('_', ' ').title()}: {value}\n"

    # Add drug synonyms
    drug_synonyms = []
    for i in range(1, 11):  # drug_synonym1 through drug_synonym21
        syn_col = f"drug_synonym{i}"
        if syn_col in row and pd.notnull(row[syn_col]) and row[syn_col]:
            drug_synonyms.append(str(row[syn_col]))

    if drug_synonyms:
        prompt += f"Drug Synonyms: {', '.join(drug_synonyms)}\n"

    # Add disease information
    prompt += f"\nDISEASE INFORMATION:\n"
    prompt += f"Disease: {disease_name}\n"

    # Add disease synonyms
    disease_synonyms = []
    for i in range(1, 11):  # disease_synonym1 through disease_synonym15
        syn_col = f"disease_synonym{i}"
        if syn_col in row and pd.notnull(row[syn_col]) and row[syn_col]:
            disease_synonyms.append(str(row[syn_col]))

    if disease_synonyms:
        prompt += f"Disease Synonyms: {', '.join(disease_synonyms)}\n"

    # Add the abstract (as the main factor)
    prompt += f"\nABSTRACT TEXT (primary evidence source):\n{abstract}\n\n"

    # Add conclusion request
    prompt += (
        "Based primarily on the abstract, and considering the drug and disease information provided, "
        "assess whether the drug is effective for the disease. Provide your assessment in the following format:\n\n"
        "Result: [Positive/Neutral/Negative] (Choose exactly one, where Positive means the drug is effective, "
        "Neutral means uncertain or insufficient evidence, and Negative means the drug is ineffective or harmful)\n"
        "Explanation: [provide a brief explanation in 2-3 sentences focusing mainly on evidence from the abstract]"
    )

    return prompt

def call_gpt_with_retry(prompt, mode, max_retries=5):
    """
    Calls the GPT-4o API with exponential backoff retry logic for rate limits.
    """
    for attempt in range(max_retries):
        try:
            if mode == "chat":
                completion = client.chat.completions.create(
                    model="gpt-4o",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.1,  # Lower temperature for more consistent results
                )
                return completion.choices[0].message.content.strip()
            elif mode == "response":
                response = client.responses.create(
                    model="gpt-4.1",
                    input=prompt,
                    temperature=0.2,  # Lower temperature for more consistent results
                )
                # Extract text from the output
                for item in response.output:
                    if hasattr(item, "type") and item.type == "message":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                return "No output found in response structure."
            elif mode == "response_web":
                response = client.responses.create(
                    model="gpt-4o",
                    tools=[{"type": "web_search_preview"}],
                    input=prompt,
                    temperature=0.1,  # Lower temperature for more consistent results
                )
                # Extract text from the output
                for item in response.output:
                    if hasattr(item, "type") and item.type == "message":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                return "No output found in response structure."
            else:
                raise ValueError("Invalid mode selected.")

        except RateLimitError as e:
            print(f"Rate limit error (attempt {attempt+1}/{max_retries}): {e}")

            # If we've reached the max retries, just return the error
            if attempt == max_retries - 1:
                return f"API call error after {max_retries} attempts: {str(e)}"

            # Exponential backoff with jitter
            wait_time = (2 ** attempt) + random.uniform(0, 1) + 20  # Longer wait for rate limits
            print(f"Waiting {wait_time:.2f} seconds before retrying...")
            time.sleep(wait_time)
        except APIError as e:
            print(f"API error (attempt {attempt+1}/{max_retries}): {e}")

            # If we've reached the max retries, just return the error
            if attempt == max_retries - 1:
                return f"API call error after {max_retries} attempts: {str(e)}"

            # Standard backoff with jitter
            wait_time = (2 ** attempt) + random.uniform(0, 1) + 5
            print(f"Waiting {wait_time:.2f} seconds before retrying...")
            time.sleep(wait_time)
        except Exception as e:
            print(f"Unexpected error (attempt {attempt+1}/{max_retries}): {e}")

            # If we've reached the max retries, just return the error
            if attempt == max_retries - 1:
                return f"API call error after {max_retries} attempts: {str(e)}"

            # Shorter wait for non-rate-limit errors
            wait_time = (2 ** attempt) + random.uniform(0, 1)
            print(f"Waiting {wait_time:.2f} seconds before retrying...")
            time.sleep(wait_time)

    return "Failed after maximum retry attempts"

def parse_assessment_output(output):
    """
    Parse the assessment output to extract Result and Explanation.
    Expected format:
    Result: [Positive/Neutral/Negative]
    Explanation: [explanation]
    """
    import re

    result = "Unknown"
    explanation = ""

    if output and not output.startswith("API call error"):
        # Try to find Result section using case-insensitive matching
        result_match = re.search(r'result:\s*(positive|neutral|negative)', output.lower())
        if result_match:
            result_value = result_match.group(1)
            # Convert to proper case format
            result = result_value.capitalize()

        # Try alternate format if not found (sometimes GPT outputs "Result - Positive" format)
        if result == "Unknown":
            alt_result_match = re.search(r'result\s*[-:]\s*(positive|neutral|negative)', output.lower())
            if alt_result_match:
                result_value = alt_result_match.group(1)
                result = result_value.capitalize()

        # Try to find Explanation section using case-insensitive matching
        explanation_match = re.search(r'explanation:\s*(.*?)(?:\n\n|\n*$)', output, re.IGNORECASE | re.DOTALL)
        if explanation_match:
            explanation = explanation_match.group(1).strip()
        else:
            # Try alternate format or look for any text after the result
            after_result = re.search(r'(positive|neutral|negative)[.:]\s*(.*?)(?:\n\n|\n*$)', output.lower(), re.DOTALL)
            if after_result:
                explanation = after_result.group(2).strip()

    # If parsing failed, return the whole output as explanation
    if result == "Unknown" and not explanation:
        explanation = output
        # Make final attempt to extract result from the text
        if "positive" in output.lower() and "negative" not in output.lower():
            result = "Positive"
        elif "negative" in output.lower() and "positive" not in output.lower():
            result = "Negative"
        elif "neutral" in output.lower() or "insufficient evidence" in output.lower():
            result = "Neutral"

    return result, explanation

def analyze_drug_disease_abstracts(df, mode="response", filter_disease=None, filter_drug=None, batch_size=5, max_abstracts_per_pair=50):
    """
    Analyze abstracts for each drug-disease pair to determine effectiveness.

    Parameters:
    - df: DataFrame containing drug-disease-abstract data
    - mode: API calling mode ("chat", "response", "response_web")
    - filter_disease: Filter by disease name (string or list of strings)
    - filter_drug: Filter by drug name (string or list of strings)
    - batch_size: Number of abstracts to process before taking a longer pause
    - max_abstracts_per_pair: Maximum number of abstracts to analyze per drug-disease pair

    Returns:
    - DataFrame with analysis results for each abstract
    - DataFrame with statistical summary for each drug-disease pair
    """
    import re

    # Apply disease filter if specified
    if filter_disease:
        if isinstance(filter_disease, str):
            df_filtered = df[df['Disease_name'].str.contains(filter_disease, case=False)]
        elif isinstance(filter_disease, list):
            df_filtered = df[df['Disease_name'].isin(filter_disease)]
        else:
            raise ValueError("filter_disease must be a string or list of strings")
    else:
        df_filtered = df

    # Apply drug filter if specified
    if filter_drug:
        if isinstance(filter_drug, str):
            df_filtered = df_filtered[df_filtered['Drug_name'].str.contains(filter_drug, case=False)]
        elif isinstance(filter_drug, list):
            df_filtered = df_filtered[df_filtered['Drug_name'].isin(filter_drug)]
        else:
            raise ValueError("filter_drug must be a string or list of strings")

    print(f"Processing {len(df_filtered)} rows after filtering")

    # Before groupby, create a copy with proper handling for empties and "none"
    df_for_grouping = df_filtered.copy()

    # Helper function to standardize empty/none values
    def standardize_value(val):
        if pd.isna(val):  # Handles empty cells (NaN/None)
          return 'Missing_Value'
        elif isinstance(val, str) and val.lower() in ('none', 'null', ''):  # Handles string "none"/"null"
          return 'Missing_Value'
        else:
          return val

    # Apply standardization to key fields
    df_for_grouping['Drug_ID'] = df_for_grouping['Drug_ID'].apply(standardize_value)
    df_for_grouping['Disease_ID'] = df_for_grouping['Disease_ID'].apply(standardize_value)
    df_for_grouping['Model'] = df_for_grouping['Model'].apply(standardize_value)

    # First, identify all unique drug-disease pairs and their total abstract counts
    pairs_with_count = df_filtered.groupby(['Drug_ID', 'Disease_ID', 'Model']).agg({
        'Drug_name': 'first',
        'Disease_name': 'first',
        'pubmed_id_count': 'first',
        'pubmed_id': 'count'
    }).reset_index()

    pairs_with_count.rename(columns={'pubmed_id': 'available_abstracts'}, inplace=True)
    print(f"Found {len(pairs_with_count)} unique drug-disease-model pairs")

    # Sort pairs by available abstracts (highest first) to prioritize pairs with more evidence
    # pairs_with_count = pairs_with_count.sort_values('available_abstracts', ascending=False)

    pairs_with_count = pairs_with_count.sort_values(['Disease_name', 'Drug_name'], ascending=[True, True])

    abstract_results = []
    summary_results = []

    # Process each drug-disease-model pair
    for _, pair_row in pairs_with_count.iterrows():
        drug_id = pair_row['Drug_ID']
        disease_id = pair_row['Disease_ID']
        model = pair_row['Model']
        drug_name = pair_row['Drug_name']
        disease_name = pair_row['Disease_name']
        total_abstracts_count = pair_row['pubmed_id_count']
        available_abstracts = pair_row['available_abstracts']

        print(f"\nAnalyzing {drug_name} for {disease_name} ({available_abstracts} abstracts available out of {total_abstracts_count} total)")

        # Get all rows for this drug-disease pair cannit handle missing value fo ID or Model
        # group = df_filtered[(df_filtered['Drug_ID'] == drug_id) & (df_filtered['Disease_ID'] == disease_id) & (df_filtered['Model'] == model)]

        def is_empty_value(val):
          return (pd.isna(val) or
              (isinstance(val, str) and val.lower() in ('none', 'null', '')))

        if drug_id == 'Missing_Value':
            drug_filter = df_filtered.apply(lambda row: is_empty_value(row['Drug_ID']), axis=1)
        else:
            drug_filter = df_filtered['Drug_ID'] == drug_id

        if disease_id == 'Missing_Value':
           disease_filter = df_filtered.apply(lambda row: is_empty_value(row['Disease_ID']), axis=1)
        else:
            disease_filter = df_filtered['Disease_ID'] == disease_id

        if model == 'Missing_Value':
            model_filter = df_filtered.apply(lambda row: is_empty_value(row['Model']), axis=1)
        else:
            model_filter = df_filtered['Model'] == model

        # Get all rows for this drug-disease-model combination
        group = df_filtered[drug_filter & disease_filter & model_filter]

        # Limit abstracts to process if needed
        if len(group) > max_abstracts_per_pair:
            print(f"  Limiting analysis to {max_abstracts_per_pair} abstracts out of {len(group)}")
            group = group.head(max_abstracts_per_pair)

        pair_results = []

        # Process abstracts in batches
        for batch_idx, batch_df in enumerate(np.array_split(group, max(1, len(group) // batch_size))):
            print(f"  Processing batch {batch_idx+1} of {max(1, len(group) // batch_size)}...")

            for idx, row in batch_df.iterrows():
                # If abstract is missing or too short, use title or other available information
                abstract_text = row.get('abstract', '')
                if pd.isnull(abstract_text) or len(str(abstract_text)) < 50:
                  # Use title if abstract is missing, or combine them if abstract is just short
                  title_text = row.get('title', 'No title available')
                  if pd.isnull(abstract_text) or len(str(abstract_text)) == 0:
                    print(f"  Abstract missing for row {idx} - Using title only: {title_text}")
                    abstract_text = f"TITLE ONLY: {title_text}"
                  else:
                    print(f"  Short abstract for row {idx} - Supplementing with title")
                    abstract_text = f"TITLE: {title_text}\nSHORT ABSTRACT: {abstract_text}"

                # Get PubMed ID and title for reference
                pubmed_id = row.get('pubmed_id', 'Unknown')
                title = row.get('title', 'No title')

                print(f"  Processing abstract {idx} - PubMed ID: {pubmed_id}")

                # Use the comprehensive prompt construction
                prompt = construct_comprehensive_prompt(row, row['abstract'])

                # Debug the first prompt in the first batch
                if batch_idx == 0 and len(pair_results) == 0:
                    print("\n===== SAMPLE PROMPT (first 500 chars) =====")
                    print(prompt[:500] + "..." if len(prompt) > 500 else prompt)
                    print("=========================================\n")

                # Call the API
                output = call_gpt_with_retry(prompt, mode)

                # Debug the first response
                if batch_idx == 0 and len(pair_results) == 0:
                    print("\n===== SAMPLE RESPONSE =====")
                    print(output[:500] + "..." if len(output) > 500 else output)
                    print("===========================\n")

                # Parse the output
                result, explanation = parse_assessment_output(output)

                # Store results for this abstract
                abstract_result = {
                    "Drug_ID": drug_id,
                    "Drug_name": drug_name,
                    "Disease_ID": disease_id,
                    "Disease_name": disease_name,
                    "PubMed_ID": pubmed_id,
                    "Title": title,
                    "Model": row.get('Model', 'Unknown'),
                    "Result": result,
                    "Explanation": explanation,
                    "Raw_Output": output
                }

                abstract_results.append(abstract_result)
                pair_results.append(abstract_result)

                # Pause between API calls
                #time.sleep(2)

            # Longer pause between batches
            if batch_idx < max(1, len(group) // batch_size) - 1:
                pause_time = 1
                print(f"  Batch complete. Pausing for {pause_time} seconds...")
                time.sleep(pause_time)

        # Calculate statistics for this drug-disease pair
        result_counts = Counter([r["Result"] for r in pair_results])
        analyzed_results = len(pair_results)

        if analyzed_results > 0:
            summary = {
                "Drug_ID": drug_id,
                "Drug_name": drug_name,
                "Disease_ID": disease_id,
                "Disease_name": disease_name,
                "Model": model,
                "Total_Possible_Abstracts": total_abstracts_count,
                "Available_Abstracts": available_abstracts,
                "Analyzed_Abstracts": analyzed_results,
                "Positive_Count": result_counts.get("Positive", 0),
                "Neutral_Count": result_counts.get("Neutral", 0),
                "Negative_Count": result_counts.get("Negative", 0),
                "Unknown_Count": result_counts.get("Unknown", 0),
                "Positive_Percent": (result_counts.get("Positive", 0) / analyzed_results) * 100 if analyzed_results > 0 else 0,
                "Neutral_Percent": (result_counts.get("Neutral", 0) / analyzed_results) * 100 if analyzed_results > 0 else 0,
                "Negative_Percent": (result_counts.get("Negative", 0) / analyzed_results) * 100 if analyzed_results > 0 else 0,
                "Unknown_Percent": (result_counts.get("Unknown", 0) / analyzed_results) * 100 if analyzed_results > 0 else 0,
                "Overall_Assessment": "Inconclusive"  # Will be updated below
            }

            # Determine overall assessment based on percentages
            if summary["Positive_Percent"] >= 60:
                summary["Overall_Assessment"] = "Positive"
            elif summary["Negative_Percent"] >= 60:
                summary["Overall_Assessment"] = "Negative"
            elif summary["Neutral_Percent"] >= 60:
                summary["Overall_Assessment"] = "Neutral"
            # If one category is significantly more common than others
            elif (summary["Positive_Percent"] > summary["Neutral_Percent"] + 20 and
                  summary["Positive_Percent"] > summary["Negative_Percent"] + 20):
                summary["Overall_Assessment"] = "Likely Positive"
            elif (summary["Negative_Percent"] > summary["Neutral_Percent"] + 20 and
                  summary["Negative_Percent"] > summary["Positive_Percent"] + 20):
                summary["Overall_Assessment"] = "Likely Negative"

            summary_results.append(summary)

            print(f"\nSummary for {drug_name} and {disease_name}:")
            print(f"  Total possible abstracts: {total_abstracts_count}")
            print(f"  Available abstracts: {available_abstracts}")
            print(f"  Abstracts analyzed: {analyzed_results}")
            print(f"  Positive: {summary['Positive_Count']} ({summary['Positive_Percent']:.1f}%)")
            print(f"  Neutral: {summary['Neutral_Count']} ({summary['Neutral_Percent']:.1f}%)")
            print(f"  Negative: {summary['Negative_Count']} ({summary['Negative_Percent']:.1f}%)")
            print(f"  Unknown: {summary['Unknown_Count']} ({summary['Unknown_Percent']:.1f}%)")
            print(f"  Overall assessment: {summary['Overall_Assessment']}")

    # Create DataFrames
    abstract_df = pd.DataFrame(abstract_results)
    summary_df = pd.DataFrame(summary_results)

    return abstract_df, summary_df

# Main execution code
if __name__ == "__main__":
    import re  # For regex in parse_assessment_output

    # Load the data file
    file_path = "/content/drive/MyDrive/KG/drug_disease_long_format_20250325_190703.csv"
    df = pd.read_csv(file_path)

    # Example usage patterns:

    # 1. Run for a specific disease
    # abstract_results, summary_results = analyze_drug_disease_abstracts(
    #     df,
    #     mode="response",
    #     filter_disease="cardiomyopathy due to anthracyclines",
    #     batch_size=3,        # Process 3 abstracts per batch to avoid rate limits
    #     max_abstracts_per_pair=20  # Limit analysis to 20 abstracts per drug-disease pair
    # )

    # # Save results with timestamp to avoid overwriting previous analyses
    # timestamp = time.strftime("%Y%m%d_%H%M%S")
    # abstract_results.to_csv(f"/content/drive/MyDrive/KG/abstract_analysis_cardiomyopathy_{timestamp}.csv", index=False)
    # summary_results.to_csv(f"/content/drive/MyDrive/KG/summary_analysis_cardiomyopathy_{timestamp}.csv", index=False)

    print("Analysis completed and saved.")

    # Uncomment and modify as needed for other analysis scenarios:

    # 2. Run for a specific drug and disease combination
    # abstract_results, summary_results = analyze_drug_disease_abstracts(
    #     df,
    #     mode="response",
    #     filter_disease="cardiomyopathy due to anthracyclines",
    #     filter_drug="Caffeine",
    #     batch_size=5,
    #     max_abstracts_per_pair=60
    # )

    # timestamp = time.strftime("%Y%m%d_%H%M%S")
    # abstract_results.to_excel(f"/content/drive/MyDrive/KG/abstract_result/abstract_analysis_caffeine_cardiomyopathy_{timestamp}.xlsx", index=False, engine='openpyxl')
    # summary_results.to_excel(f"/content/drive/MyDrive/KG/abstract_result/summary_analysis_caffeine_cardiomyopathy_{timestamp}.xlsx", index=False, engine='openpyxl')

    # 3. Run for multiple diseases
    # abstract_results, summary_results = analyze_drug_disease_abstracts(
    #     df,
    #     mode="response",
    #     filter_disease=["diabetes", "asthma"],
    #     batch_size=3,
    #     max_abstracts_per_pair=15
    # )
    #
    # timestamp = time.strftime("%Y%m%d_%H%M%S")
    # abstract_results.to_csv(f"/content/drive/MyDrive/KG/abstract_analysis_diabetes_asthma_{timestamp}.csv", index=False)
    # summary_results.to_csv(f"/content/drive/MyDrive/KG/summary_analysis_diabetes_asthma_{timestamp}.csv", index=False)
    #
    # 4. Run for all data with reasonable limits
    abstract_results, summary_results = analyze_drug_disease_abstracts(
        df, #.sample(frac=1).reset_index(drop=True),  # Shuffle data to get diverse samples
        mode="response",
        batch_size=30,
        max_abstracts_per_pair=200  # Limit to 10 abstracts per pair for full dataset
    )

    timestamp = time.strftime("%Y%m%d_%H%M%S")
    abstract_results.to_excel(f"/content/drive/MyDrive/KG/abstract_result/abstract_analysis_full_{timestamp}.xlsx", index=False, engine='openpyxl')
    summary_results.to_excel(f"/content/drive/MyDrive/KG/abstract_result/summary_analysis_full_{timestamp}.xlsx", index=False, engine='openpyxl')

  df = pd.read_csv(file_path)


Analysis completed and saved.
Processing 13850 rows after filtering
Found 290 unique drug-disease-model pairs

Analyzing Adalimumab for Alzheimer disease (49 abstracts available out of 49 total)
  Processing batch 1 of 1...
  Processing abstract 7007 - PubMed ID: 39767689

===== SAMPLE PROMPT (first 500 chars) =====
You are an expert biomedical researcher. Determine whether Adalimumab is effective against Alzheimer disease based on the provided information, with primary focus on the abstract content.

DRUG INFORMATION:
Drug: Adalimumab
Description: Adalimumab is a subcutaneously administered biological disease modifier for the treatment of rheumatoid arthritis and other chronic debilitating diseases mediated by tumor necrosis factor ,. It was originally launched by Abbvie in the U.S. and approved in 2002 by...



  return bound(*args, **kwds)


[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
  Total possible abstracts: 4
  Available abstracts: 4
  Abstracts analyzed: 4
  Positive: 0 (0.0%)
  Neutral: 4 (100.0%)
  Negative: 0 (0.0%)
  Unknown: 0 (0.0%)
  Overall assessment: Neutral

Analyzing Amiodarone for heart failure and Alzheimer disease (200 abstracts available out of 200 total)
  Processing batch 1 of 6...
  Processing abstract 13380 - PubMed ID: 40109641

===== SAMPLE PROMPT (first 500 chars) =====
You are an expert biomedical researcher. Determine whether Amiodarone is effective against heart failure and Alzheimer disease based on the provided information, with primary focus on the abstract content.

DRUG INFORMATION:
Drug: Amiodarone
Description: Amiodarone is a benzofuran derivative, anti-arrhythmic drug used commonly in a variety of settings. Most known for its approved indication in life-threatening ventricular arrhythmias, it is also used off-label in the outpatient and inpatient set...


===== SAMPLE RESPONSE =====
Res

In [4]:
import pandas as pd
import time
import random
import numpy as np
from openai import OpenAI
from openai import RateLimitError, APIError
from collections import Counter

# Set your OpenAI API key
with open('/content/drive/MyDrive/KG/openai_api.txt', 'r') as f:
    api_key = f.read().strip()

client = OpenAI(api_key=api_key)

# Set the API calling mode here. Options: "chat", "response", "response_web"
mode = "response"  # Using regular response mode since we're focusing on abstract analysis

def construct_comprehensive_prompt(row, abstract):
    """
    Constructs a comprehensive prompt including requested drug and disease information,
    with focus on analyzing the abstract to determine drug effectiveness.

    Parameters:
    - row: DataFrame row containing drug and disease information
    - abstract: PubMed abstract text

    Returns:
    - Formatted prompt string
    """
    drug_name = row['Drug_name']
    disease_name = row['Disease_name']

    # Start with base prompt
    prompt = (
        f"You are an expert biomedical researcher. Determine whether {drug_name} is effective "
        f"against {disease_name} based on the provided information, with primary focus on the abstract content.\n\n"
        f"DRUG INFORMATION:\n"
        f"Drug: {drug_name}\n"
    )

    # Add key drug information fields
    key_fields = ['description', 'mechanism_of_action', 'protein_binding', 'pharmacodynamics', 'category']
    for field in key_fields:
        if field in row and pd.notnull(row[field]) and row[field]:
            value = str(row[field])
            # # Limit length of very long text
            # if len(value) > 300:
            #     value = value[:300] + "..."
            prompt += f"{field.replace('_', ' ').title()}: {value}\n"

    # Add drug synonyms
    drug_synonyms = []
    for i in range(1, 11):  # drug_synonym1 through drug_synonym21
        syn_col = f"drug_synonym{i}"
        if syn_col in row and pd.notnull(row[syn_col]) and row[syn_col]:
            drug_synonyms.append(str(row[syn_col]))

    if drug_synonyms:
        prompt += f"Drug Synonyms: {', '.join(drug_synonyms)}\n"

    # Add disease information
    prompt += f"\nDISEASE INFORMATION:\n"
    prompt += f"Disease: {disease_name}\n"

    # Add disease synonyms
    disease_synonyms = []
    for i in range(1, 11):  # disease_synonym1 through disease_synonym15
        syn_col = f"disease_synonym{i}"
        if syn_col in row and pd.notnull(row[syn_col]) and row[syn_col]:
            disease_synonyms.append(str(row[syn_col]))

    if disease_synonyms:
        prompt += f"Disease Synonyms: {', '.join(disease_synonyms)}\n"

    # Add the abstract (as the main factor)
    prompt += f"\nABSTRACT TEXT (primary evidence source):\n{abstract}\n\n"

    # Add conclusion request
    prompt += (
        "Based primarily on the abstract, and considering the drug and disease information provided, "
        "assess whether the drug is effective for the disease. Provide your assessment in the following format:\n\n"
        "Result: [Positive/Neutral/Negative] (Choose exactly one, where Positive means the drug is effective, "
        "Neutral means uncertain or insufficient evidence, and Negative means the drug is ineffective or harmful)\n"
        "Explanation: [provide a brief explanation in 2-3 sentences focusing mainly on evidence from the abstract]"
    )

    return prompt

def call_gpt_with_retry(prompt, mode, max_retries=5):
    """
    Calls the GPT-4o API with exponential backoff retry logic for rate limits.
    """
    for attempt in range(max_retries):
        try:
            if mode == "chat":
                completion = client.chat.completions.create(
                    model="gpt-4o",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.1,  # Lower temperature for more consistent results
                )
                return completion.choices[0].message.content.strip()
            elif mode == "response":
                response = client.responses.create(
                    model="gpt-4.1",
                    input=prompt,
                    temperature=0.2,  # Lower temperature for more consistent results
                )
                # Extract text from the output
                for item in response.output:
                    if hasattr(item, "type") and item.type == "message":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                return "No output found in response structure."
            elif mode == "response_web":
                response = client.responses.create(
                    model="gpt-4o",
                    tools=[{"type": "web_search_preview"}],
                    input=prompt,
                    temperature=0.1,  # Lower temperature for more consistent results
                )
                # Extract text from the output
                for item in response.output:
                    if hasattr(item, "type") and item.type == "message":
                        if hasattr(item, "content"):
                            for content_item in item.content:
                                if hasattr(content_item, "text"):
                                    return content_item.text.strip()
                return "No output found in response structure."
            else:
                raise ValueError("Invalid mode selected.")

        except RateLimitError as e:
            print(f"Rate limit error (attempt {attempt+1}/{max_retries}): {e}")

            # If we've reached the max retries, just return the error
            if attempt == max_retries - 1:
                return f"API call error after {max_retries} attempts: {str(e)}"

            # Exponential backoff with jitter
            wait_time = (2 ** attempt) + random.uniform(0, 1) + 20  # Longer wait for rate limits
            print(f"Waiting {wait_time:.2f} seconds before retrying...")
            time.sleep(wait_time)
        except APIError as e:
            print(f"API error (attempt {attempt+1}/{max_retries}): {e}")

            # If we've reached the max retries, just return the error
            if attempt == max_retries - 1:
                return f"API call error after {max_retries} attempts: {str(e)}"

            # Standard backoff with jitter
            wait_time = (2 ** attempt) + random.uniform(0, 1) + 5
            print(f"Waiting {wait_time:.2f} seconds before retrying...")
            time.sleep(wait_time)
        except Exception as e:
            print(f"Unexpected error (attempt {attempt+1}/{max_retries}): {e}")

            # If we've reached the max retries, just return the error
            if attempt == max_retries - 1:
                return f"API call error after {max_retries} attempts: {str(e)}"

            # Shorter wait for non-rate-limit errors
            wait_time = (2 ** attempt) + random.uniform(0, 1)
            print(f"Waiting {wait_time:.2f} seconds before retrying...")
            time.sleep(wait_time)

    return "Failed after maximum retry attempts"

def parse_assessment_output(output):
    """
    Parse the assessment output to extract Result and Explanation.
    Expected format:
    Result: [Positive/Neutral/Negative]
    Explanation: [explanation]
    """
    import re

    result = "Unknown"
    explanation = ""

    if output and not output.startswith("API call error"):
        # Try to find Result section using case-insensitive matching
        result_match = re.search(r'result:\s*(positive|neutral|negative)', output.lower())
        if result_match:
            result_value = result_match.group(1)
            # Convert to proper case format
            result = result_value.capitalize()

        # Try alternate format if not found (sometimes GPT outputs "Result - Positive" format)
        if result == "Unknown":
            alt_result_match = re.search(r'result\s*[-:]\s*(positive|neutral|negative)', output.lower())
            if alt_result_match:
                result_value = alt_result_match.group(1)
                result = result_value.capitalize()

        # Try to find Explanation section using case-insensitive matching
        explanation_match = re.search(r'explanation:\s*(.*?)(?:\n\n|\n*$)', output, re.IGNORECASE | re.DOTALL)
        if explanation_match:
            explanation = explanation_match.group(1).strip()
        else:
            # Try alternate format or look for any text after the result
            after_result = re.search(r'(positive|neutral|negative)[.:]\s*(.*?)(?:\n\n|\n*$)', output.lower(), re.DOTALL)
            if after_result:
                explanation = after_result.group(2).strip()

    # If parsing failed, return the whole output as explanation
    if result == "Unknown" and not explanation:
        explanation = output
        # Make final attempt to extract result from the text
        if "positive" in output.lower() and "negative" not in output.lower():
            result = "Positive"
        elif "negative" in output.lower() and "positive" not in output.lower():
            result = "Negative"
        elif "neutral" in output.lower() or "insufficient evidence" in output.lower():
            result = "Neutral"

    return result, explanation

def analyze_drug_disease_abstracts(df, mode="response", filter_disease=None, filter_drug=None, batch_size=5, max_abstracts_per_pair=50):
    """
    Analyze abstracts for each drug-disease pair to determine effectiveness.
    """
    import re

    # Apply filters as before
    if filter_disease:
        if isinstance(filter_disease, str):
            df_filtered = df[df['Disease_name'].str.contains(filter_disease, case=False)]
        elif isinstance(filter_disease, list):
            df_filtered = df[df['Disease_name'].isin(filter_disease)]
        else:
            raise ValueError("filter_disease must be a string or list of strings")
    else:
        df_filtered = df

    if filter_drug:
        if isinstance(filter_drug, str):
            df_filtered = df_filtered[df_filtered['Drug_name'].str.contains(filter_drug, case=False)]
        elif isinstance(filter_drug, list):
            df_filtered = df_filtered[df_filtered['Drug_name'].isin(filter_drug)]
        else:
            raise ValueError("filter_drug must be a string or list of strings")

    print(f"Processing {len(df_filtered)} rows after filtering")

    # Log unique drug names to verify all are present
    unique_drugs = df_filtered['Drug_name'].unique()
    print(f"Found {len(unique_drugs)} unique drugs in filtered data")
    print(f"Sample drugs: {', '.join(unique_drugs[:5])}...")

    # Check for specific drugs we're concerned about
    for drug in ['Parthenolide', 'Kaempferol']:
        if drug in unique_drugs:
            print(f"✓ {drug} is present in filtered data")
            # Get sample row to verify data
            sample = df_filtered[df_filtered['Drug_name'] == drug].iloc[0]
            print(f"  Sample Disease: {sample['Disease_name']}")
            print(f"  Sample Drug_ID: {sample.get('Drug_ID', 'Missing')}")
        else:
            print(f"✗ {drug} is NOT present in filtered data")

    # CRITICAL CHANGE: Group by Drug_name and Disease_name instead of IDs
    # This ensures we catch all drugs even if IDs are inconsistent
    drug_disease_pairs = df_filtered.groupby(['Drug_name', 'Disease_name']).agg({
        'Drug_ID': 'first',  # Take whatever ID is available
        'Disease_ID': 'first',
        'Model': 'first',
        'pubmed_id_count': 'first',
        'pubmed_id': 'count'
    }).reset_index()

    drug_disease_pairs.rename(columns={'pubmed_id': 'available_abstracts'}, inplace=True)
    print(f"Found {len(drug_disease_pairs)} unique drug-disease pairs")

    # Sort as before
    drug_disease_pairs = drug_disease_pairs.sort_values(['Disease_name', 'Drug_name'], ascending=[True, True])

    # Double-check our problem drugs are in the pairs
    for drug in ['Parthenolide', 'Kaempferol']:
        pair_check = drug_disease_pairs[drug_disease_pairs['Drug_name'] == drug]
        if len(pair_check) > 0:
            print(f"✓ {drug} appears in {len(pair_check)} drug-disease pairs")
        else:
            print(f"✗ {drug} is NOT found in any drug-disease pairs")

    abstract_results = []
    summary_results = []

    # Process each drug-disease pair
    for _, pair_row in drug_disease_pairs.iterrows():
        drug_name = pair_row['Drug_name']
        disease_name = pair_row['Disease_name']
        drug_id = pair_row['Drug_ID']
        disease_id = pair_row['Disease_ID']
        model = pair_row['Model']
        total_abstracts_count = pair_row['pubmed_id_count']
        available_abstracts = pair_row['available_abstracts']

        print(f"\nAnalyzing {drug_name} for {disease_name} ({available_abstracts} abstracts available out of {total_abstracts_count} total)")

        # CRITICAL CHANGE: Filter by names rather than IDs
        group = df_filtered[
            (df_filtered['Drug_name'] == drug_name) &
            (df_filtered['Disease_name'] == disease_name)
        ]

        # Rest of processing code remains the same



        # Limit abstracts to process if needed
        if len(group) > max_abstracts_per_pair:
            print(f"  Limiting analysis to {max_abstracts_per_pair} abstracts out of {len(group)}")
            group = group.head(max_abstracts_per_pair)

        pair_results = []

        # Process abstracts in batches
        for batch_idx, batch_df in enumerate(np.array_split(group, max(1, len(group) // batch_size))):
            print(f"  Processing batch {batch_idx+1} of {max(1, len(group) // batch_size)}...")

            for idx, row in batch_df.iterrows():
              try:
                # If abstract is missing or too short, use title or other available information
                abstract_text = row.get('abstract', '')
                if pd.isnull(abstract_text) or len(str(abstract_text)) < 50:
                  # Use title if abstract is missing, or combine them if abstract is just short
                  title_text = row.get('title', 'No title available')
                  if pd.isnull(abstract_text) or len(str(abstract_text)) == 0:
                    print(f"  Abstract missing for row {idx} - Using title only: {title_text}")
                    abstract_text = f"TITLE ONLY: {title_text}"
                    row = row.copy()
                    row['abstract'] = abstract_text
                  else:
                    print(f"  Short abstract for row {idx} - Supplementing with title")
                    abstract_text = f"TITLE: {title_text}\nSHORT ABSTRACT: {abstract_text}"
                    row = row.copy()
                    row['abstract'] = abstract_text

                # Get PubMed ID and title for reference
                pubmed_id = row.get('pubmed_id', 'Unknown')
                title = row.get('title', 'No title')

                print(f"  Processing abstract {idx} - PubMed ID: {pubmed_id}")

                # Use the comprehensive prompt construction
                try:
                  prompt = construct_comprehensive_prompt(row, row['abstract'])
                except KeyError as e:
                  print(f"  WARNING: Missing key in row: {e}")
                  continue  # Skip this row
                except Exception as e:
                  print(f"  ERROR in prompt construction: {e}")
                  continue  # Skip this row

                # Debug the first prompt in the first batch
                if batch_idx == 0 and len(pair_results) == 0:
                    print("\n===== SAMPLE PROMPT (first 500 chars) =====")
                    print(prompt[:500] + "..." if len(prompt) > 500 else prompt)
                    print("=========================================\n")

                # Call the API
                output = call_gpt_with_retry(prompt, mode)

                # Debug the first response
                if batch_idx == 0 and len(pair_results) == 0:
                    print("\n===== SAMPLE RESPONSE =====")
                    print(output[:500] + "..." if len(output) > 500 else output)
                    print("===========================\n")

                # Parse the output
                result, explanation = parse_assessment_output(output)

                # Store results for this abstract
                abstract_result = {
                    "Drug_ID": drug_id,
                    "Drug_name": drug_name,
                    "Disease_ID": disease_id,
                    "Disease_name": disease_name,
                    "PubMed_ID": pubmed_id,
                    "Title": title,
                    "Model": row.get('Model', 'Unknown'),
                    "Result": result,
                    "Explanation": explanation,
                    "Raw_Output": output
                }

                abstract_results.append(abstract_result)
                pair_results.append(abstract_result)

                # Pause between API calls
                #time.sleep(2)
              except Exception as e:
                print(f"  ERROR processing row {idx}: {e}")
                continue  # Skip to the next row

            # Longer pause between batches
            if batch_idx < max(1, len(group) // batch_size) - 1:
                pause_time = 1
                print(f"  Batch complete. Pausing for {pause_time} seconds...")
                time.sleep(pause_time)

        # Calculate statistics for this drug-disease pair
        result_counts = Counter([r["Result"] for r in pair_results])
        analyzed_results = len(pair_results)

        if analyzed_results > 0:
            summary = {
                "Drug_ID": drug_id,
                "Drug_name": drug_name,
                "Disease_ID": disease_id,
                "Disease_name": disease_name,
                "Model": model,
                "Total_Possible_Abstracts": total_abstracts_count,
                "Available_Abstracts": available_abstracts,
                "Analyzed_Abstracts": analyzed_results,
                "Positive_Count": result_counts.get("Positive", 0),
                "Neutral_Count": result_counts.get("Neutral", 0),
                "Negative_Count": result_counts.get("Negative", 0),
                "Unknown_Count": result_counts.get("Unknown", 0),
                "Positive_Percent": (result_counts.get("Positive", 0) / analyzed_results) * 100 if analyzed_results > 0 else 0,
                "Neutral_Percent": (result_counts.get("Neutral", 0) / analyzed_results) * 100 if analyzed_results > 0 else 0,
                "Negative_Percent": (result_counts.get("Negative", 0) / analyzed_results) * 100 if analyzed_results > 0 else 0,
                "Unknown_Percent": (result_counts.get("Unknown", 0) / analyzed_results) * 100 if analyzed_results > 0 else 0,
                "Overall_Assessment": "Inconclusive"  # Will be updated below
            }

            # Determine overall assessment based on percentages
            if summary["Positive_Percent"] >= 60:
                summary["Overall_Assessment"] = "Positive"
            elif summary["Negative_Percent"] >= 60:
                summary["Overall_Assessment"] = "Negative"
            elif summary["Neutral_Percent"] >= 60:
                summary["Overall_Assessment"] = "Neutral"
            # If one category is significantly more common than others
            elif (summary["Positive_Percent"] > summary["Neutral_Percent"] + 20 and
                  summary["Positive_Percent"] > summary["Negative_Percent"] + 20):
                summary["Overall_Assessment"] = "Likely Positive"
            elif (summary["Negative_Percent"] > summary["Neutral_Percent"] + 20 and
                  summary["Negative_Percent"] > summary["Positive_Percent"] + 20):
                summary["Overall_Assessment"] = "Likely Negative"

            summary_results.append(summary)

            print(f"\nSummary for {drug_name} and {disease_name}:")
            print(f"  Total possible abstracts: {total_abstracts_count}")
            print(f"  Available abstracts: {available_abstracts}")
            print(f"  Abstracts analyzed: {analyzed_results}")
            print(f"  Positive: {summary['Positive_Count']} ({summary['Positive_Percent']:.1f}%)")
            print(f"  Neutral: {summary['Neutral_Count']} ({summary['Neutral_Percent']:.1f}%)")
            print(f"  Negative: {summary['Negative_Count']} ({summary['Negative_Percent']:.1f}%)")
            print(f"  Unknown: {summary['Unknown_Count']} ({summary['Unknown_Percent']:.1f}%)")
            print(f"  Overall assessment: {summary['Overall_Assessment']}")

    # Create DataFrames
    abstract_df = pd.DataFrame(abstract_results)
    summary_df = pd.DataFrame(summary_results)

    return abstract_df, summary_df

# Main execution code
if __name__ == "__main__":
    import re  # For regex in parse_assessment_output

    # Load the data file
    file_path = "/content/drive/MyDrive/KG/eAIC_abstract_model_with_not_drug_id.xlsx"
    df = pd.read_excel(file_path, engine='openpyxl')

    # Example usage patterns:

    # 1. Run for a specific disease
    # abstract_results, summary_results = analyze_drug_disease_abstracts(
    #     df,
    #     mode="response",
    #     filter_disease="cardiomyopathy due to anthracyclines",
    #     batch_size=3,        # Process 3 abstracts per batch to avoid rate limits
    #     max_abstracts_per_pair=20  # Limit analysis to 20 abstracts per drug-disease pair
    # )

    # # Save results with timestamp to avoid overwriting previous analyses
    # timestamp = time.strftime("%Y%m%d_%H%M%S")
    # abstract_results.to_csv(f"/content/drive/MyDrive/KG/abstract_analysis_cardiomyopathy_{timestamp}.csv", index=False)
    # summary_results.to_csv(f"/content/drive/MyDrive/KG/summary_analysis_cardiomyopathy_{timestamp}.csv", index=False)

    print("Analysis completed and saved.")

    # Uncomment and modify as needed for other analysis scenarios:

    # 2. Run for a specific drug and disease combination
    # abstract_results, summary_results = analyze_drug_disease_abstracts(
    #     df,
    #     mode="response",
    #     filter_disease="cardiomyopathy due to anthracyclines",
    #     filter_drug="Caffeine",
    #     batch_size=5,
    #     max_abstracts_per_pair=60
    # )

    # timestamp = time.strftime("%Y%m%d_%H%M%S")
    # abstract_results.to_excel(f"/content/drive/MyDrive/KG/abstract_result/abstract_analysis_caffeine_cardiomyopathy_{timestamp}.xlsx", index=False, engine='openpyxl')
    # summary_results.to_excel(f"/content/drive/MyDrive/KG/abstract_result/summary_analysis_caffeine_cardiomyopathy_{timestamp}.xlsx", index=False, engine='openpyxl')

    # 3. Run for multiple diseases
    # abstract_results, summary_results = analyze_drug_disease_abstracts(
    #     df,
    #     mode="response",
    #     filter_disease=["diabetes", "asthma"],
    #     batch_size=3,
    #     max_abstracts_per_pair=15
    # )
    #
    # timestamp = time.strftime("%Y%m%d_%H%M%S")
    # abstract_results.to_csv(f"/content/drive/MyDrive/KG/abstract_analysis_diabetes_asthma_{timestamp}.csv", index=False)
    # summary_results.to_csv(f"/content/drive/MyDrive/KG/summary_analysis_diabetes_asthma_{timestamp}.csv", index=False)
    #
    # 4. Run for all data with reasonable limits
    abstract_results, summary_results = analyze_drug_disease_abstracts(
        df, #.sample(frac=1).reset_index(drop=True),  # Shuffle data to get diverse samples
        mode="response",
        batch_size=30,
        max_abstracts_per_pair=200  # Limit to 10 abstracts per pair for full dataset
    )

    timestamp = time.strftime("%Y%m%d_%H%M%S")
    abstract_results.to_excel(f"/content/drive/MyDrive/KG/abstract_result/eAIC_abstract_analysis_nodrugid_{timestamp}.xlsx", index=False, engine='openpyxl')
    summary_results.to_excel(f"/content/drive/MyDrive/KG/abstract_result/eAIC_summary_analysis_nodrugid_{timestamp}.xlsx", index=False, engine='openpyxl')

Analysis completed and saved.
Processing 71 rows after filtering
Found 4 unique drugs in filtered data
Sample drugs: Luteolin, Parthenolide, Kaempferol, Cycloheximide...
✓ Parthenolide is present in filtered data
  Sample Disease: cardiomyopathy due to anthracyclines
  Sample Drug_ID: nan
✓ Kaempferol is present in filtered data
  Sample Disease: doxorubicin induced cardiomyopathy
  Sample Drug_ID: nan
Found 8 unique drug-disease pairs
✓ Parthenolide appears in 2 drug-disease pairs
✓ Kaempferol appears in 2 drug-disease pairs

Analyzing Cycloheximide for cardiomyopathy due to anthracyclines (6 abstracts available out of 6 total)
  Processing batch 1 of 1...
  Processing abstract 59 - PubMed ID: 36737649

===== SAMPLE PROMPT (first 500 chars) =====
You are an expert biomedical researcher. Determine whether Cycloheximide is effective against cardiomyopathy due to anthracyclines based on the provided information, with primary focus on the abstract content.

DRUG INFORMATION:
Drug: Cyclohe

  return bound(*args, **kwds)



===== SAMPLE RESPONSE =====
Result: Neutral

Explanation: The abstract mentions the use of a "cycloheximide chasing assay" as a methodological tool to study protein degradation but does not provide any evidence that cycloheximide itself is effective in treating cardiomyopathy due to anthracyclines. There is no indication that cycloheximide has therapeutic benefits for this condition; it is only referenced as part of an experimental procedure.

  Processing abstract 60 - PubMed ID: 34580724
  Processing abstract 61 - PubMed ID: 29796387
  Processing abstract 62 - PubMed ID: 19590044
  Processing abstract 63 - PubMed ID: 12970367
  Processing abstract 64 - PubMed ID: 12967636

Summary for Cycloheximide and cardiomyopathy due to anthracyclines:
  Total possible abstracts: 6
  Available abstracts: 6
  Abstracts analyzed: 6
  Positive: 0 (0.0%)
  Neutral: 6 (100.0%)
  Negative: 0 (0.0%)
  Unknown: 0 (0.0%)
  Overall assessment: Neutral

Analyzing Kaempferol for cardiomyopathy due to anthra