# Current process

main-agent  
TOI-agent -> drug name recognition  
PDF-agent -> PDF load and processing  
Info-agent -> retrieves DILI-related keywords and their sections (if applicable) from the processed PDF  
DILI-agent -> determines DILI status

In [92]:
import openai
from swarm import Swarm, Agent
import os, re
import PyPDF2  

In [2]:
# Load KEY=VALUE pairs from env.txt into the process environment.
with open('env.txt', 'r') as env_file:
    for line_no, raw_line in enumerate(env_file, start=1):
        entry = raw_line.strip()
        # Blank lines and '#' comments carry no setting; skip them instead of
        # failing on the tuple unpacking below.
        if not entry or entry.startswith('#'):
            continue
        # Split on the FIRST '=' only, so values that themselves contain '='
        # (e.g. base64-padded tokens) are kept intact.
        key, separator, val = entry.partition('=')
        if not separator or not key.strip():
            raise ValueError(
                f"env.txt line {line_no}: expected KEY=VALUE, got {entry!r}"
            )
        os.environ[key.strip()] = val.strip()

In [101]:
# Lookup table: lower-case drug name -> path of the PDF used for that drug.
pdf_dict = dict(
    tamoxifen='./data/hc/00064472.pdf',
    atorvastatin='./data/hc/00066863.pdf',
)

In [118]:
# Instructions for the routing agent: pick the sub-agent that matches the
# user's request (DILI, DICT, or the generic fallback).
prompt_main = """
You are a helpful agent to determine which agent to use for user.
If the user asked for DILI or liver injury classification of a drug, use Agent DILI.
If the user asked for DICT or cardiotoxicity classification of a drug, use Agent DICT.
Otherwise, use Agent Generic.
"""

# Instructions for the DILI agent: delegate to Agent TOI, then answer.
prompt_DILI = """
Transfer the query to Agent TOI to retrieve the full text from the PDF.
Answer the user question based on the information extracted from.
"""

# Instructions for the TOI step; the drug-name -> PDF table is embedded in the
# prompt text (only this prompt needs interpolation, hence the f-string).
prompt_TOI = f"""
Identify the most relevant PDF based on the drug name mentioned by the user, extract the full text, and return it.
{pdf_dict!s}
"""

# Define PDF text extraction function
def extract_pdf_text(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    Failures are reported in-band instead of being raised, so the result can
    be handed straight back to an agent:

    * ``"Error: PDF file not found."`` when the file does not exist.
    * ``"Error reading PDF file: <details>"`` for any other failure while
      opening or parsing the file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined in page order, or one of the error
        strings described above.
    """
    try:
        with open(file_path, "rb") as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Build the result with a single join rather than repeated `+=`.
            # `or ""` keeps a page that yields no text (e.g. an image-only
            # page) from aborting the extraction of the whole document.
            return "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
    except FileNotFoundError:
        return "Error: PDF file not found."
    except Exception as e:
        # Deliberately broad: any parsing problem is reported as text.
        return f"Error reading PDF file: {e}"

def find_dili_keywords(text):
    """Ask the chat model to list the DILI-related keywords found in *text*.

    The extracted keywords are also printed to stdout between
    ``==DILI information found in the document===`` and ``===End===`` markers.

    Args:
        text: Document text to scan; it is sent as the user message.

    Returns:
        The model's reply with surrounding whitespace stripped. If *text* is
        empty or blank, a short notice is returned without calling the API.
        If the API call or the handling of its response fails,
        ``"Error extracting keywords."`` is returned.
    """
    # Nothing to analyse: skip the API call so the model is not asked to
    # produce keywords for a document that has no text.
    if not text or not text.strip():
        return "No text provided for DILI keyword extraction."

    # System prompt instructing the LLM to identify DILI-specific keywords.
    # A plain string is enough: there is nothing to interpolate.
    prompt = """
    You are an expert in drug-induced liver injury (DILI). 
    Please extract all DILI-related keywords from the following text. 
    Provide the keywords as a list, focusing only on terms directly relevant to liver injury.
    """
    try:
        response = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": text},
            ],
            max_tokens=150,
            temperature=0,
        )
        keywords = response.choices[0].message.content.strip()
        print('\n==DILI information found in the document===\n',keywords,'\n===End===\n')
        return keywords
    except Exception as e:
        # Best-effort: report the failure and hand back an in-band error text.
        print(f"Error querying OpenAI API: {e}")
        return "Error extracting keywords."

# Define Agent transfer functions
def transfer_to_agent_DILI():
    """Log the hand-off and return the DILI agent (``agent_b``)."""
    print('use Agent DILI')
    handoff_target = agent_b
    return handoff_target

def transfer_to_agent_DICT():
    """Log the hand-off and return the DICT agent (``agent_c``)."""
    print('use Agent DICT')
    handoff_target = agent_c
    return handoff_target

def transfer_to_agent_Generic():
    """Log the hand-off and return the generic fallback agent (``agent_d``)."""
    print('use Agent Generic')
    handoff_target = agent_d
    return handoff_target

def transfer_to_agent_TOI(drug_name):
    """Find the PDF registered for *drug_name* and return its DILI keywords.

    Args:
        drug_name: Drug name to look up in ``pdf_dict``. An exact match is
            tried first, then a trimmed, lower-cased variant, so inputs such
            as ``"Tamoxifen"`` or ``" tamoxifen "`` still resolve.

    Returns:
        The keyword text produced by ``find_dili_keywords`` for the PDF's
        content; the error text from ``extract_pdf_text`` if the PDF could
        not be read; or ``"No relevant PDF found for this drug."`` when no
        PDF is registered for the drug.
    """
    print('use Agent TOI')
    # Retrieve file path from pdf_dict based on drug name.
    file_path = pdf_dict.get(drug_name, None)
    if file_path is None and isinstance(drug_name, str):
        file_path = pdf_dict.get(drug_name.strip().lower(), None)

    if not file_path:
        # Only claim a file "is used" when one was actually found.
        print(f'No PDF is registered for drug name "{drug_name}".')
        return "No relevant PDF found for this drug."

    print(f'This file {file_path} is used since drug name "{drug_name}" was found.')
    full_text = extract_pdf_text(file_path)

    # extract_pdf_text reports failures as text. Return that text as-is
    # instead of sending an error message to the model as if it were the
    # document to analyse.
    if full_text == "Error: PDF file not found." or full_text.startswith(
        "Error reading PDF file: "
    ):
        return full_text

    return find_dili_keywords(full_text)


# Define the main agent and sub-agents
# NOTE: the model is selected with the `model` keyword. The previous `mode=`
# keyword is not an Agent field, so the per-agent model choice did not take
# effect as written.

# Routing agent: decides which specialised agent handles the request.
agent_main = Agent(
    model="gpt-4o-mini",
    name="Agent Main",
    instructions=prompt_main,
    functions=[transfer_to_agent_DILI, transfer_to_agent_DICT, transfer_to_agent_Generic],
)

# DILI agent: obtains DILI information through transfer_to_agent_TOI.
agent_b = Agent(
    model="gpt-4o",
    name="Agent DILI",
    instructions=prompt_DILI,
    functions=[transfer_to_agent_TOI],
)

# DICT agent: answers cardiotoxicity questions (no tools attached).
agent_c = Agent(
    model="gpt-4o-mini",
    name="Agent DICT",
    instructions="Answer whether the drug mentioned will cause cardiotoxicity.",
)

# Generic agent: fallback for everything else (no tools attached).
agent_d = Agent(
    model="gpt-4o-mini",
    name="Agent Generic",
    instructions="Answer the user's question if it does not fall into any other specific categories.",
)

# Create the Swarm client and run one example query, starting at the main
# (routing) agent.
client = Swarm()
response = client.run(
    messages=[dict(role="user", content="What is the DILI class of tamoxifen?")],
    agent=agent_main,
)

# Show the content of the last message of the run.
print(response.messages[-1]["content"])


use Agent DILI
use Agent TOI
This file ./data/hc/00064472.pdf is used since drug name "tamoxifen" was found.

==DILI information found in the document===
 Here is a list of DILI-related keywords extracted from the provided text:

1. Drug-induced liver injury (DILI)
2. Liver abnormalities
3. Hepatocellular injury
4. Hepatitis
5. Liver failure
6. Cirrhosis
7. Cholestasis
8. Fatty liver
9. Hepatic necrosis
10. Alanine aminotransferase (ALT)
11. Aspartate aminotransferase (AST)
12. Gamma-glutamyl transpeptidase (GGT)
13. Liver test abnormalities
14. Liver enzymes
15. Liver diseases
16. Hepatocellular carcinomas
17. Liver function tests
18. Elevated liver enzymes 
===End===

Based on the information extracted, the DILI class of tamoxifen can be associated with various liver-related terms such as drug-induced liver injury (DILI), hepatocellular injury, hepatitis, liver failure, and elevated liver enzymes. However, the specific DILI class or risk category for tamoxifen is not explicitly menti