In [None]:
from autogen import AssistantAgent, UserProxyAgent
from dotenv import load_dotenv
import os

In [None]:
from apify_client import ApifyClient
from typing_extensions import Annotated

In [None]:
# Load environment variables (including APIFY_API_KEY, read in a later cell) from the local .env file.
load_dotenv(dotenv_path='.env')

In [None]:
# Apify API token taken from the environment; None when APIFY_API_KEY is not set.
api_key = os.environ.get('APIFY_API_KEY')

In [None]:
def scrape_page(url: Annotated[str, "The URL of the web page to scrape"]) -> Annotated[str, "Scraped content"]:
    """Scrape a single web page through an Apify Actor and return its text.

    The Actor is started with ``url`` as its only start URL and is limited to
    one page (``maxCrawlPages=1``, ``maxCrawlDepth=0``). The ``"text"`` field
    of every item in the run's default dataset is concatenated, one item per
    line, and the result is truncated to a fixed number of characters.

    Args:
        url: Address of the page to scrape.

    Returns:
        The scraped text, at most 15,000 characters long. Empty string when
        the dataset contains no text.

    Raises:
        ValueError: If the module-level ``api_key`` is empty or None.
        RuntimeError: If the Actor call does not return a run object.
    """
    # The token is loaded into the module-level ``api_key`` by an earlier cell.
    if not api_key:
        raise ValueError("APIFY_API_KEY is not set; cannot create the Apify client.")

    # Initialize the ApifyClient with your API token
    client = ApifyClient(token=api_key)

    # Prepare the Actor input
    run_input = {
        "startUrls": [{"url": url}],
        "useSitemaps": False,
        "crawlerType": "playwright:firefox",
        "includeUrlGlobs": [],
        "excludeUrlGlobs": [],
        "ignoreCanonicalUrl": False,
        "maxCrawlDepth": 0,
        "maxCrawlPages": 1,
        "initialConcurrency": 0,
        "maxConcurrency": 200,
        "initialCookies": [],
        "proxyConfiguration": {"useApifyProxy": True},
        "maxSessionRotations": 10,
        "maxRequestRetries": 5,
        "requestTimeoutSecs": 60,
        "dynamicContentWaitSecs": 10,
        "maxScrollHeightPixels": 5000,
        "removeElementsCssSelector": """nav, footer, script, style, noscript, svg,
    [role=\"alert\"],
    [role=\"banner\"],
    [role=\"dialog\"],
    [role=\"alertdialog\"],
    [role=\"region\"][aria-label*=\"skip\" i],
    [aria-modal=\"true\"]""",
        "removeCookieWarnings": True,
        "clickElementsCssSelector": '[aria-expanded="false"]',
        "htmlTransformer": "readableText",
        "readableTextCharThreshold": 100,
        "aggressivePrune": False,
        "debugMode": True,
        "debugLog": True,
        "saveHtml": True,
        "saveMarkdown": True,
        "saveFiles": False,
        "saveScreenshots": False,
        "maxResults": 9999999,
        "clientSideMinChangePercentage": 15,
        "renderingTypeDetectionPercentage": 10,
    }

    # Run the Actor and wait for it to finish
    # NOTE(review): the Actor id presumably refers to Apify's Website Content Crawler - confirm.
    run = client.actor("aYG0l9s7dbB7j3gbS").call(run_input=run_input)
    if run is None:
        raise RuntimeError("Apify Actor call returned no run object.")

    # Collect the text of every dataset item; an item whose "text" is missing
    # or None contributes an empty line instead of raising TypeError.
    chunks = [
        (item.get("text") or "") + "\n"
        for item in client.dataset(run["defaultDatasetId"]).iterate_items()
    ]
    text_data = "".join(chunks)

    # Heuristic cap on the returned text so it stays well inside the model's
    # context window: int(0.75 * 20000) == 15000 characters.
    average_token = 0.75
    max_tokens = 20000
    return text_data[: int(average_token * max_tokens)]

In [None]:
# Configuration for Ollama
# The model is reached through the OpenAI-compatible endpoint at base_url.
config_list = [
    {
        "model": "qwen2.5:latest",
        "base_url": "http://localhost:11434/v1/",
        "api_key": "ollama",
    }
]


def _is_termination_msg(message):
    """Return True when a chat message's content contains "TERMINATE".

    A message whose "content" is missing or None is treated as
    non-terminating; a bare `"TERMINATE" in None` would raise TypeError.
    """
    return "TERMINATE" in (message.get("content") or "")


# Create agents
assistant = AssistantAgent(
    name="assistant",
    llm_config={
        "seed": 42,
        "config_list": config_list,
        "temperature": 0.4,
    },
    is_termination_msg=_is_termination_msg,
)

# The user proxy never asks a human for input, auto-replies at most 10 times
# in a row, and runs code locally (no Docker) in the "coding" directory.
user_proxy = UserProxyAgent(
    name="user_proxy",
    is_termination_msg=_is_termination_msg,
    max_consecutive_auto_reply=10,
    human_input_mode="NEVER",
    code_execution_config={
        "work_dir": "coding",
        "use_docker": False,
    },
)

In [None]:
# Clinical question the agents are asked to research.
CLINICAL_QUESTION = "What is the prognosis for hemangiosarcoma in dogs?"
# First task prompt: ask for ten papers (with URLs) relevant to the question.
TASK_1 = f"Find 10 research papers to help answer the following question: {CLINICAL_QUESTION}. Provide the url for each of these papers."


In [None]:
# Kick off the paper search: the user proxy sends TASK_1 to the assistant.
user_proxy.initiate_chat(recipient=assistant, message=TASK_1)

In [None]:
import requests
from bs4 import BeautifulSoup

# Define the search query URL on PubMed
url = "https://pubmed.ncbi.nlm.nih.gov/?term=hemangiosarcoma+dog+prognosis"

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as results.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Find all relevant paper links from the search results
paper_links = []
results = soup.find_all('a', {'href': True})
for link in results:
    href = link['href']
    # Accept legacy "/pubmed/<id>" paths as well as bare "/<PMID>/" paths.
    # NOTE(review): current PubMed result pages appear to use the bare form - confirm.
    is_legacy_path = '/pubmed/' in href and not href.startswith('#')
    is_pmid_path = href.strip('/').isdigit()
    if not (is_legacy_path or is_pmid_path):
        continue

    # Only site-relative paths need the host prepended; absolute URLs are kept as-is.
    if href.startswith(('http://', 'https://')):
        paper_link = href
    else:
        paper_link = f"https://pubmed.ncbi.nlm.nih.gov{href}"

    # The same article can be linked more than once on a page; list it once.
    if paper_link in paper_links:
        continue
    paper_links.append(paper_link)
    if len(paper_links) == 10:  # Collect the first 10 papers
        break

# Print the URLs of the relevant research papers that were found (at most 10)
for i, link in enumerate(paper_links, start=1):
    print(f"Paper {i}: {link}")

# Report the real count rather than always claiming 10.
print(f"Found and printed the URLs of {len(paper_links)} relevant research papers.")

In [None]:
from autogen import ConversableAgent, register_function

# Create web scrapper agent.
# This LLM-backed agent decides when to call the scrape_page tool.
scraper_agent = ConversableAgent(
    "WebScraper",
    llm_config={"config_list": config_list},
    system_message="You are a web scrapper and you can scrape any web page using the tools provided. "
    "Returns 'TERMINATE' when the scraping is done.",
)

# Create user proxy agent.
# It has no LLM and no code execution; it executes the registered tool and
# otherwise answers with the default auto-reply.
user_proxy_agent = ConversableAgent(
    "UserProxy",
    llm_config=False,  # No LLM for this agent.
    human_input_mode="NEVER",
    code_execution_config=False,  # No code execution for this agent.
    # Case-insensitive match; `or ""` covers both a missing "content" key and
    # an explicit None, either of which previously raised KeyError or was only
    # half-guarded.
    is_termination_msg=lambda x: "terminate" in (x.get("content") or "").lower(),
    default_auto_reply="Please continue if not finished, otherwise return 'TERMINATE'.",
)

# Register the function with the agents.
# scraper_agent proposes the call; user_proxy_agent runs it.
register_function(
    scrape_page,
    caller=scraper_agent,
    executor=user_proxy_agent,
    name="scrape_page",
    description="Scrape a web page and return the content.",
)