In [None]:
# Core imports for the agent workflows in this notebook
from mcp import stdio_client, StdioServerParameters  # Model Context Protocol for tool integration
from strands import Agent                            # Strands Agent SDK for orchestrating AI workflows
from strands.tools.mcp import MCPClient             # MCP client wrapper for Strands
from strands.models import BedrockModel             # Amazon Bedrock model integration
from strands_tools import file_read, file_write, http_request, handoff_to_user  # Built-in Strands tools
from strands.handlers.callback_handler import PrintingCallbackHandler
import os
import urllib3

In [4]:
# Must be set before any agent runs a tool. Presumably this lets strands_tools
# (e.g. file_write) execute without an interactive confirmation prompt —
# TODO confirm against the strands-agents-tools documentation.
os.environ.update(BYPASS_TOOL_CONSENT="true")

In [5]:
# Development-only configuration: silence urllib3's InsecureRequestWarning so
# unverified-HTTPS notices do not flood the cell output.
# Do not keep this in production — the warning exists to flag insecure requests.
urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning)

In [3]:
def _open_fetch_server():
    """Build the stdio transport that runs the `mcp-server-fetch` MCP server via uvx."""
    server_params = StdioServerParameters(command="uvx", args=["mcp-server-fetch"])
    return stdio_client(server_params)


# MCPClient receives the factory (not its result), so the transport is only
# created when the client invokes it.
fetch_client = MCPClient(_open_fetch_server)

In [None]:
# Single-agent run: retrieve the Novartis pipeline page with the MCP fetch tools
bedrock_model = BedrockModel(
    model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    temperature=0.2,
)

SYSTEM_PROMPT = """
You are a clinical analyst. Your responsibility is to retrieve pipeline information from pharmaceutical companies. 
Using web crawling techniques and respecting robot.txt, extract pipeline data. 
"""

# The agent is created and called inside the `with` block, i.e. while the
# MCP client context is active.
with fetch_client:
    tools = fetch_client.list_tools_sync()

    # Unpack the MCP tool list so every entry of `tools=` is a single tool
    # rather than a list nested inside the list.
    agent = Agent(
        tools=[*tools, http_request, file_write, handoff_to_user],
        model=bedrock_model,
        system_prompt=SYSTEM_PROMPT,
    )

    response = agent("Download pipeline data from Novartis pipeline from https://www.novartis.com/research-development/novartis-pipeline")

    # The original printed from an undefined name `result` (NameError). The
    # agent call returns an AgentResult whose `message` dict holds the content list.
    print(response.message['content'][0]['text'])

In [None]:
# Model configuration used by the data collection agent
bedrock_model = BedrockModel(
    model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    temperature=0.2,
)

# NOTE: adjacent string literals are concatenated verbatim, so every instruction
# in the prompts below ends with an explicit "\n"; without it the numbered
# steps run together (e.g. "JSON format.1. Determine ...").

# Data Collection Agent to retrieve pipeline data from company websites
# NOTE(review): the prompt names `retrieve` and `fetch` tools, but only the
# tools listed in `tools=` are attached to this agent — confirm which is intended.
data_collection_agent = Agent(
    system_prompt=(
        "You are a clinical analyst. Your responsibility is to retrieve pipeline information from pharmaceutical companies. "
        "Using web crawling techniques and respecting robot.txt, extract pipeline data in JSON format.\n"
        "1. Determine if the input is pipeline data or other information on the website like header, footer.\n"
        "2. Use your research tools (http_request, retrieve, fetch) to find relevant information\n"
        "3. Trust file_write and file_read tools"
    ),
    model=bedrock_model,
    callback_handler=PrintingCallbackHandler(),
    tools=[http_request, file_read, file_write, handoff_to_user]
)

# Standardisation Agent for creating a common data model for the collected data.
# callback_handler=None: no callback handler is attached to this intermediate agent.
# NOTE(review): no `model=` is passed here, so this agent does not use
# `bedrock_model` — confirm that relying on the SDK default is intended.
standardisation_agent = Agent(
    callback_handler=None,
    system_prompt=(
        "You are a Standardisation Agent that verifies information and creates a common data model.\n"
        "1. Create a common data model that standardises the data collected by data_collection_agent\n"
        "2. Document the data model"
    )
)

# Enricher Agent for enriching the data with ontologies.
# Step 5 of its prompt asks it to save the output, so it is given file_write;
# the original agent had no tools at all and could not persist anything.
# NOTE(review): like the standardisation agent, no `model=` is passed — confirm.
enricher_agent = Agent(
    system_prompt=(
        "You are an Enricher Agent that map the data to healthcare and life sciences ontologies like MeSH codes\n"
        "1. Identify ontologies and map the data\n"
        "2. Create controlled vocabularies where needed\n"
        "3. Document your chose for ontologies\n"
        "4. Create JSON file with final output\n"
        "5. Save the final output"
    ),
    tools=[file_write]
)

In [7]:
def run_research_workflow(user_input):
    """Run a request through the three agents in sequence.

    The data collection agent receives the request first. Its reply, converted
    to a string, is embedded in the prompt for the standardisation agent, whose
    stringified reply is in turn embedded in the prompt for the enricher agent.

    Args:
        user_input: Free-text request; inserted verbatim into each stage's prompt.

    Returns:
        The object returned by ``enricher_agent`` for the final prompt.
    """
    # Stage 1 - data collection: gather the raw information for the request.
    collection_prompt = f"Research: '{user_input}'. Use your available tools to gather information from reliable sources."
    collected_text = str(data_collection_agent(collection_prompt))

    # Stage 2 - standardisation: pass the collected findings on as plain text.
    standardisation_prompt = f"Analyze these findings about '{user_input}':\n\n{collected_text}"
    standardised_text = str(standardisation_agent(standardisation_prompt))

    # Stage 3 - enrichment: the enricher's result is returned as-is (not stringified).
    enrichment_prompt = f"Create a report on '{user_input}' based on this analysis:\n\n{standardised_text}"
    return enricher_agent(enrichment_prompt)

In [8]:
# Request covering both company pipeline pages; the two literals are joined by
# implicit concatenation inside the parentheses.
user_input = (
    "Download pipeline data from Novartis pipeline (https://www.novartis.com/research-development/novartis-pipeline) "
    "and Novo Nordisk pipeline (https://www.novonordisk.com/science-and-technology/r-d-pipeline.html)"
)
# Left as the cell's last expression so the returned result is displayed.
run_research_workflow(user_input)

I'll help you download pipeline data from Novartis and Novo Nordisk using the available tools. Let me start by examining each company's pipeline page.

First, let's check Novartis' pipeline:
Tool #1: http_request


Now let's check Novo Nordisk's pipeline:
Tool #2: http_request


Now that I've examined both websites, I'll extract the pipeline data from each company and save it in JSON format.

Let's start with Novartis:
Tool #3: file_write


Now let's extract the Novo Nordisk pipeline data:
Tool #4: file_write


Now I'll create a summary comparison of the two pipelines:
Tool #5: file_write


## Summary of Pipeline Data Extraction

I've successfully extracted and analyzed the pipeline data from both Novartis and Novo Nordisk. Here's a summary of what I found:

### Novartis Pipeline
- **Total compounds**: 104 compounds in development
- **Key therapeutic areas**: Strong focus on oncology, particularly solid tumors with radioligand therapies
- **Development stages**: Well-distributed across Phase 1, 2, 3, and Registration
- **Notable compounds**: 
  - Multiple radioligand therapies (Lutathera®, Pluvicto®)
  - Established products with new indications (Cosentyx®, Mayzent®)
  - Novel compounds for rare diseases

### Novo Nordisk Pipeline
- **Total compounds**: 47 compounds in development
- **Key therapeutic areas**: Strong focus on diabetes and obesity, with growing interest in cardiovascular disease
- **Development stages**: Balanced across Phase 1, 2, 3, and Filed status
- **Notable compounds**:
  - Multiple GLP-1 based therapies (Semaglutide variants)
  - Novel combinations (

AgentResult(stop_reason='end_turn', message={'role': 'assistant', 'content': [{'text': '# Pharmaceutical Pipeline Data Standardization Report with Ontology Mapping\n\n## Executive Summary\n\nThis report presents a comprehensive ontology-enriched data standardization framework for pharmaceutical pipeline data from Novartis and Novo Nordisk. The framework incorporates healthcare and life sciences ontologies including MeSH codes, SNOMED CT, ATC codes, and ChEBI to ensure semantic interoperability and standardized terminology across pharmaceutical pipeline datasets.\n\n## 1. Ontology Selection and Mapping Strategy\n\n### 1.1 Primary Ontologies Selected\n\n| Ontology | Purpose | Coverage | Implementation |\n|----------|---------|-----------|----------------|\n| **MeSH (Medical Subject Headings)** | Disease/condition standardization | Therapeutic indications, diseases | Primary mapping for clinical conditions |\n| **SNOMED CT** | Clinical terminology | Medical concepts, procedures | Secondar