# Overview: 
- Build a simple web browser agent that loads a website in a headless browser, scrapes its HTML, and uses an LLM to turn the page into structured data.

# Reference:
- https://learn.deeplearning.ai/courses/building-ai-browser-agents/


In [1]:
import asyncio
import json
import os
import nest_asyncio
import pprint
import base64
from io import BytesIO
import pandas as pd
from playwright.async_api import async_playwright
from openai import OpenAI
from PIL import Image
from tabulate import tabulate
from IPython.display import display, HTML, Markdown
from pydantic import BaseModel
from helper import get_openai_api_key, visualizeCourses

In [2]:
# Shared OpenAI client used by process_with_llm(); the key is obtained from the
# project-local helper instead of being hardcoded in the notebook.
client = OpenAI(api_key=get_openai_api_key())
# Patch asyncio to permit nested event-loop use, since the notebook kernel
# already runs a loop while the cells below await Playwright coroutines.
nest_asyncio.apply()

## WebScraper Agent

In [3]:
class WebScraperAgent:
    """Async wrapper around a single headless Chromium page driven by Playwright.

    The browser is launched lazily by the first ``scrape_content`` call and torn
    down by ``close``. After ``close`` the agent can be reused: the next
    ``scrape_content`` call launches a fresh browser.
    """

    def __init__(self):
        # All three handles stay None until init_browser() has run.
        self.playwright = None
        self.browser = None
        self.page = None

    async def init_browser(self):
        """Start Playwright, launch headless Chromium and open a blank page.

        Any browser/driver left over from a previous launch is closed first so
        that re-initialising never leaks a Chromium process.
        """
        await self.close()
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch(
            headless=True,
            args=[
                "--disable-dev-shm-usage",
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-accelerated-2d-canvas",
                "--disable-gpu",
                "--no-zygote",
                "--disable-audio-output",
                "--disable-software-rasterizer",
                "--disable-webgl",
                "--disable-web-security",
                # One comma-separated switch: Chromium keeps only the last
                # occurrence of a repeated switch, so two separate
                # --disable-features flags would drop the first feature.
                "--disable-features=LazyFrameLoading,IsolateOrigins",
                "--disable-background-networking",
            ],
        )
        self.page = await self.browser.new_page()

    def _require_page(self):
        """Return the live page or raise a descriptive error if none exists."""
        if self.page is None:
            raise RuntimeError(
                "No page is open; call scrape_content() before taking a screenshot."
            )
        return self.page

    async def scrape_content(self, url):
        """Navigate to ``url`` and return the rendered page HTML as a string.

        Launches the browser on demand when no usable page is available.
        """
        if not self.page or self.page.is_closed():
            await self.init_browser()
        await self.page.goto(url, wait_until="load")
        await self.page.wait_for_timeout(2000)  # Wait for dynamic content
        return await self.page.content()

    async def take_screenshot(self, path="screenshot.png"):
        """Save a full-page screenshot to ``path`` and return that path.

        Raises:
            RuntimeError: if no page has been opened yet.
        """
        await self._require_page().screenshot(path=path, full_page=True)
        return path

    async def screenshot_buffer(self):
        """Return a PNG screenshot of the current viewport as bytes.

        Raises:
            RuntimeError: if no page has been opened yet.
        """
        return await self._require_page().screenshot(type="png", full_page=False)

    async def close(self):
        """Close the browser and stop Playwright; safe to call at any time.

        A no-op when nothing was started. The handles are reset before teardown
        and Playwright is stopped even if closing the browser raises, so a
        failed close cannot leave the agent holding stale objects.
        """
        browser, playwright = self.browser, self.playwright
        self.playwright = None
        self.browser = None
        self.page = None
        try:
            if browser is not None:
                await browser.close()
        finally:
            if playwright is not None:
                await playwright.stop()

In [4]:
# Single module-level agent shared by webscraper(); no browser is started here,
# it is launched on the first scrape_content() call.
scraper = WebScraperAgent()

## Structured Data Format

In [5]:
# One course entry extracted from the scraped page. The field set mirrors what
# the system prompt in process_with_llm() asks the model to extract.
# (Comments are used instead of a docstring so the schema derived from this
# model is left untouched.)
class DeeplearningCourse(BaseModel):
    title: str  # course title
    description: str  # course description text
    presenter: list[str]  # presenter(s); a list, so several can be recorded
    imageUrl: str  # image URL for the course
    courseURL: str  # URL of the course

# Top-level container passed as `response_format` to the OpenAI parse call, so
# the model's answer is returned as a list of DeeplearningCourse entries.
class DeeplearningCourseList(BaseModel):
    courses: list[DeeplearningCourse]  # every course the model extracted

## LLM Client for OpenAI

In [6]:
async def process_with_llm(html, instructions, truncate = False, max_chars=150000):
    """Ask the OpenAI model to extract course data from raw HTML.

    Args:
        html: Page HTML to send as the user message.
        instructions: Extra task instructions interpolated into the system prompt.
        truncate: Accepted for backward compatibility only. It has no effect:
            the HTML is always capped at ``max_chars`` characters.
        max_chars: Maximum number of HTML characters sent to the model, to stay
            under token limits. Pass ``None`` to send the full document.

    Returns:
        The parsed ``DeeplearningCourseList`` from the first completion choice.
        NOTE(review): presumably ``None`` when the model refuses or the output
        cannot be parsed — confirm against the OpenAI SDK before relying on it.
    """
    system_prompt = f"""
            You are an expert web scraping agent. Your task is to:
            Extract relevant information from this HTML to JSON 
            following these instructions:
            {instructions}
            
            Extract the title, description, presenter, 
            the image URL and course URL for each of 
            all the courses for the deeplearning.ai website

            Return ONLY valid JSON, no markdown or extra text."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": html[:max_chars]},
    ]
    # `client` is the synchronous OpenAI client; run the blocking HTTP call in a
    # worker thread so this coroutine does not stall the event loop.
    completion = await asyncio.to_thread(
        client.beta.chat.completions.parse,
        model="gpt-4o-mini-2024-07-18",
        messages=messages,
        temperature=0.1,
        response_format=DeeplearningCourseList,
    )
    return completion.choices[0].message.parsed

In [7]:
async def webscraper(target_url, instructions):
    """Scrape ``target_url``, screenshot it and extract structured data via the LLM.

    Errors are reported with a printed message rather than raised, and the
    shared ``scraper`` browser is always closed before returning.

    Args:
        target_url: Page to load in the headless browser.
        instructions: Task instructions forwarded to ``process_with_llm``.

    Returns:
        A ``(result, screenshot)`` tuple. ``result`` is the parsed model output
        and ``screenshot`` the PNG bytes of the page; either is ``None`` when
        the step that produces it did not complete.
    """
    result = None
    # Bound up front: without this, a failure before the capture left the name
    # undefined and the final `return` raised UnboundLocalError.
    screenshot = None
    try:
        # Scrape content and capture screenshot
        print("Extracting HTML Content \n")
        html_content = await scraper.scrape_content(target_url)

        print("Taking Screenshot \n")
        screenshot = await scraper.screenshot_buffer()

        # Process content
        print("Processing..")
        result = await process_with_llm(html_content, instructions, False)
        print("\nGenerated Structured Response")
    except Exception as e:
        print(f"❌ Error: {str(e)}")
    finally:
        # Teardown can itself fail (e.g. the browser never launched); report it
        # instead of letting it escape and mask the error handled above.
        try:
            await scraper.close()
        except Exception as close_error:
            print(f"❌ Error while closing browser: {str(close_error)}")
    return result, screenshot

## Example 1

In [8]:
# Course catalogue page that the agent loads and scrapes.
target_url = "https://www.deeplearning.ai/courses"
# Site root passed to visualizeCourses() together with the results; presumably
# used to resolve relative links — confirm in helper.py.
base_url = "https://deeplearning.ai"

In [9]:
# No filtering: ask the model for every course found on the page.
instructions = """
    Get all the courses
"""
# webscraper() returns (parsed result, screenshot bytes) and closes the shared
# browser before returning.
result, screenshot = await webscraper(target_url, instructions)

Extracting HTML Content 

Taking Screenshot 

Processing..

Generated Structured Response


In [10]:
await visualizeCourses(result=result, 
                       screenshot=screenshot, 
                       target_url=target_url, 
                       instructions=instructions, 
                       base_url=base_url)

### Scraped Course Data:

title,description,presenter,imageUrl,courseURL
Build with Andrew,"If you've never written code before, this course is for you. In less than 30 minutes, you'll learn to describe an idea in words and let AI transform it into an app for you.",DeepLearning.AI,,Build with Andrew
Nvidia's NeMo Agent Toolkit: Making Agents Reliable,"Turn proof-of-concept agent demos into production-ready systems using observability, evaluation, and deployment tools from Nvidia's NeMo Agent Toolkit.",Nvidia,,Nvidia's NeMo Agent Toolkit: Making Agents Reliable
Multi-Vector Image Retrieval,"Build advanced retrieval systems that represent images with multiple vectors, enabling fine-grained matching between text queries and visual content for accurate multi-modal search.",Qdrant,,Multi-Vector Image Retrieval
Building Coding Agents with Tool Execution,"Build AI agents that write and execute code to accomplish tasks, running safely in sandboxed cloud environments that protect your systems from untrusted code.",E2B,,Building Coding Agents with Tool Execution
Semantic Caching for AI Agents,Speed up and reduce the costs of your AI agents by implementing semantic caching that reuses responses based on meaning rather than exact text.,Redis,,Semantic Caching for AI Agents
"Design, Develop, and Deploy Multi-Agent Systems with CrewAI","Build practical multi-agent systems that collaborate, use tools and memory, and scale reliably to production.",CrewAI,,"Design, Develop, and Deploy Multi-Agent Systems with CrewAI"
Jupyter AI: AI Coding in Notebooks,"Learn to code with AI in Jupyter notebooks. Use Jupyter AI to generate code, get explanations, and analyze data.",Project Jupyter,,Jupyter AI: AI Coding in Notebooks
PyTorch for Deep Learning Professional Certificate,"Learn the core principles of building, optimizing, and deploying deep learning models using PyTorch.",DeepLearning.AI,,PyTorch for Deep Learning Professional Certificate
Fine-tuning and Reinforcement Learning for LLMs: Intro to Post-Training,"Learn how to apply fine-tuning and reinforcement learning techniques to shape model behavior, improve reasoning, and make LLMs safer and more reliable.",AMD,,Fine-tuning and Reinforcement Learning for LLMs: Intro to Post-Training
Governing AI Agents,"Integrate data governance into your agent's workflow to ensure it handles data safely, securely, and accurately.",Databricks,,Governing AI Agents


### Website Screenshot:

## Example with RAG courses

In [11]:
# Topic used to filter the catalogue.
# NOTE(review): the trailing space ends up before the "." in the prompt below.
subject = "Retrieval Augmented Generation (RAG) "

# Filtering is delegated entirely to the model via the prompt; nothing in code
# enforces the "three courses" limit.
# NOTE(review): "cources" is a typo inside the prompt text sent to the model.
instructions = f"""
Read the description of the courses and only 
provide the three courses that are about {subject}. 
Make sure that we don't have any other
cources in the output
"""
# Re-scrapes the same page; the agent relaunches its browser because the
# previous webscraper() call closed it.
result, screenshot = await webscraper(target_url, instructions)

Extracting HTML Content 

Taking Screenshot 

Processing..

Generated Structured Response


In [12]:
await visualizeCourses(result=result, 
                       screenshot=screenshot, 
                       target_url=target_url, 
                       instructions=instructions, 
                       base_url=base_url)

### Scraped Course Data:

title,description,presenter,imageUrl,courseURL
Retrieval Augmented Generation (RAG),"Gain fundamental understanding and the practical knowledge to develop production-ready RAG applications, from architecture to deployment and evaluation.",DeepLearning.AI,,Retrieval Augmented Generation (RAG)
Fast Prototyping of GenAI Apps with Streamlit,"Prototype and deploy GenAI apps using an MVP workflow, prompt engineering, and RAG.",Snowflake,,Fast Prototyping of GenAI Apps with Streamlit
Building and Evaluating Data Agents,"Build, evaluate, and improve a multi-agent system that plans its steps, connects to data sources, and provides insights.",Snowflake,,Building and Evaluating Data Agents


### Website Screenshot:

## Challenges with web agents

In [13]:
# Same topic as the previous example, redefined so this cell is self-contained.
subject = "Retrieval Augmented Generation (RAG) "
# This asks for a free-form summary of a single course, but process_with_llm()
# always parses the reply into DeeplearningCourseList and only sees the HTML of
# the catalogue page, so the request cannot be answered as written.
instructions = f"""
Can you get the summary of the top course on
{subject} provide the learnings from it
"""
result, screenshot = await webscraper(target_url, instructions)

Extracting HTML Content 

Taking Screenshot 

Processing..

Generated Structured Response


In [14]:
await visualizeCourses(result=result,
                       screenshot=screenshot,
                       target_url=target_url,
                       instructions=instructions,
                       base_url=base_url)

### Scraped Course Data:

title,description,presenter,imageUrl,courseURL
Retrieval Augmented Generation (RAG),"Gain fundamental understanding and the practical knowledge to develop production-ready RAG applications, from architecture to deployment and evaluation.",DeepLearning.AI,,Retrieval Augmented Generation (RAG)
Build with Andrew,"If you've never written code before, this course is for you. In less than 30 minutes, you'll learn to describe an idea in words and let AI transform it into an app for you.",DeepLearning.AI,,Build with Andrew
Nvidia's NeMo Agent Toolkit: Making Agents Reliable,"Turn proof-of-concept agent demos into production-ready systems using observability, evaluation, and deployment tools from Nvidia's NeMo Agent Toolkit.",Nvidia,,Nvidia's NeMo Agent Toolkit: Making Agents Reliable
Multi-Vector Image Retrieval,"Build advanced retrieval systems that represent images with multiple vectors, enabling fine-grained matching between text queries and visual content for accurate multi-modal search.",Qdrant,,Multi-Vector Image Retrieval
Building Coding Agents with Tool Execution,"Build AI agents that write and execute code to accomplish tasks, running safely in sandboxed cloud environments that protect your systems from untrusted code.",E2B,,Building Coding Agents with Tool Execution
Semantic Caching for AI Agents,Speed up and reduce the costs of your AI agents by implementing semantic caching that reuses responses based on meaning rather than exact text.,Redis,,Semantic Caching for AI Agents
"Design, Develop, and Deploy Multi-Agent Systems with CrewAI","Build practical multi-agent systems that collaborate, use tools and memory, and scale reliably to production.",CrewAI,,"Design, Develop, and Deploy Multi-Agent Systems with CrewAI"
Jupyter AI: AI Coding in Notebooks,"Learn to code with AI in Jupyter notebooks. Use Jupyter AI to generate code, get explanations, and analyze data.",Project Jupyter,,Jupyter AI: AI Coding in Notebooks
PyTorch for Deep Learning Professional Certificate,"Learn the core principles of building, optimizing, and deploying deep learning models using PyTorch.",DeepLearning.AI,,PyTorch for Deep Learning Professional Certificate
Fine-tuning and Reinforcement Learning for LLMs: Intro to Post-Training,"Learn how to apply fine-tuning and reinforcement learning techniques to shape model behavior, improve reasoning, and make LLMs safer and more reliable.",AMD,,Fine-tuning and Reinforcement Learning for LLMs: Intro to Post-Training


### Website Screenshot: