# Simple HTML Test

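Test weblite with simple HTML content: load a small static HTML fixture into headless Chromium via Playwright, convert the page to weblite's compact representation, and print the result both with and without wrapper collapsing.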

In [1]:
import sys
import os
# Put `<parent of the current working directory>/src` at the front of sys.path
# so the `weblite` imports below can be resolved from that directory.
# NOTE(review): this relies on the kernel's working directory being the
# notebook's own folder — confirm before running from elsewhere.
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'src'))

from playwright.async_api import async_playwright
from weblite import PlaywrightPage
from weblite.utils.formatting import to_compact_json

In [2]:
# Read the simple HTML fixture that the rest of the notebook parses.
# The path is relative to the kernel's working directory.
# An explicit encoding makes the decoded text independent of the platform's
# locale default (assumes the fixture is stored as UTF-8 — TODO confirm).
with open('../tests/fixtures/simple_test.html', 'r', encoding='utf-8') as f:
    html_content = f.read()

print("HTML loaded:")
# Show only the first 200 characters as a preview of the document.
print(html_content[:200] + "...")

HTML loaded:
<!DOCTYPE html>
<html>
<head>
    <title>Simple Test Page</title>
</head>
<body>
    <h1>Welcome</h1>
    <p>This is a simple test page.</p>
    
    <div class="form-section">
        <label>Name:</l...


In [3]:
# Start Playwright and launch a headless Chromium instance with one page.
# `playwright_ctx` and `browser` stay open after this cell; they are released
# by the cleanup cell at the end of the notebook.
playwright_ctx = await async_playwright().start()
browser = await playwright_ctx.chromium.launch(headless=True)
page = await browser.new_page()

# Load the HTML string read earlier directly into the page (no URL navigation)
await page.set_content(html_content)

# Wrap the Playwright page in weblite's PlaywrightPage interface,
# which provides the `to_weblite()` call used in the next cell
web_page = PlaywrightPage(page)

print("HTML loaded into browser")

HTML loaded into browser


In [4]:
# Convert to weblite format using new 0.2.0 API
element = await web_page.to_weblite()

if element:
    # Get the simplified representation
    result = element.to_dict(collapse_wrappers=True)
    print("Parse result:")
    print(to_compact_json(result))
else:
    print("No visible content found")

Parse result:
{
 "body": [
  {"h1": "Welcome"},
  {"p": "This is a simple test page."},
  {
   "div": [
    {"label": "Name:"},
    {"input": "@placeholder=Enter your name"},
    {"label": "Email:"},
    {"input": "@value=test@example.com"},
    {"button": "Submit"}
   ]
  },
  {
   "div": [
    {
     "div": [
      {"h3": "Product 1"},
      {"p": "$29.99"},
      {"button": "Buy Now"}
     ]
    },
    {
     "div": [
      {"h3": "Product 2"},
      {"p": "$39.99"},
      {"button": "Buy Now"}
     ]
    }
   ]
  }
 ]
}


In [5]:
# Serialise the same `element` again, this time with wrapper collapsing
# turned off, so the output can be compared with the collapsed version above.
# Nothing is printed when `element` is falsy (no else branch here).
if element:
    result_full = element.to_dict(collapse_wrappers=False)
    print("Full structure (no wrapper collapsing):")
    print(to_compact_json(result_full))

Full structure (no wrapper collapsing):
{
 "body": [
  {"h1": "Welcome"},
  {"p": "This is a simple test page."},
  {
   "div": [
    {"label": "Name:"},
    {"input": "@placeholder=Enter your name"},
    {"label": "Email:"},
    {"input": "@value=test@example.com"},
    {"button": "Submit"}
   ]
  },
  {
   "div": [
    {
     "div": [
      {"h3": "Product 1"},
      {"p": "$29.99"},
      {"button": "Buy Now"}
     ]
    },
    {
     "div": [
      {"h3": "Product 2"},
      {"p": "$39.99"},
      {"button": "Buy Now"}
     ]
    }
   ]
  }
 ]
}


In [6]:
# Cleanup: Close browser
await browser.close()
await playwright_ctx.stop()
print("Browser closed!")

Browser closed!
