diff --git a/.gitignore b/.gitignore index 1b556a6..0efc690 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /ws/ws +/node_modules diff --git a/README.md b/README.md index 8c5228a..2c8d3ec 100644 --- a/README.md +++ b/README.md @@ -53,18 +53,18 @@ By default it exposes the `public` dir using the `1234` port. $ go run ws/main.go ``` -## Single request - -This bench is a very basic test to compare the two software. -We start the browser and request the fake web page once with full JS execution. The final DOMTree is -rendered in stdout. - ### Test machine The tests are run in an AWS m5.large (x86_64) with a fresh Debian install. ![aws.m5 neofetch](./img/aws_m5_neofetch.png) +## Single request + +This bench is a very basic test to compare the two software. +We start the browser and request the fake web page once with full JS execution. The final DOMTree is +rendered in stdout. + We use Google Chrome version 122.0.6261.94. ```console @@ -147,7 +147,46 @@ $ /usr/bin/time -v ./browsercore-get --dump http://127.0.0.1:1234/campfire-comme Exit status: 0 ``` -## Multiple requests +## Multiple requests using Playwright + +We now compare multiple page loads and JS evaluations using +[Playwright](https://playwright.dev). + +### Dependencies + +To run the benchmark, you need to install [nodejs](https://nodejs.org/en/download). + +Once `nodejs` is installed, please run `npm install` to install nodejs +dependencies, mainly Playwright. + +You also have to install [Google Chrome](https://www.google.com/chrome/) and +Lightpanda browser, but the code is not publicly available yet. + +### Google Chrome benchmark + +We use Google Chrome version 123.0.6312.105. + +The `playwright/chrome.js` benchmark accepts multiple env vars to be configured. +* `CHROME_PATH` is the path to your Google Chrome bin, +* `BASE_URL` is the base url of the running web server to request, by default `http://127.0.0.1:1234`, +* `RUNS` is the number of pages loaded by the benchmark, default is `100`. 
+ +`npm run bench-chrome` starts a Playwright process, loads a Google Chrome +instance and loads the page to extract data 100 times. + +```console +$ CHROME_PATH=`which google-chrome` npm run bench-chrome + +> demo@1.0.0 bench-chrome +> node playwright/chrome.js + +................................................................................ +.................... +total runs 100 +total duration (ms) 18792 +avg run duration (ms) 184 +min run duration (ms) 168 +max run duration (ms) 323 +``` -We plan to create a benchmark to compare the memory used during multiple -successive requests sent to a CDP server. +![aws.m5 Playwright with Google Chrome](./img/aws_m5_playwright_chrome.png) diff --git a/img/aws_m5_playwright_chrome.png b/img/aws_m5_playwright_chrome.png new file mode 100644 index 0000000..217dda0 Binary files /dev/null and b/img/aws_m5_playwright_chrome.png differ diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..ac3fc58 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,57 @@ +{ + "name": "demo", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "demo", + "version": "1.0.0", + "license": "Apache 2", + "dependencies": { + "playwright": "^1.42.1" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/playwright": { + "version": "1.42.1", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.42.1.tgz", + "integrity": "sha512-PgwB03s2DZBcNRoW+1w9E+VkLBxweib6KTXM0M3tkiT4jVxKSi6PmVJ591J+0u10LUrgxB7dLRbiJqO5s2QPMg==", + "dependencies": { + "playwright-core": "1.42.1" + }, + "bin": { + "playwright": "cli.js" + }, + 
"engines": { + "node": ">=16" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.42.1", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.42.1.tgz", + "integrity": "sha512-mxz6zclokgrke9p1vtdy/COWBH+eOZgYUVVU34C73M+4j4HLlQJHtfcqiqqxpP0o8HhMkflvfbquLX5dg6wlfA==", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=16" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..07a5133 --- /dev/null +++ b/package.json @@ -0,0 +1,26 @@ +{ + "type": "module", + "name": "demo", + "version": "1.0.0", + "description": "Lightpanda browser demo", + "main": "index.js", + "scripts": { + "install-chrome": "npx playwright install chrome", + "ws": "go run ws/main.go", + "bench-chrome": "node playwright/chrome.js" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/lightpanda-io/demo.git" + }, + "keywords": [], + "author": "Lightpanda", + "license": "Apache 2", + "bugs": { + "url": "https://github.com/lightpanda-io/demo/issues" + }, + "homepage": "https://lightpanda.io", + "dependencies": { + "playwright": "^1.42.1" + } +} diff --git a/playwright/chrome.js b/playwright/chrome.js new file mode 100644 index 0000000..8eaf593 --- /dev/null +++ b/playwright/chrome.js @@ -0,0 +1,113 @@ +// Import the Chromium browser into our scraper. +import { chromium } from 'playwright'; + +// options passed to the browser. +let browser_options = {}; + +// chrome browser path +if (process.env.CHROME_PATH) { + browser_options.executablePath = process.env.CHROME_PATH; +} + +// headless +if (process.env.HEADLESS) { + browser_options.headless = process.env.HEADLESS === 'true'; +} + +// web serveur url +const baseURL = process.env.BASE_URL ? process.env.BASE_URL : 'http://127.0.0.1:1234'; + +// runs +const runs = process.env.RUNS ? parseInt(process.env.RUNS) : 100; + +// measure general time. 
// Wall-clock start of the whole benchmark (browser launch included),
// in nanoseconds as a BigInt.
const gstart = process.hrtime.bigint();

// Duration of every completed run, in nanoseconds (BigInt). Entries are
// appended, so the array is dense and `metrics.length` is exactly the number
// of completed runs.
const metrics = [];

/**
 * Formats a nanosecond duration as a whole number of milliseconds.
 *
 * @param {bigint} ns - Duration in nanoseconds.
 * @returns {string} Truncated millisecond value as a decimal string.
 */
function nsToMs(ns) {
  return (ns / 1000000n).toString();
}

/**
 * Waits until the product page has rendered its dynamic content, then reads
 * the product data out of the DOM.
 *
 * The locator calls are awaited one after another on purpose: the sequence of
 * round-trips to the browser is part of what the benchmark measures.
 *
 * @param page - Playwright page already navigated to `/campfire-commerce`.
 * @returns {Promise<object>} Object with `name`, `price`, `description`,
 *   `features`, `image`, `related` and `reviews`.
 */
async function extractProduct(page) {
  // Ensure the price is loaded. The null guard keeps the predicate from
  // throwing while the element is not in the DOM yet.
  await page.waitForFunction(() => {
    const price = document.querySelector('#product-price');
    return price !== null && price.textContent.length > 0;
  });

  // Ensure the reviews are loaded.
  await page.waitForFunction(() => {
    const reviews = document.querySelectorAll('#product-reviews > div');
    return reviews.length > 0;
  });

  const res = {};

  res.name = await page.locator('#product-name').textContent();
  // substring(1) drops the first character of the price text before parsing.
  res.price = parseFloat((await page.locator('#product-price').textContent()).substring(1));
  res.description = await page.locator('#product-description').textContent();
  res.features = await page.locator('#product-features > li').allTextContents();
  res.image = await page.locator('#product-image').getAttribute('src');

  const related = [];
  for (const row of await page.locator('#product-related > div').all()) {
    related.push({
      name: await row.locator('h4').textContent(),
      price: parseFloat((await row.locator('p').textContent()).substring(1)),
      image: await row.locator('img').getAttribute('src'),
    });
  }
  res.related = related;

  const reviews = [];
  for (const row of await page.locator('#product-reviews > div').all()) {
    reviews.push({
      title: await row.locator('h4').textContent(),
      text: await row.locator('p').textContent(),
    });
  }
  res.reviews = reviews;

  return res;
}

// Open a Chromium browser. Executable path and headless mode come from
// `browser_options`, which is filled from the CHROME_PATH / HEADLESS env vars.
const browser = await chromium.launch(browser_options);

try {
  for (let run = 1; run <= runs; run++) {
    // measure run time.
    const rstart = process.hrtime.bigint();

    const context = await browser.newContext({
      baseURL: baseURL,
    });

    try {
      const page = await context.newPage();
      await page.goto('/campfire-commerce');

      // The extracted data is intentionally discarded: only the time needed
      // to load the page and read it matters here.
      await extractProduct(page);

      // Progress indicator on stderr, wrapped every 80 runs.
      process.stderr.write('.');
      if (run % 80 === 0) {
        process.stderr.write('\n');
      }

      await page.close();
    } finally {
      // Always release the context, even when a run fails.
      await context.close();
    }

    metrics.push(process.hrtime.bigint() - rstart);
  }
} finally {
  // Turn off the browser to clean up after ourselves, even on failure.
  await browser.close();
}

const gduration = process.hrtime.bigint() - gstart;

process.stderr.write('\n');

console.log('total runs', runs);
console.log('total duration (ms)', nsToMs(gduration));

// Per-run statistics only exist when at least one run completed; reducing an
// empty array without an initial value would throw.
if (metrics.length > 0) {
  const total = metrics.reduce((sum, d) => sum + d, 0n);
  const avg = total / BigInt(metrics.length);
  const min = metrics.reduce((lo, d) => (d < lo ? d : lo));
  const max = metrics.reduce((hi, d) => (d > hi ? d : hi));

  console.log('avg run duration (ms)', nsToMs(avg));
  console.log('min run duration (ms)', nsToMs(min));
  console.log('max run duration (ms)', nsToMs(max));
}