From 4506bbbb005918f0800ffeb72f0bb40256f0e6ac Mon Sep 17 00:00:00 2001 From: Francis Bouvier Date: Thu, 11 Apr 2024 18:58:44 +0200 Subject: [PATCH 1/4] Playwright: connect over CDP Signed-off-by: Francis Bouvier --- README.md | 41 ++++++++++++++++---- package.json | 2 +- playwright/{chrome.js => cdp.js} | 65 ++++++++++++++++++++++++++------ 3 files changed, 88 insertions(+), 20 deletions(-) rename playwright/{chrome.js => cdp.js} (73%) diff --git a/README.md b/README.md index 2c8d3ec..7b0e978 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ $ /usr/bin/time -v ./browsercore-get --dump http://127.0.0.1:1234/campfire-comme ## Multiple requests using Playwright We compare now multiple page loads and js evaluations using -[Playwright](https://playwright.dev). +[Playwright](https://playwright.dev), which connects to the browser using CDP (Chrome DevTools Protocol). ### Dependencies @@ -162,20 +162,28 @@ dependencies, mainly Playwright. You have also to install [Google Chrome](https://www.google.com/chrome/) and Lightpanda browser, but the code is not publicly available yet. -### Google Chrome benchmark - -We use Google Chrome version 123.0.6312.105. +### Running the benchmark The `playwright/chrome.js` benchmark accepts multiple env vars to be configured. -* `CHROME_PATH` is the path to your Google Chrome bin, -* `BASE_URL` is the base url of the running web reser to request, by default `http://127.0.0.1:1234`, +* `BROWSER_PATH` is the path to your browser implementing the CDP protocol. It can be either the path to a local binary or an URL (host:port) of a running browser. Default value is empty, which will launch the Google Chrome installed through Playwright. +* `BASE_URL` is the base url of the running web reser to request, by default `http://127.0.0.1:1234`. * `RUNS` is the number of pages loaded by the benchmark, default is `100`. `npm run bench-chrome` starts a playwright process, load a Google Chrome instance and load the page to extract data 100 times. 
```console -$ CHROME_PATH=`which google-chrome` npm run bench-chrome +$ BROWSER_PATH=127.0.0.1:9222 npm run bench-cdp +``` + +### Results + +**Google Chrome*** + +We use Google Chrome version 123.0.6312.105. + +```console +$ npm run bench-cdp > demo@1.0.0 bench-chrome > node playwright/chrome.js @@ -190,3 +198,22 @@ max run duration (ms) 323 ``` ![aws.m5 Playwright with Google Chrome](./img/aws_m5_playwright_chrome.png) + +**Lightpanda*** + +Current version (commit X). + +```console +$ npm run bench-cdp + +> demo@1.0.0 bench-chrome +> node playwright/chrome.js + +................................................................................ +.................... +total runs 100 +total duration (ms) 18792 +avg run duration (ms) 184 +min run duration (ms) 168 +max run duration (ms) 323 +``` diff --git a/package.json b/package.json index 07a5133..2f3748a 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "scripts": { "install-chrome": "npx playwright install chrome", "ws": "go run ws/main.go", - "bench-chrome": "node playwright/chrome.js" + "bench-cdp": "node playwright/cdp.js" }, "repository": { "type": "git", diff --git a/playwright/chrome.js b/playwright/cdp.js similarity index 73% rename from playwright/chrome.js rename to playwright/cdp.js index 3cb842b..dce80f8 100644 --- a/playwright/chrome.js +++ b/playwright/cdp.js @@ -11,21 +11,39 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +import fs from 'fs'; // Import the Chromium browser into our scraper. import { chromium } from 'playwright'; -// options passed to the browser. 
-let browser_options = {}; - -// chrome browser path -if (process.env.CHROME_PATH) { - browser_options.executablePath = process.env.CHROME_PATH; +// check if browser path if a local path or an URL +let browserPath = process.env.BROWSER_PATH; +let networkPath; +if (browserPath) { + + // not local path + if (!fs.existsSync(browserPath)) { + if (!browserPath.startsWith("http://")) { + browserPath = "http://" + browserPath + } + const url = new URL(browserPath); + networkPath = url.host; + } } -// headless -if (process.env.HEADLESS) { - browser_options.headless = process.env.HEADLESS === 'true'; +// options passed to the browser +let browserOptions = {}; +if (!networkPath) { + + // chrome browser path + if (browserPath) { + browserOptions.executablePath = browserPath; + } + + // headless + if (process.env.HEADLESS) { + browserOptions.headless = process.env.HEADLESS === 'true'; + } } // web serveur url @@ -39,9 +57,32 @@ const gstart = process.hrtime.bigint(); // store all run durations let metrics = []; -// Open a Chromium browser. We use headless: false -// to be able to watch the browser window. -const browser = await chromium.launch(browser_options); +let browser; +if (networkPath) { + + // Connect to an existing browser + console.log("Connection to browser on " + networkPath + "..."); + + const resp = await fetch("http://" + networkPath + "/json/version"); + const version = await resp.json() + const wsURL = version.webSocketDebuggerUrl; + + browser = await chromium.connectOverCDP(wsURL); + +} else { + + // Launching a new browser + if (browserPath) { + console.log("Launching browser " + browserPath); + } else { + console.log("Launching browser"); + } + + // We use headless: false + // to be able to watch the browser window. 
+ browser = await chromium.launch(browserOptions); + +} for (var run = 1; run<=runs; run++) { From 044c6bfa340199d5b6328eaa447410c03716576e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 29 May 2024 16:02:55 +0200 Subject: [PATCH 2/4] Apply suggestions from code review --- README.md | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 7b0e978..c2ea4e0 100644 --- a/README.md +++ b/README.md @@ -164,12 +164,12 @@ Lightpanda browser, but the code is not publicly available yet. ### Running the benchmark -The `playwright/chrome.js` benchmark accepts multiple env vars to be configured. +The `playwright/cdp.js` benchmark accepts multiple env vars to be configured. * `BROWSER_PATH` is the path to your browser implementing the CDP protocol. It can be either the path to a local binary or an URL (host:port) of a running browser. Default value is empty, which will launch the Google Chrome installed through Playwright. * `BASE_URL` is the base url of the running web reser to request, by default `http://127.0.0.1:1234`. * `RUNS` is the number of pages loaded by the benchmark, default is `100`. -`npm run bench-chrome` starts a playwright process, load a Google Chrome +`npm run bench-cdp` starts a playwright process instance and load the page to extract data 100 times. ```console @@ -198,22 +198,3 @@ max run duration (ms) 323 ``` ![aws.m5 Playwright with Google Chrome](./img/aws_m5_playwright_chrome.png) - -**Lightpanda*** - -Current version (commit X). - -```console -$ npm run bench-cdp - -> demo@1.0.0 bench-chrome -> node playwright/chrome.js - -................................................................................ -.................... 
-total runs 100 -total duration (ms) 18792 -avg run duration (ms) 184 -min run duration (ms) 168 -max run duration (ms) 323 -``` From 77f545365c2d948641a560a761eb38f87911fb46 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 29 May 2024 16:03:10 +0200 Subject: [PATCH 3/4] Apply suggestions from code review --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c2ea4e0..9ce7b2c 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,7 @@ $ BROWSER_PATH=127.0.0.1:9222 npm run bench-cdp ### Results -**Google Chrome*** +**Google Chrome** We use Google Chrome version 123.0.6312.105. From e93e147485948b9e8a5be8b8e9bd51469168e53e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 29 May 2024 16:11:42 +0200 Subject: [PATCH 4/4] playwright: use BROWSER_ADDRESS env var only and remove the BROWSER_PATH --- README.md | 10 ++++++-- package.json | 1 - playwright/cdp.js | 61 ++++------------------------------------------- 3 files changed, 13 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 9ce7b2c..87be89b 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ Lightpanda browser, but the code is not publicly available yet. ### Running the benchmark The `playwright/cdp.js` benchmark accepts multiple env vars to be configured. -* `BROWSER_PATH` is the path to your browser implementing the CDP protocol. It can be either the path to a local binary or an URL (host:port) of a running browser. Default value is empty, which will launch the Google Chrome installed through Playwright. +* `BROWSER_ADDRESS` is the address of the running browser listening for CDP connections, by default `http://127.0.0.1:9222`. * `BASE_URL` is the base url of the running web reser to request, by default `http://127.0.0.1:1234`. * `RUNS` is the number of pages loaded by the benchmark, default is `100`. @@ -173,7 +173,7 @@ The `playwright/cdp.js` benchmark accepts multiple env vars to be configured. 
instance and load the page to extract data 100 times. ```console -$ BROWSER_PATH=127.0.0.1:9222 npm run bench-cdp +$ npm run bench-cdp ``` ### Results @@ -182,6 +182,12 @@ $ BROWSER_PATH=127.0.0.1:9222 npm run bench-cdp We use Google Chrome version 123.0.6312.105. +You have to start the browser first. +```console +$ google-chrome --headless=new --disable-gpu --remote-debugging-port=9222 +``` + +Then you can run the benchmark. ```console $ npm run bench-cdp diff --git a/package.json b/package.json index 2f3748a..600c224 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,6 @@ "description": "Lightpanda browser demo", "main": "index.js", "scripts": { - "install-chrome": "npx playwright install chrome", "ws": "go run ws/main.go", "bench-cdp": "node playwright/cdp.js" }, diff --git a/playwright/cdp.js b/playwright/cdp.js index dce80f8..06b6d20 100644 --- a/playwright/cdp.js +++ b/playwright/cdp.js @@ -11,40 +11,12 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -import fs from 'fs'; // Import the Chromium browser into our scraper. import { chromium } from 'playwright'; -// check if browser path if a local path or an URL -let browserPath = process.env.BROWSER_PATH; -let networkPath; -if (browserPath) { - - // not local path - if (!fs.existsSync(browserPath)) { - if (!browserPath.startsWith("http://")) { - browserPath = "http://" + browserPath - } - const url = new URL(browserPath); - networkPath = url.host; - } -} - -// options passed to the browser -let browserOptions = {}; -if (!networkPath) { - - // chrome browser path - if (browserPath) { - browserOptions.executablePath = browserPath; - } - - // headless - if (process.env.HEADLESS) { - browserOptions.headless = process.env.HEADLESS === 'true'; - } -} +// browserAddress +const browserAddress = process.env.BROWSER_ADDRESS ? 
process.env.BROWSER_ADDRESS : 'http://127.0.0.1:9222'; // web serveur url const baseURL = process.env.BASE_URL ? process.env.BASE_URL : 'http://127.0.0.1:1234'; @@ -57,32 +29,9 @@ const gstart = process.hrtime.bigint(); // store all run durations let metrics = []; -let browser; -if (networkPath) { - - // Connect to an existing browser - console.log("Connection to browser on " + networkPath + "..."); - - const resp = await fetch("http://" + networkPath + "/json/version"); - const version = await resp.json() - const wsURL = version.webSocketDebuggerUrl; - - browser = await chromium.connectOverCDP(wsURL); - -} else { - - // Launching a new browser - if (browserPath) { - console.log("Launching browser " + browserPath); - } else { - console.log("Launching browser"); - } - - // We use headless: false - // to be able to watch the browser window. - browser = await chromium.launch(browserOptions); - -} +// Connect to an existing browser +console.log("Connection to browser on " + browserAddress); +const browser = await chromium.connectOverCDP(browserAddress); for (var run = 1; run<=runs; run++) {