From b4fe0a66e16f421b329f2ba7b3ccbc5af0db6eb6 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Sun, 7 Sep 2025 01:33:06 +0200 Subject: [PATCH 1/9] feat(screenshot): better full page --- packages/screenshot/src/index.js | 87 +++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 19 deletions(-) diff --git a/packages/screenshot/src/index.js b/packages/screenshot/src/index.js index 8b20a6d11..85e139698 100644 --- a/packages/screenshot/src/index.js +++ b/packages/screenshot/src/index.js @@ -32,6 +32,47 @@ const waitForImagesOnViewport = page => ) ) +const scrollFullPageToLoadContent = async (page, timeout) => { + await page.evaluate(timeout => { + return new Promise(resolve => { + let totalHeight = 0 + const distance = Math.floor(window.innerHeight * 0.33) + const waitForContent = () => { + const startTime = Date.now() + const checkContent = () => { + const scrollHeight = document.body ? document.body.scrollHeight : 0 + const viewportHeight = window.innerHeight + const elapsed = Date.now() - startTime + + if (scrollHeight > viewportHeight) { + startScrolling() + } else if (elapsed >= timeout / 2) { + startScrolling() + } else { + setTimeout(checkContent, 50) + } + } + checkContent() + } + + const startScrolling = async () => { + const scrollHeight = document.body.scrollHeight + const expectedSteps = Math.ceil(scrollHeight / distance) + const scrollDelay = timeout / 2 / expectedSteps + while (totalHeight < document.body.scrollHeight) { + window.scrollBy(0, distance) + totalHeight += distance + await new Promise(resolve => setTimeout(resolve, scrollDelay)) + } + resolve() + } + + waitForContent() + }) + }, timeout) + await page.evaluate(() => window.scrollTo(0, 0)) +} + const waitForElement = async (page, element) => { const screenshotOpts = {} @@ -62,24 +103,32 @@ module.exports = ({ goto, ...gotoOpts }) => { let screenshot let response - const beforeScreenshot = response => { + const beforeScreenshot = (response, isFullPage = false) => { const timeout = goto.timeouts.action(goto.timeouts.base(opts.timeout)) - return Promise.all( - [ - { - fn: () => page.evaluate('document.fonts.ready'), - debug: 'beforeScreenshot:fontsReady' - }, - { - fn: () => waitForPrism(page, response, { codeScheme, ...opts }), - debug: 'beforeScreenshot:waitForPrism' - }, - { - fn: () => waitForImagesOnViewport(page), - debug: 'beforeScreenshot:waitForImagesOnViewport' - } - ].map(({ fn, ...opts }) => goto.run({ fn: fn(), ...opts, timeout })) - ) + const tasks = [ + { + fn: () => page.evaluate('document.fonts.ready'), + debug: 'beforeScreenshot:fontsReady' + }, + { + fn: () => waitForPrism(page, response, { codeScheme, ...opts }), + debug: 'beforeScreenshot:waitForPrism' + }, + { + fn: () => waitForImagesOnViewport(page), + debug: 'beforeScreenshot:waitForImagesOnViewport' + } + ] + + // Add full page scrolling for better content loading when taking full page screenshots + if (isFullPage) { + tasks.push({ + fn: () => scrollFullPageToLoadContent(page, timeout), + debug: 'beforeScreenshot:scrollFullPageToLoadContent' + }) + } + + return Promise.all(tasks.map(({ fn, ...opts }) => goto.run({ fn: fn(), ...opts, timeout }))) } const takeScreenshot = async opts => { @@ -100,7 +149,7 @@ module.exports = ({ goto, ...gotoOpts }) => { ;({ response } = await goto(page, { ...opts, url, waitUntil })) const [screenshotOpts] = await Promise.all([ waitForElement(page, element), - beforeScreenshot(response) + beforeScreenshot(response, !element && opts.fullPage !== false) ]) screenshot = await page.screenshot({ ...opts, ...screenshotOpts }) debug('screenshot', { waitUntil, duration: timeScreenshot() }) @@ -109,7 +158,7 @@ module.exports = ({ goto, ...gotoOpts }) => { async function waitUntilAuto (page, { response }) { const [screenshotOpts] = await Promise.all([ waitForElement(page, element), - beforeScreenshot(response) + beforeScreenshot(response, !element && opts.fullPage !== false) ]) const { isWhite } = await takeScreenshot({ ...opts, ...screenshotOpts }) debug('screenshot', { waitUntil, isWhite, duration: timeScreenshot() }) From 0ce31974fda50e6cf26fb04a362631df3e11e6c1 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Sun, 7 Sep 2025 11:33:53 +0200 Subject: [PATCH 2/9] refactor: better tasks definition --- packages/screenshot/src/index.js | 47 ++++++++++++------------- packages/screenshot/src/pretty/index.js | 2 -- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/packages/screenshot/src/index.js b/packages/screenshot/src/index.js index 85e139698..a05737a6e 100644 --- a/packages/screenshot/src/index.js +++ b/packages/screenshot/src/index.js @@ -75,14 +75,12 @@ const scrollFullPageToLoadContent = async (page, timeout) => { const waitForElement = async (page, element) => { const screenshotOpts = {} - if (element) { await page.waitForSelector(element, { visible: true }) screenshotOpts.clip = await page.$eval(element, getBoundingClientRect) screenshotOpts.fullPage = false return screenshotOpts } - return screenshotOpts } @@ -92,43 +90,48 @@ module.exports = ({ goto, ...gotoOpts }) => { return function screenshot (page) { return async ( url, - { - element, - codeScheme = 'atom-dark', - overlay: overlayOpts = {}, - waitUntil = 'auto', - ...opts - } = {} + { codeScheme = 'atom-dark', overlay: overlayOpts = {}, waitUntil = 'auto', ...opts } = {} ) => { let screenshot let response - const beforeScreenshot = (response, isFullPage = false) => { + const beforeScreenshot = async (page, response, { element, fullPage = false } = {}) => { const timeout = goto.timeouts.action(goto.timeouts.base(opts.timeout)) + let screenshotOpts = {} const tasks = [ { fn: () => page.evaluate('document.fonts.ready'), debug: 'beforeScreenshot:fontsReady' }, - { - fn: () => waitForPrism(page, response, { codeScheme, ...opts }), - debug: 'beforeScreenshot:waitForPrism' - }, { fn: () => waitForImagesOnViewport(page), debug: 'beforeScreenshot:waitForImagesOnViewport' } ] - // Add full page scrolling for better content loading when taking full page screenshots - if (isFullPage) { + if (codeScheme && response) { + tasks.push({ + fn: () => waitForPrism(page, response, { codeScheme, ...opts }), + debug: 'beforeScreenshot:waitForPrism' + }) + } + + if (fullPage) { tasks.push({ fn: () => scrollFullPageToLoadContent(page, timeout), debug: 'beforeScreenshot:scrollFullPageToLoadContent' }) + } else if (element) { + tasks.push({ + fn: async () => { + screenshotOpts = await waitForElement(page, element) + }, + debug: 'beforeScreenshot:waitForElement' + }) } - return Promise.all(tasks.map(({ fn, ...opts }) => goto.run({ fn: fn(), ...opts, timeout }))) + await Promise.all(tasks.map(({ fn, ...opts }) => goto.run({ fn: fn(), ...opts, timeout }))) + return screenshotOpts } const takeScreenshot = async opts => { @@ -147,19 +150,13 @@ module.exports = ({ goto, ...gotoOpts }) => { if (waitUntil !== 'auto') { ;({ response } = await goto(page, { ...opts, url, waitUntil })) - const [screenshotOpts] = await Promise.all([ - waitForElement(page, element), - beforeScreenshot(response, !element && opts.fullPage !== false) - ]) + const screenshotOpts = await beforeScreenshot(page, response, opts) screenshot = await page.screenshot({ ...opts, ...screenshotOpts }) debug('screenshot', { waitUntil, duration: timeScreenshot() }) } else { ;({ response } = await goto(page, { ...opts, url, waitUntil, waitUntilAuto })) async function waitUntilAuto (page, { response }) { - const [screenshotOpts] = await Promise.all([ - waitForElement(page, element), - beforeScreenshot(response, !element && opts.fullPage !== false) - ]) + const screenshotOpts = await beforeScreenshot(page, response, opts) const { isWhite } = await takeScreenshot({ ...opts, ...screenshotOpts }) debug('screenshot', { waitUntil, isWhite, duration: timeScreenshot() }) } diff --git a/packages/screenshot/src/pretty/index.js b/packages/screenshot/src/pretty/index.js index 3387a12d2..870a2423b 100644 --- a/packages/screenshot/src/pretty/index.js +++ b/packages/screenshot/src/pretty/index.js @@ -32,8 +32,6 @@ const JSONParse = input => { } module.exports = async (page, response, { timeout, codeScheme, styles, scripts, modules }) => { - if (!response || !codeScheme) return - let [theme, content, prism] = await Promise.all([getTheme(codeScheme), response.text(), getPrism]) if (isHtmlContent(content)) return From 599505bf6e810ca726ebd51021742cd46de9d8c0 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Tue, 9 Sep 2025 10:28:47 +0200 Subject: [PATCH 3/9] refactor: 3 steps --- packages/screenshot/src/index.js | 64 ++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/packages/screenshot/src/index.js b/packages/screenshot/src/index.js index a05737a6e..59316029d 100644 --- a/packages/screenshot/src/index.js +++ b/packages/screenshot/src/index.js @@ -33,43 +33,53 @@ const waitForImagesOnViewport = page => ) const scrollFullPageToLoadContent = async (page, timeout) => { + // Wait for initial content to be ready await page.evaluate(timeout => { return new Promise(resolve => { - let totalHeight = 0 - const distance = Math.floor(window.innerHeight * 0.33) - const waitForContent = () => { - const startTime = Date.now() - const checkContent = () => { - const scrollHeight = document.body ? document.body.scrollHeight : 0 - const viewportHeight = window.innerHeight - const elapsed = Date.now() - startTime - - if (scrollHeight > viewportHeight) { - startScrolling() - } else if (elapsed >= timeout / 2) { - startScrolling() - } else { - setTimeout(checkContent, 50) - } + const startTime = Date.now() + const maxWaitTime = timeout / 2 + + const checkContent = () => { + const scrollHeight = document.body?.scrollHeight || 0 + const viewportHeight = window.innerHeight + const elapsed = Date.now() - startTime + if (scrollHeight > viewportHeight || elapsed >= maxWaitTime) { + resolve() + } else { + setTimeout(checkContent, 100) } - checkContent() } - const startScrolling = async () => { - const scrollHeight = document.body.scrollHeight - const expectedSteps = Math.ceil(scrollHeight / distance) - const scrollDelay = timeout / 2 / expectedSteps - while (totalHeight < document.body.scrollHeight) { - window.scrollBy(0, distance) - totalHeight += distance - await new Promise(resolve => setTimeout(resolve, scrollDelay)) + checkContent() + }) + }, timeout) + + // then, scroll the page to load the content + await page.evaluate(timeout => { + return new Promise(resolve => { + let currentScrollPosition = 0 + const scrollStep = Math.floor(window.innerHeight * 0.5) // 50% of viewport + const pageHeight = document.body.scrollHeight + const totalSteps = Math.ceil(pageHeight / scrollStep) + const stepDelay = timeout / 2 / totalSteps + + const scrollNext = async () => { + if (currentScrollPosition >= pageHeight) { + resolve() + return } - resolve() + + window.scrollBy(0, scrollStep) + currentScrollPosition += scrollStep + + setTimeout(scrollNext, stepDelay) } - waitForContent() + scrollNext() }) }, timeout) + + // finally, scroll back to top await page.evaluate(() => window.scrollTo(0, 0)) } From b8c36cf2388fa08f364948fa50524190d8d9a4c0 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Tue, 9 Sep 2025 21:44:10 +0200 Subject: [PATCH 4/9] chore: add waitForDomStability --- packages/screenshot/src/index.js | 82 +++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 29 deletions(-) diff --git a/packages/screenshot/src/index.js b/packages/screenshot/src/index.js index 59316029d..ebe39ed1e 100644 --- a/packages/screenshot/src/index.js +++ b/packages/screenshot/src/index.js @@ -32,54 +32,68 @@ const waitForImagesOnViewport = page => ) ) -const scrollFullPageToLoadContent = async (page, timeout) => { - // Wait for initial content to be ready - await page.evaluate(timeout => { - return new Promise(resolve => { - const startTime = Date.now() - const maxWaitTime = timeout / 2 - - const checkContent = () => { - const scrollHeight = document.body?.scrollHeight || 0 - const viewportHeight = window.innerHeight - const elapsed = Date.now() - startTime - if (scrollHeight > viewportHeight || elapsed >= maxWaitTime) { - resolve() - } else { - setTimeout(checkContent, 100) - } +const waitForDomStability = ({ idle, timeout } = {}) => + new Promise(resolve => { + const target = document.body + if (!target) return resolve({ status: 'no-body' }) + + let lastChange = performance.now() + const observer = new window.MutationObserver(() => { + lastChange = performance.now() + }) + observer.observe(target, { + childList: true, + subtree: true, + attributes: false, + characterData: false + }) + + const deadline = performance.now() + timeout + + ;(function check () { + const now = performance.now() + if (now - lastChange >= idle) { + observer.disconnect() + return resolve({ status: 'idle' }) + } + if (now >= deadline) { + observer.disconnect() + return resolve({ status: 'timeout' }) } + window.requestAnimationFrame(check) + })() + }) - checkContent() - }) - }, timeout) +const scrollFullPageToLoadContent = async (page, timeout, goto) => { + const debug = require('debug-logfmt')('browserless:goto') + + const duration = debug.duration() + const result = await page.evaluate(waitForDomStability, { + idle: timeout / 2 / 2, + timeout: timeout / 2 + }) + + duration('waitForDomStability', result) - // then, scroll the page to load the content await page.evaluate(timeout => { return new Promise(resolve => { let currentScrollPosition = 0 - const scrollStep = Math.floor(window.innerHeight * 0.5) // 50% of viewport + const scrollStep = Math.floor(window.innerHeight) const pageHeight = document.body.scrollHeight const totalSteps = Math.ceil(pageHeight / scrollStep) const stepDelay = timeout / 2 / totalSteps - const scrollNext = async () => { if (currentScrollPosition >= pageHeight) { resolve() return } - window.scrollBy(0, scrollStep) currentScrollPosition += scrollStep - setTimeout(scrollNext, stepDelay) } - scrollNext() }) }, timeout) - - // finally, scroll back to top await page.evaluate(() => window.scrollTo(0, 0)) } @@ -107,6 +121,7 @@ module.exports = ({ goto, ...gotoOpts }) => { const beforeScreenshot = async (page, response, { element, fullPage = false } = {}) => { const timeout = goto.timeouts.action(goto.timeouts.base(opts.timeout)) + let screenshotOpts = {} const tasks = [ { @@ -128,7 +143,7 @@ module.exports = ({ goto, ...gotoOpts }) => { if (fullPage) { tasks.push({ - fn: () => scrollFullPageToLoadContent(page, timeout), + fn: () => scrollFullPageToLoadContent(page, timeout, goto), debug: 'beforeScreenshot:scrollFullPageToLoadContent' }) } else if (element) { @@ -140,7 +155,16 @@ module.exports = ({ goto, ...gotoOpts }) => { }) } - await Promise.all(tasks.map(({ fn, ...opts }) => goto.run({ fn: fn(), ...opts, timeout }))) + await Promise.all( + tasks.map(({ fn, ...opts }) => + goto.run({ + fn: fn(), + ...opts, + timeout: fullPage ? timeout * 2 : timeout + }) + ) + ) + return screenshotOpts } From 608bcc664ced9902dd77b2b1c5717327d12c0375 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Tue, 9 Sep 2025 21:44:23 +0200 Subject: [PATCH 5/9] refactor: use debug.duration --- packages/goto/package.json | 1 - packages/goto/src/index.js | 9 +++------ packages/screenshot/src/index.js | 5 ++--- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/packages/goto/package.json b/packages/goto/package.json index fc49764be..a74abb5eb 100644 --- a/packages/goto/package.json +++ b/packages/goto/package.json @@ -31,7 +31,6 @@ "dependencies": { "@browserless/devices": "^10.7.13", "@ghostery/adblocker-puppeteer": "~2.11.3", - "@kikobeats/time-span": "~1.0.8", "debug-logfmt": "~1.4.0", "got": "~11.8.6", "is-url-http": "~2.3.10", diff --git a/packages/goto/src/index.js b/packages/goto/src/index.js index 356cd81cb..fb174c118 100644 --- a/packages/goto/src/index.js +++ b/packages/goto/src/index.js @@ -11,8 +11,6 @@ const isUrl = require('is-url-http') const path = require('path') const fs = require('fs') -const timeSpan = require('@kikobeats/time-span')({ format: require('pretty-ms') }) - const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer') const debug = require('debug-logfmt')('browserless:goto') @@ -34,11 +32,10 @@ const isEmpty = val => val == null || !(Object.keys(val) || val).length const castArray = value => [].concat(value).filter(Boolean) const run = async ({ fn, timeout, debug: props }) => { - const debugProps = { duration: timeSpan() } + const duration = debug.duration() const result = await pReflect(timeout ? pTimeout(fn, timeout) : fn) - debugProps.duration = debugProps.duration() - if (result.isRejected) debugProps.error = result.reason.message || result.reason - debug(props, debugProps) + if (result.isRejected) props.error = result.reason.message || result.reason + duration(props) return result } diff --git a/packages/screenshot/src/index.js b/packages/screenshot/src/index.js index ebe39ed1e..bae87fde1 100644 --- a/packages/screenshot/src/index.js +++ b/packages/screenshot/src/index.js @@ -34,14 +34,13 @@ const waitForImagesOnViewport = page => const waitForDomStability = ({ idle, timeout } = {}) => new Promise(resolve => { - const target = document.body - if (!target) return resolve({ status: 'no-body' }) + if (!document.body) return resolve({ status: 'no-body' }) let lastChange = performance.now() const observer = new window.MutationObserver(() => { lastChange = performance.now() }) - observer.observe(target, { + observer.observe(document.body, { childList: true, subtree: true, attributes: false, From 9e8ba5782cf8c92a82c17ffaed7e3b5debbd2684 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Tue, 9 Sep 2025 22:33:19 +0200 Subject: [PATCH 6/9] fix: attach props --- packages/goto/src/index.js | 4 ++-- packages/screenshot/src/index.js | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/goto/src/index.js b/packages/goto/src/index.js index fb174c118..bef543af6 100644 --- a/packages/goto/src/index.js +++ b/packages/goto/src/index.js @@ -34,8 +34,8 @@ const castArray = value => [].concat(value).filter(Boolean) const run = async ({ fn, timeout, debug: props }) => { const duration = debug.duration() const result = await pReflect(timeout ? pTimeout(fn, timeout) : fn) - if (result.isRejected) props.error = result.reason.message || result.reason - duration(props) + const errorProps = result.isRejected ? { error: result.reason.message || result.reason } : '' + duration(props, errorProps) return result } diff --git a/packages/screenshot/src/index.js b/packages/screenshot/src/index.js index bae87fde1..f07bf2499 100644 --- a/packages/screenshot/src/index.js +++ b/packages/screenshot/src/index.js @@ -63,7 +63,7 @@ const waitForDomStability = ({ idle, timeout } = {}) => })() }) -const scrollFullPageToLoadContent = async (page, timeout, goto) => { +const scrollFullPageToLoadContent = async (page, timeout) => { const debug = require('debug-logfmt')('browserless:goto') const duration = debug.duration() @@ -119,7 +119,7 @@ module.exports = ({ goto, ...gotoOpts }) => { let response const beforeScreenshot = async (page, response, { element, fullPage = false } = {}) => { - const timeout = goto.timeouts.action(goto.timeouts.base(opts.timeout)) + const timeout = goto.timeouts.action(opts.timeout) let screenshotOpts = {} const tasks = [ From cad04ba26b3559a82375f5e4b4d1d9d0ffd8c97c Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Tue, 9 Sep 2025 23:04:03 +0200 Subject: [PATCH 7/9] Update index.js --- packages/goto/src/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/goto/src/index.js b/packages/goto/src/index.js index bef543af6..7ab74d12b 100644 --- a/packages/goto/src/index.js +++ b/packages/goto/src/index.js @@ -34,7 +34,7 @@ const castArray = value => [].concat(value).filter(Boolean) const run = async ({ fn, timeout, debug: props }) => { const duration = debug.duration() const result = await pReflect(timeout ? pTimeout(fn, timeout) : fn) - const errorProps = result.isRejected ? { error: result.reason.message || result.reason } : '' + const errorProps = result.isRejected ? { error: result.reason.message || result.reason } : false duration(props, errorProps) return result } From b0b0916f1e8808be7449aec7608c4abd81c57769 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Tue, 9 Sep 2025 23:04:35 +0200 Subject: [PATCH 8/9] Update index.js --- packages/goto/src/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/goto/src/index.js b/packages/goto/src/index.js index 7ab74d12b..a2defc6ed 100644 --- a/packages/goto/src/index.js +++ b/packages/goto/src/index.js @@ -34,7 +34,7 @@ const castArray = value => [].concat(value).filter(Boolean) const run = async ({ fn, timeout, debug: props }) => { const duration = debug.duration() const result = await pReflect(timeout ? pTimeout(fn, timeout) : fn) - const errorProps = result.isRejected ? { error: result.reason.message || result.reason } : false + const errorProps = result.isRejected ? { error: result.reason.message || result.reason } : {} duration(props, errorProps) return result } From 336a0451f45555e40ebc79619b4e327ee2827563 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Tue, 9 Sep 2025 23:05:35 +0200 Subject: [PATCH 9/9] refactor: remove brackets --- packages/screenshot/src/index.js | 38 +++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/packages/screenshot/src/index.js b/packages/screenshot/src/index.js index f07bf2499..514ca8643 100644 --- a/packages/screenshot/src/index.js +++ b/packages/screenshot/src/index.js @@ -74,25 +74,27 @@ const scrollFullPageToLoadContent = async (page, timeout) => { duration('waitForDomStability', result) - await page.evaluate(timeout => { - return new Promise(resolve => { - let currentScrollPosition = 0 - const scrollStep = Math.floor(window.innerHeight) - const pageHeight = document.body.scrollHeight - const totalSteps = Math.ceil(pageHeight / scrollStep) - const stepDelay = timeout / 2 / totalSteps - const scrollNext = async () => { - if (currentScrollPosition >= pageHeight) { - resolve() - return + await page.evaluate( + timeout => + new Promise(resolve => { + let currentScrollPosition = 0 + const scrollStep = Math.floor(window.innerHeight) + const pageHeight = document.body.scrollHeight + const totalSteps = Math.ceil(pageHeight / scrollStep) + const stepDelay = timeout / 2 / totalSteps + const scrollNext = async () => { + if (currentScrollPosition >= pageHeight) { + resolve() + return + } + window.scrollBy(0, scrollStep) + currentScrollPosition += scrollStep + setTimeout(scrollNext, stepDelay) } - window.scrollBy(0, scrollStep) - currentScrollPosition += scrollStep - setTimeout(scrollNext, stepDelay) - } - scrollNext() - }) - }, timeout) + scrollNext() + }), + timeout + ) await page.evaluate(() => window.scrollTo(0, 0)) }