Description
@mdmintz
I'm trying to run a Selenium script that uses driver.get(url) in an AWS Lambda (serverless) environment.
Environment Setup:
Chrome binary location: /opt/chrome/chrome
Chromedriver location: /var/task/chromedriver
If ChromeDriver gets downloaded dynamically, it should go to: /tmp/seleniumbase
I have ensured that:
Chrome is copied correctly to /opt/chrome/chrome
Chromedriver is present at /var/task/chromedriver
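For reference, the path/version checks behind the log lines below look roughly like this (a simplified sketch, not the exact `log_versions()` implementation used in the code):

```python
# Simplified sketch of the cold-start checks that produce the
# "Exists / Executable / Version" log lines shown below.
import logging
import os
import subprocess

logger = logging.getLogger(__name__)

def log_versions():
    targets = {
        "Chrome binary": "/opt/chrome/chrome",
        "Chromedriver": "/var/task/seleniumbase_driver/chromedriver",
        "Chromedriver": "/tmp/seleniumbase_driver/chromedriver",
    }
    for name, path in targets.items():
        exists = os.path.exists(path)
        executable = os.access(path, os.X_OK)
        version = "n/a"
        if exists and executable:
            # Both Chrome and chromedriver print their version with --version.
            version = subprocess.run(
                [path, "--version"], capture_output=True, text=True
            ).stdout.strip()
        logger.info(
            f"{name} ({path}) | Exists: {exists} | "
            f"Executable: {executable} | Version: {version}"
        )
```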
Here are my logs and code:
Logs
[INFO] 2025-07-23T13:10:10.472Z Found credentials in environment variables.
START RequestId: cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Version: $LATEST
[INFO] 2025-07-23T13:10:16.736Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Chrome binary exists: True
[INFO] 2025-07-23T13:10:16.736Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Chrome binary is executable: True
[INFO] 2025-07-23T13:10:35.660Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Chrome binary (/opt/chrome/chrome) | Exists: True | Executable: True | Version: Google Chrome for Testing 138.0.7204.102
[INFO] 2025-07-23T13:10:35.663Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Chromedriver (/var/task/seleniumbase_driver/chromedriver) | Exists: True | Executable: True | Version: ChromeDriver 138.0.7204.102 (f9f664b8a827bbd641377218cc3140686f1385ee-refs/branch-heads/7204_50@{#23})
[INFO] 2025-07-23T13:10:35.666Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Chromedriver (/tmp/seleniumbase_driver/chromedriver) | Exists: True | Executable: True | Version: ChromeDriver 138.0.7204.102 (f9f664b8a827bbd641377218cc3140686f1385ee-refs/branch-heads/7204_50@{#23})
[INFO] 2025-07-23T13:11:04.990Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Browser launched successfully
[INFO] 2025-07-23T13:11:04.990Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Current session is d3e15261d940349ad71e03c5aad63893
[INFO] 2025-07-23T13:11:04.990Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Opening page attempt 1
[2025-07-23 13:11:11,510] [INFO] Driver session ID: d3e15261d940349ad71e03c5aad63893
[INFO] 2025-07-23T13:11:11.510Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Driver session ID: d3e15261d940349ad71e03c5aad63893
[2025-07-23 13:11:13,370] [INFO] getting url in headless mode
[INFO] 2025-07-23T13:11:13.370Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 getting url in headless mode
[2025-07-23 13:11:16,750] [ERROR] Driver seems dead: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
(Session info: chrome=138.0.7204.102); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#invalidsessionidexception
Stacktrace:
#0 0x55702f8211ea
#1 0x55702f2cbab0
#2 0x55702f2b26fe
#3 0x55702f2da669
#4 0x55702f34b68f
#5 0x55702f368992
#6 0x55702f342f73
#7 0x55702f30faeb
#8 0x55702f310751
#9 0x55702f7e5afb
#10 0x55702f7e98d9
#11 0x55702f7cc8d9
#12 0x55702f7ea498
#13 0x55702f7b108f
#14 0x55702f80e898
#15 0x55702f80ea76
#16 0x55702f820506
#17 0x7f8fb653744b start_thread
[ERROR] 2025-07-23T13:11:16.750Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Driver seems dead: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
(Session info: chrome=138.0.7204.102); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#invalidsessionidexception
Stacktrace:
#0 0x55702f8211ea
#1 0x55702f2cbab0
#2 0x55702f2b26fe
#3 0x55702f2da669
#4 0x55702f34b68f
#5 0x55702f368992
#6 0x55702f342f73
#7 0x55702f30faeb
#8 0x55702f310751
#9 0x55702f7e5afb
#10 0x55702f7e98d9
#11 0x55702f7cc8d9
#12 0x55702f7ea498
#13 0x55702f7b108f
#14 0x55702f80e898
#15 0x55702f80ea76
#16 0x55702f820506
#17 0x7f8fb653744b start_thread
[INFO] 2025-07-23T13:11:16.750Z cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Found review data: []
END RequestId: cb79c2ec-d60a-4315-92f2-8994d1ba49a1
REPORT RequestId: cb79c2ec-d60a-4315-92f2-8994d1ba49a1 Duration: 66129.39 ms Billed Duration: 70278 ms Memory Size: 2048 MB Max Memory Used: 1204 MB Init Duration: 4148.58 ms
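A small diagnostic wrapper around driver.get() can capture extra context when the session dies like this (sketch only, not part of the code below; it assumes the unique profile directory created in scrape_website()):

```python
# Diagnostic sketch: when driver.get() fails with "invalid session id",
# log some environment context to help tell a Chrome crash apart from a
# stale driver session. Assumes the /tmp profile dir created at launch.
import logging
import os
import shutil

from selenium.common.exceptions import WebDriverException

logger = logging.getLogger(__name__)

def get_with_context(driver, url, profile_dir):
    try:
        driver.get(url)
    except WebDriverException as e:
        total, used, free = shutil.disk_usage("/tmp")
        logger.error(
            "driver.get() failed: %s | /tmp free: %d MB | profile dir exists: %s",
            e, free // (1024 * 1024), os.path.isdir(profile_dir),
        )
        # Since Chrome is launched with --enable-logging --v=1, its log output
        # may hold the underlying crash reason (e.g. OOM or a missing library).
        raise
```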
Code:
def scrape_website(url):
    driver = None
    try:
        source_dir = "/var/task/seleniumbase_driver"
        target_dir = "/tmp/seleniumbase_driver"
        os.makedirs(target_dir, exist_ok=True)
        os.makedirs("/tmp/downloaded_files", exist_ok=True)
        os.makedirs("/tmp/seleniumbase_cache", exist_ok=True)
        os.makedirs("/tmp/.cache/selenium", exist_ok=True)
        os.makedirs("/tmp/chrome_profile", exist_ok=True)
        unique_profile_dir = f"/tmp/chrome-profile-{uuid.uuid4()}"
        os.makedirs(unique_profile_dir, exist_ok=True)
        shutil.copy(os.path.join(source_dir, "chromedriver"), target_dir)
        shutil.copy(os.path.join(source_dir, "uc_driver"), target_dir)
        os.chmod(os.path.join(target_dir, "chromedriver"), 0o755)
        os.chmod(os.path.join(target_dir, "uc_driver"), 0o755)
        os.environ["SELENIUMBASE_DRIVER_PATH"] = target_dir
        os.environ["UC_CHROMEDRIVER_PATH"] = os.path.join(target_dir, "chromedriver")
        os.environ["UC_SKIP_DOWNLOAD"] = "true"
        os.environ["SELENIUM_MANAGER_DISABLE"] = "1"
        os.environ["SELENIUMBASE_DOWNLOADS_FOLDER"] = "/tmp/downloaded_files"
        os.environ["SELENIUMBASE_CACHE_PATH"] = "/tmp/seleniumbase_cache"
        os.environ["HOME"] = "/tmp"
        os.environ["WDM_DISABLE"] = "true"  # Disable WebDriverManager if used internally
        os.environ["PATH"] = target_dir + os.pathsep + os.environ.get("PATH", "")
        os.chdir("/tmp")
        logger.info(f"Chrome binary exists: {os.path.exists('/opt/chrome/chrome')}")
        logger.info(
            f"Chrome binary is executable: {os.access('/opt/chrome/chrome', os.X_OK)}"
        )
        log_versions()
        browser_launcher.override_driver_dir(target_dir)
        # driver = Driver(
        #     headless=True,
        #     undetectable=True,
        #     binary_location="/opt/chrome/chrome",  # "/usr/bin/google-chrome-stable"
        #     browser="chrome",
        #     chromium_arg="--disable-extensions --disable-software-rasterizer --no-sandbox",
        # )
        driver = Driver(
            headless=True,
            undetectable=True,
            browser="chrome",
            binary_location="/opt/chrome/chrome",
            chromium_arg=[
                "--headless",
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--disable-software-rasterizer",
                "--disable-extensions",
                "--disable-background-networking",
                "--disable-default-apps",
                "--disable-sync",
                "--metrics-recording-only",
                "--no-zygote",
                "--single-process",
                "--disable-features=IsolateOrigins,site-per-process",
                "--window-size=1280,800",
                f"--user-data-dir={unique_profile_dir}",
                "--enable-logging",
                "--v=1",
                "--log-level=0",
            ],
        )
        logger.info("Browser launched successfully")
        logger.info("Current session is {}".format(driver.session_id))
        driver.get("data:,")  # Stabilize session
        time.sleep(0.5)
        # Try to open the URL
        for attempt in range(3):
            try:
                logger.info(f"Opening page attempt {attempt + 1}")
                # driver.get("data:,")  # Minimal blank URL to stabilize
                time.sleep(0.5)
                driver.get(url)
                break
            except Exception as e:
                logger.warning(f"[Retry {attempt + 1}] driver.get() failed: {e}")
                time.sleep(2)
        try:
            title = driver.title
            logger.info(f"Page title: {title}")
        except Exception as e:
            logger.warning(f"Error fetching title: {e}")
        search_box_found = False
        try:
            response = scrape_reviews(driver, url)
            logger.info(f"Found review data: {response}")
            search_box_found = True
        except NoSuchElementException:
            logger.warning("Search box not found")
        # Take a screenshot (optional)
        today_date = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        screenshot_key = f"screenshots/{today_date}.png"
        # Uncomment to take a screenshot and upload it to S3
        # take_screenshot(driver, os.environ.get("BUCKET_NAME", "default-bucket"), screenshot_key)
        # Return results
        return {
            "url": url,
            "search_box_found": search_box_found,
            "screenshot_key": screenshot_key,
            "timestamp": datetime.datetime.now().isoformat(),
        }
    except Exception as e:
        logger.error(f"Error during web scraping: {str(e)}")
        if driver:
            # Uncomment to take a screenshot of the error state
            # take_screenshot(driver, os.environ.get("BUCKET_NAME", "default-bucket"), f"errors/{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}.png")
            pass
        return {
            "error": str(e),
            "url": url,
            "timestamp": datetime.datetime.now().isoformat(),
        }
    finally:
        if driver:
            driver.quit()
def scrape_reviews(driver, url):
    try:
        unique_profile_dir = f"/tmp/chrome-profile-{uuid.uuid4()}"
        os.makedirs(unique_profile_dir, exist_ok=True)
        if not driver.session_id:
            logger.error("My error: invalid or closed WebDriver session.")
            return []
        logger.info(f"Driver session ID: {driver.session_id}")
        driver.get(url)
        logger.info("getting url in headless mode")
        human_delay(3, 5)
        try:
            driver.title  # Accessing property to force browser ping
        except Exception as e:
            logger.error(f"Driver seems dead: {e}")
            return []
        try:
            show_all_btn = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "button[aria-label='Show all reviews']")
                )
            )
            show_all_btn.click()
            logger.info("Clicked 'Show all reviews' button")
            human_delay(2, 3)
        except Exception as e:
            logger.warning(f"Could not click 'Show all reviews': {e}")
            return []
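The `human_delay()` helper used above isn't shown; based on its name and arguments I assume it's just a randomized sleep, roughly like this sketch (the real helper may differ):

```python
# Assumed behavior of human_delay(): sleep a random number of seconds
# between min_seconds and max_seconds to mimic human pacing.
import random
import time

def human_delay(min_seconds, max_seconds):
    time.sleep(random.uniform(min_seconds, max_seconds))
```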
And this is my Dockerfile:
FROM amazon/aws-lambda-python:3.10
# Install dependencies required by Chrome & SeleniumBase
RUN yum install -y \
    unzip \
    wget \
    tar \
    gzip \
    fontconfig \
    alsa-lib \
    atk \
    cups-libs \
    libXcomposite \
    libXcursor \
    libXdamage \
    libXext \
    libXi \
    libXtst \
    pango \
    gtk3 \
    libXrandr \
    xorg-x11-server-Xvfb \
    libdrm \
    libgbm \
    mesa-libOSMesa \
    libX11 \
    libXfixes \
    libxcb \
    libXrender \
    libX11-xcb \
    dbus-glib \
    unixodbc \
    unixodbc-devel \
    odbcinst \
    && yum clean all && rm -rf /var/cache/yum
RUN mkdir -p /opt/chrome && \
    wget -O /opt/chrome/chrome-linux64.zip https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/138.0.7204.102/linux64/chrome-linux64.zip && \
    unzip /opt/chrome/chrome-linux64.zip -d /opt/chrome/ && \
    mv /opt/chrome/chrome-linux64 /opt/chrome/138 && \
    ln -s /opt/chrome/138/chrome /opt/chrome/chrome && \
    chmod +x /opt/chrome/138/chrome && \
    rm /opt/chrome/chrome-linux64.zip
RUN mkdir -p /var/task/seleniumbase_driver
COPY seleniumbase_driver/ /var/task/seleniumbase_driver
RUN chmod -R +x /var/task/seleniumbase_driver
RUN ls -l /var/task/seleniumbase_driver
RUN mkdir -p /tmp/seleniumbase_driver /tmp/downloaded_files /tmp/seleniumbase_cache
RUN chmod -R +x /tmp/seleniumbase_driver
RUN mkdir -p /tmp/seleniumbase_driver /tmp/downloaded_files /tmp/seleniumbase_cache /tmp/.cache/selenium
RUN chmod -R +x /tmp/seleniumbase_driver
ENV CHROME_BINARY_PATH="/opt/chrome/chrome"
ENV SELENIUMBASE_DRIVER_PATH="/tmp/seleniumbase_driver"
ENV UC_CHROMEDRIVER_PATH="/tmp/seleniumbase_driver/chromedriver"
ENV SELENIUMBASE_DOWNLOADS_FOLDER="/tmp/downloaded_files"
ENV SELENIUMBASE_CACHE_PATH="/tmp/seleniumbase_cache"
ENV WDM_DISABLE="true"
ENV SELENIUM_MANAGER_DISABLE="1"
ENV HOME="/tmp"
ENV PATH="/tmp/seleniumbase_driver:/opt:$PATH"
ENV PYTHONPATH="/var/task"
WORKDIR /var/task
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY app.py main_controller.py /var/task/
COPY scrapper /var/task/scrapper
CMD ["app.lambda_handler"]