In [1]:
import os
import time

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Test notebook for Selenium.
# Configure the Firefox options used when connecting to the Selenium container.
from selenium.webdriver.firefox.options import Options

# Build the browser options once; every session started by this notebook reuses them.
options = Options()
for _flag in (
    "--headless",               # Run in headless mode
    "--disable-gpu",            # Disable GPU acceleration
    "--no-sandbox",             # Bypass OS security model
    "--disable-dev-shm-usage",  # Overcome limited resource problems
):
    options.add_argument(_flag)

def start_driver(command_executor="http://rsm-selenium:4444/wd/hub"):
    """Open a new remote WebDriver session on the Selenium container.

    Args:
        command_executor: URL of the remote WebDriver endpoint. Defaults to the
            hub address of the ``rsm-selenium`` container, so existing
            ``start_driver()`` calls behave exactly as before.

    Returns:
        A ``webdriver.Remote`` session configured with the module-level
        ``options``. The caller is responsible for calling ``quit()`` on it
        (the cells below do so in a ``finally`` block).
    """
    return webdriver.Remote(
        command_executor=command_executor,
        options=options,
    )

In [2]:
# Base URL of the page that the scraping cells below request.
# Exactly one of the assignments in this cell should be active.

# Option 1: running without Docker -- the app is reachable on localhost.
# URL = "http://127.0.0.1:8123"

# Option 2 (active): running in Docker -- use the container name as the hostname.
# This relies on the shared 'rsm-docker' network to resolve the name.
URL = "http://rsm-msba-k8s-latest:8123"

# Option 3: NOTE(review): presumably a hosted copy of the same demo page -- confirm.
# URL = "https://rsm-shiny-02.ucsd.edu/selenium/"

In [3]:
print("=== BeautifulSoup results ===")
# A timeout keeps this cell from hanging indefinitely when the app is unreachable,
# and raise_for_status() stops us from parsing an HTTP error page as if it were the app.
response = requests.get(URL, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# BeautifulSoup can only see static elements like the title
title_tag = soup.find('title')
print("Title found by BeautifulSoup:", title_tag.text if title_tag else "No title found")
print("Button found by BeautifulSoup:", bool(soup.find('button', id="showText")))

# Look the element up once and reuse it for both the existence check and the dump.
# Printing the tag itself shows whether the static HTML carries any text inside it.
dynamic_placeholder = soup.find(id="dynamicText")
print("Dynamic text element found by BeautifulSoup:", bool(dynamic_placeholder))
print("Can BeautifulSoup find the dynamic text?", dynamic_placeholder)


=== BeautifulSoup results ===
Title found by BeautifulSoup: Scraping Demo Page
Button found by BeautifulSoup: True
Dynamic text element found by BeautifulSoup: True
Can BeautifulSoup find the dynamic text? <div class="shiny-text-output" id="dynamicText"></div>


In [4]:
# Drive the page in a real (remote, headless) browser so the click handler runs,
# then read the text that appears. The session is always closed in `finally`.
driver = start_driver()
print("=== Selenium results ===")
try:
    driver.get(URL)

    # Explicit-wait helper: polls a condition for up to 5 seconds, then raises
    wait = WebDriverWait(driver, 5)

    # Check for title
    print("Title found by Selenium:", driver.title)

    # Look for the button and wait until it can actually receive a click
    button = wait.until(EC.element_to_be_clickable((By.ID, "showText")))
    print("Button found by Selenium:", bool(button))

    # Click the button
    button.click()

    # The #dynamicText element is already part of the initial HTML, so waiting for
    # its *presence* returns immediately and may read it before it is filled in.
    # Poll until it holds non-empty text instead of sleeping for a fixed interval.
    dynamic_text = wait.until(
        lambda drv: drv.find_element(By.ID, "dynamicText").text
    )
    print("Dynamic text after click:", dynamic_text)

finally:
    driver.quit()

=== Selenium results ===
Title found by Selenium: Scraping Demo Page
Button found by Selenium: True
Dynamic text after click: This text was dynamically generated!


In [5]:
# Same scrape as the previous cell, but log in first if the page shows a login form.
# Credentials are read from the environment (optionally populated from a .env file).
driver = start_driver()
print("=== Selenium results with authentication ===")
try:
    driver.get(URL)
    wait = WebDriverWait(driver, 5)

    # Check if we need to login (look for common login elements)
    try:
        login_button = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button[type='submit'], #login, .login-button")))
        # Find username and password fields
        username_field = driver.find_element(By.CSS_SELECTOR, "input[type='email'], input[name='username']")
        password_field = driver.find_element(By.CSS_SELECTOR, "input[type='password']")
    except (TimeoutException, NoSuchElementException):
        # Only "the login form is not there" counts as "no login required".
        # Any other failure is a real error and must not be hidden behind this message.
        print("No login required or already logged in")
    else:
        # Enter credentials (these should be stored securely in practice)
        load_dotenv()
        username = os.getenv('SELENIUM_USERNAME')
        password = os.getenv('SELENIUM_PASSWORD')

        # os.getenv returns None for unset variables; fail with a clear message
        # instead of letting send_keys() choke on None.
        if not username or not password:
            raise RuntimeError(
                "SELENIUM_USERNAME and SELENIUM_PASSWORD must be set "
                "(e.g. in a .env file) to log in"
            )

        username_field.send_keys(username)
        password_field.send_keys(password)
        login_button.click()

        # Wait for login to complete
        time.sleep(2)

    # Original functionality
    button = wait.until(EC.element_to_be_clickable((By.ID, "showText")))
    print("Button found by Selenium:", bool(button))

    button.click()

    # Waiting for the presence of #dynamicText does not guarantee it has been
    # filled in yet, so poll until it holds non-empty text rather than sleeping.
    dynamic_text = wait.until(
        lambda drv: drv.find_element(By.ID, "dynamicText").text
    )
    print("Dynamic text after click:", dynamic_text)

except Exception as e:
    print(f"An error occurred: {str(e)}")
finally:
    driver.quit()

=== Selenium results with authentication ===
No login required or already logged in
Button found by Selenium: True
Dynamic text after click: This text was dynamically generated!
