Example 1: Basic HTML Parsing

In [None]:
from bs4 import BeautifulSoup

# Sample HTML content
html_content = "<html><head><title>My Web Page</title></head><body><p>Hello, World!</p></body></html>"

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Print the title tag
print("Title:", soup.title)

# Print the paragraph text
print("Paragraph:", soup.p.text)


Title: <title>My Web Page</title>
Paragraph: Hello, World!


Example 2: Finding Elements by Tag

In [None]:
from bs4 import BeautifulSoup

# Sample HTML content
html_content = "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>"

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Find all list items
items = soup.find_all("li")

# Print each item
for item in items:
    print(item.text)


Item 1
Item 2
Item 3


Example 3: Finding Elements by Class

In [None]:
from bs4 import BeautifulSoup

# Sample HTML content
html_content = "<div class='container'><p class='text'>Hello, World!</p></div>"

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Find the paragraph with class 'text'
paragraph = soup.find("p", class_="text")

# Print the paragraph text
print(paragraph.text)


Hello, World!


Example 4: Finding Elements by ID

In [None]:
from bs4 import BeautifulSoup

# Sample HTML content
html_content = "<div id='content'><p>Hello, World!</p></div>"

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Find the div with id 'content'
div = soup.find("div", id="content")

# Print the paragraph text
print(div.p.text)


Hello, World!


Example 5: Navigating the HTML Tree

In [None]:
from bs4 import BeautifulSoup

# Sample HTML content
html_content = "<div><p>Paragraph 1</p><p>Paragraph 2</p></div>"

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Navigate the HTML tree
div = soup.div
for p in div.find_all("p"):
    print(p.text)


Paragraph 1
Paragraph 2


Example 6: Extracting Attributes

In [None]:
from bs4 import BeautifulSoup

# Sample HTML content
html_content = "<a href='https://example.com'>Visit Example</a>"

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Extract the 'href' attribute
link = soup.a
print("Link Text:", link.text)
print("Link URL:", link["href"])


Link Text: Visit Example
Link URL: https://example.com




```
Example 7: Web Scraping a Real Website
```



In [None]:
import requests
from bs4 import BeautifulSoup

# Make an HTTP GET request
url = "https://example.com"
response = requests.get(url)

# Parse the HTML
soup = BeautifulSoup(response.text, "html.parser")

# Extract and print the page title
title = soup.title
print("Page Title:", title.text)


Page Title: Example Domain


Example 8: Scraping Tables


In [None]:
from bs4 import BeautifulSoup

# Sample HTML content with a table
html_content = "<table><tr><th>Name</th><th>Age</th></tr><tr><td>Alice</td><td>25</td></tr><tr><td>Bob</td><td>30</td></tr></table>"

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Find the table
table = soup.find("table")

# Extract and print table data
for row in table.find_all("tr")[1:]:
    columns = row.find_all("td")
    name = columns[0].text
    age = columns[1].text
    print(f"Name: {name}, Age: {age}")


Name: Alice, Age: 25
Name: Bob, Age: 30


Example 9: Scraping Images

In [None]:
from bs4 import BeautifulSoup

# Sample HTML content with an image
html_content = "<img src='https://example.com/image.jpg' alt='Sample Image'>"

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Find the image
image = soup.img

# Extract and print the image source and alt text
src = image["src"]
alt = image["alt"]
print("Image Source:", src)
print("Image Alt Text:", alt)


Image Source: https://example.com/image.jpg
Image Alt Text: Sample Image


Example 10: Web Scraping Pagination


In [None]:
import requests
from bs4 import BeautifulSoup

# Web scraping multiple pages with pagination
for page in range(1, 4):  # Assuming there are 3 pages
    url = f"https://example.com/page/{page}"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract and process data from each page
    # (e.g., extract items, navigate to the next page)
    # Your code here...


## Celinium web scraping library

Example 1: Opening a Web Page

In [3]:
!pip install selenium

Collecting selenium
  Downloading selenium-4.15.2-py3-none-any.whl.metadata (6.9 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.23.1-py3-none-any.whl.metadata (4.9 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting pysocks!=1.5.7,<2.0,>=1.5.6 (from urllib3[socks]<3,>=1.26->selenium)
  Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)
Downloading selenium-4.15.2-py3-none-any.whl (10.2 MB)
   ---------------------------------------- 0.0/10.2 MB ? eta -:--:--
   ---------------------------------------- 0.1/10.2 MB 4.1 MB/s eta 0:00:03
   - -------------------------------------- 0.4/10.2 MB 5.4 MB/s eta 0:00:02
   -- ------------------------------------- 

In [4]:
from selenium import webdriver

# Initialize the WebDriver
driver = webdriver.Chrome()  # or webdriver.Firefox() for Firefox

# Navigate to a website
driver.get("https://www.indiatoday.in/")

# Close the browser
driver.quit()


Example 2: Locating Elements

In [6]:
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Locate an element by ID
element_by_id = driver.findElement(By.className("tomatoes"));

# Locate an element by class name
element_by_class = driver.find_element_by_class_name("element_class")

# Locate an element by tag name
element_by_tag = driver.find_element_by_tag_name("element_tag")

# Close the browser
driver.quit()


AttributeError: 'WebDriver' object has no attribute 'find_element_by_id'

Example 3: Interacting with Elements

In [None]:
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Input text into an input field
input_field = driver.find_element_by_name("username")
input_field.send_keys("your_username")

# Click a button
button = driver.find_element_by_id("login_button")
button.click()

# Close the browser
driver.quit()


Example 4: Handling Dropdowns

In [1]:
from selenium import webdriver
from selenium.webdriver.support.ui import Select

driver = webdriver.Chrome()
driver.get("https://example.com")

# Locate the dropdown element
dropdown = Select(driver.find_element_by_id("country_dropdown"))

# Select an option by value
dropdown.select_by_value("USA")

# Close the browser
driver.quit()


ModuleNotFoundError: No module named 'selenium'

Example 5: Handling Alerts

In [None]:
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Click a button to trigger an alert
button = driver.find_element_by_id("alert_button")
button.click()

# Switch to the alert and accept it
alert = driver.switch_to.alert
alert.accept()

# Close the browser
driver.quit()


Example 6: Navigating Back and Forward

In [None]:
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Navigate to another page
driver.get("https://example.com/page2")

# Go back to the previous page
driver.back()

# Go forward
driver.forward()

# Close the browser
driver.quit()


Example 7: Working with Frames

In [None]:
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Switch to a frame by name or ID
driver.switch_to.frame("frame_name")

# Switch back to the main content
driver.switch_to.default_content()

# Close the browser
driver.quit()


Example 8: Capturing Screenshots

In [None]:
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Capture a screenshot
driver.save_screenshot("screenshot.png")

# Close the browser
driver.quit()


Example 9: Waiting for Elements

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://example.com")

# Wait for an element to be present and visible
element = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "element_id"))
)

# Close the browser
driver.quit()


Example 10: Scraping Dynamic Content

In [None]:
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com/infinite_scroll_page")

# Scroll down to load dynamic content
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

# Capture the page source after scrolling
page_source = driver.page_source

# Close the browser
driver.quit()
