# Introduction to Beautiful Soup

In [1]:
from bs4 import BeautifulSoup

# A multi-line string literal must be delimited by triple quotes (''' or """).
# This small HTML document is the sample markup queried by the cells below.
html_content = """ <!DOCTYPE html>
<html lang="en">
<head>
    <title>Example.com</title>
</head>
<body>
    <div class="my_class">
        <h1>Welcome to BeautifulSopu</h1>
        <p>This is a paragraph.</p>
        <p>This is another paragraph.</p>
        <p>This is the third paragraph.</p>
        <ul>
            <li>Item 1</li>
            <li>Item 2</li>
        </ul>
    </div>
</body>
</html> """

# Parse the markup into a BeautifulSoup object using the "html.parser" backend;
# `soup` is the root object that the following cells navigate and search.
soup = BeautifulSoup(html_content, "html.parser")

In [2]:
# Attribute access on the soup (soup.<tag name>) returns the first tag with
# that name -- here the <title> element; get_text() gives its text content.
title = soup.title
title.get_text()

'Example.com'

In [3]:
# soup.p is the first <p> tag in the document; .text is the string inside it
first_p = soup.p
paragraph = first_p.text
paragraph

'This is a paragraph.'

---
## Navigating the parse tree
### Tag Navigation

In [4]:
# find() returns only the first tag that matches the given name
paragraph = soup.find(name="p")
paragraph

<p>This is a paragraph.</p>

In [5]:
# Find every <p> tag, then collect the text of each one into a list.
# The list gets its own name so `paragraphs` keeps holding the tags themselves
# and the cell gives the same result when it is re-run.
paragraphs = soup.find_all("p")
paragraph_texts = [para.text for para in paragraphs]

# Print one paragraph per line
for text in paragraph_texts:
    print(text)

This is a paragraph.
This is another paragraph.
This is the third paragraph.


### Parent and Sibling Navigation

In [6]:
# .parent is the tag that directly encloses an element; for the <h1> in the
# sample markup that is the surrounding <div>.
element = soup.find("h1")
parent_element = element.parent
print("Parent of H1 is:\n", parent_element)

Parent of H1 is:
 <div class="my_class">
<h1>Welcome to BeautifulSopu</h1>
<p>This is a paragraph.</p>
<p>This is another paragraph.</p>
<p>This is the third paragraph.</p>
<ul>
<li>Item 1</li>
<li>Item 2</li>
</ul>
</div>


In [7]:
# The <h1> is the first tag inside the <div>, so no tag precedes it at the
# same level and the lookup yields None.
heading = soup.h1
previous_sibling = heading.find_previous_sibling()
print("Previous sibling element of <h1>:\n", previous_sibling)

Previous sibling element of <h1>:
 None


In [8]:
# soup.p is the first <p>; the tag just before it at the same level is the <h1>
first_paragraph = soup.p
previous_sibling = first_paragraph.find_previous_sibling()
print("Previous sibling element of <p>:\n", previous_sibling)

Previous sibling element of <p>:
 <h1>Welcome to BeautifulSopu</h1>


In [9]:
# The tag that follows the <h1> at the same level is the first <p>
heading = soup.h1
next_sibling = heading.find_next_sibling()
print("Next sibling element of <h1>:\n", next_sibling)

Next sibling element of <h1>:
 <p>This is a paragraph.</p>


In [10]:
# The <ul> is the last tag inside the <div>, so it has no following sibling
# tag and the lookup yields None.
item_list = soup.ul
next_sibling = item_list.find_next_sibling()
print("Next sibling element of <ul>:\n", next_sibling)

Next sibling element of <ul>:
 None


### Child Navigation

In [11]:
# Walk over the direct children of the <div>.
# The whitespace between tags in the markup is parsed as text nodes too, which
# is why blank lines show up between the tags in the printed output.
div = soup.find("div")
children = div.children

print("Child elements of <div>:")
for node in children:
    print(node)

Child elements of <div>:


<h1>Welcome to BeautifulSopu</h1>


<p>This is a paragraph.</p>


<p>This is another paragraph.</p>


<p>This is the third paragraph.</p>


<ul>
<li>Item 1</li>
<li>Item 2</li>
</ul>




---
## Searching by CSS Class Attribute

In [12]:
# `class` is a reserved word in Python, so the CSS class is passed through the
# `class_` keyword. find_all() returns every matching tag in a list-like result.
elements = soup.find_all(name="div", class_="my_class")
elements

[<div class="my_class">
 <h1>Welcome to BeautifulSopu</h1>
 <p>This is a paragraph.</p>
 <p>This is another paragraph.</p>
 <p>This is the third paragraph.</p>
 <ul>
 <li>Item 1</li>
 <li>Item 2</li>
 </ul>
 </div>]

---