In [1]:
from bs4 import BeautifulSoup

In [2]:
# Sample HTML document used as the parser input for the cells below: a <div>
# with id "first", an ordered list whose items carry the "special" and
# "super-special" classes, and two elements with a data-example attribute.
html = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>First HTML Page</title>
</head>
<body>
  <div id="first">
    <h3 data-example="yes">hi</h3>
    <p>more text.</p>
  </div>
  <ol>
    <li class="special super-special">This list item is special.</li>
    <li class="special">This list item is also special.</li>
    <li>This list item is not special.</li>
  </ol>
  <div data-example="yes">bye</div>
</body>
</html>
"""


In [3]:
# Parse the raw HTML string with the "html.parser" backend, producing a
# BeautifulSoup object that the following cells navigate and search.
soup = BeautifulSoup(markup=html, features="html.parser")



In [4]:
# Render the whole parsed document back to markup text and show it.
document_markup = str(soup)
print(document_markup)


<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<title>First HTML Page</title>
</head>
<body>
<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>
<ol>
<li class="special super-special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>
<div data-example="yes">bye</div>
</body>
</html>



In [5]:
# Attribute-style access on the soup yields the document's <body> tag.
body_tag = soup.body
print(body_tag)

<body>
<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>
<ol>
<li class="special super-special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>
<div data-example="yes">bye</div>
</body>


In [25]:
# Chained attribute access returns only the first <div> found inside <body>.
first_div_in_body = soup.body.div
print(first_div_in_body)

<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>


# We use `find` / `find_all` to search by tag name and HTML attributes, and `select` to search with CSS selectors

In [24]:
# find_all gathers every <div> in the document, not just the first one.
all_divs = soup.find_all("div")
print(all_divs)

[<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>, <div data-example="yes">bye</div>]


In [8]:
# find returns only the first element whose class list includes "special".
first_special = soup.find(class_="special")
print(first_special)

<li class="special super-special">This list item is special.</li>


In [9]:
# find_all returns every element whose class list includes "special".
all_special = soup.find_all(class_="special")
print(all_special)

[<li class="special super-special">This list item is special.</li>, <li class="special">This list item is also special.</li>]


In [10]:
# Attribute names containing a hyphen cannot be passed as keyword arguments,
# so the attribute filter is supplied as a dict through `attrs`.
wanted_attrs = {"data-example": "yes"}
data_example_tags = soup.find_all(attrs=wanted_attrs)
print(data_example_tags)

[<h3 data-example="yes">hi</h3>, <div data-example="yes">bye</div>]


# The next three cells use CSS selector syntax.

In [11]:
# In a CSS selector, "#" targets an element by its id.
matched_by_id = soup.select("#first")
print(matched_by_id)


[<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>]


In [12]:
# In a CSS selector, "." targets elements by class name.
matched_by_class = soup.select(".special")
print(matched_by_class)


[<li class="special super-special">This list item is special.</li>, <li class="special">This list item is also special.</li>]


In [13]:
# In a CSS selector, "[name]" targets elements that have the named attribute.
matched_by_attribute = soup.select("[data-example]")
print(matched_by_attribute)

[<h3 data-example="yes">hi</h3>, <div data-example="yes">bye</div>]


In [14]:
# Show just the text content (no markup) of each element with class "special".
special_items = soup.select(".special")
for el in special_items:
    item_text = el.get_text()
    print(item_text)

This list item is special.
This list item is also special.


In [15]:
# Tag attributes are read with dict-style indexing; here, the first <div>'s id.
first_div = soup.find("div")
print(first_div["id"])

first


In [16]:
# .contents lists a tag's direct children, including the newline strings
# that sit between the child tags.
body_children = soup.body.contents
print(body_children)

['\n', <div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>, '\n', <ol>
<li class="special super-special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>, '\n', <div data-example="yes">bye</div>, '\n']


In [26]:
# Show <body> once more as a reference point for the child/parent navigation below.
body_for_reference = soup.body
print(body_for_reference)

<body>
<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>
<ol>
<li class="special super-special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>
<div data-example="yes">bye</div>
</body>


In [17]:
# Index 0 of body.contents is a newline string, so index 1 is the first <div>;
# list that div's own direct children.
first_body_child_tag = soup.body.contents[1]
print(first_body_child_tag.contents)


['\n', <h3 data-example="yes">hi</h3>, '\n', <p>more text.</p>, '\n']


In [28]:
# .parent climbs one level up: from the first "special" <li> to its enclosing <ol>.
special_li = soup.find(class_="special")
print(special_li.parent)


<ol>
<li class="special super-special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>


In [19]:
# Two .parent hops: from the first "special" <li> to its <ol>, then to <body>.
enclosing_list = soup.find(class_="special").parent
print(enclosing_list.parent)

<body>
<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>
<ol>
<li class="special super-special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>
<div data-example="yes">bye</div>
</body>


In [20]:
# find_next_sibling returns the next tag at the same level as the id="first" div.
div_with_first_id = soup.find(id="first")
print(div_with_first_id.find_next_sibling())


<ol>
<li class="special super-special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>


In [21]:
# The second element with a data-example attribute is the trailing <div>;
# find_previous_sibling returns the closest tag that precedes it at the same level.
data_example_matches = soup.select("[data-example]")
trailing_div = data_example_matches[1]
print(trailing_div.find_previous_sibling())


<ol>
<li class="special super-special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>


In [22]:
# Locate the item that has BOTH classes with a CSS selector. Passing the
# string "special super-special" to class_ is compared against the exact
# attribute value, so it would silently stop matching if the classes were
# written in a different order in the markup.
super_special_item = soup.select_one(".special.super-special")
# From there, move to the next sibling that carries the "special" class.
print(super_special_item.find_next_sibling(class_="special"))

<li class="special">This list item is also special.</li>


In [23]:
# The first <h3> lives inside the id="first" div, so its parent is that div.
first_heading = soup.find("h3")
print(first_heading.parent)

<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>
