# Navigating Beautiful Soup 

In [1]:
from bs4 import BeautifulSoup

In [2]:
# Sample HTML document parsed by the cells below. <body> has three element
# children: <div id="first">, an <ol> with three <li> items, and a final <div>.
html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>First HTML Page</title>
</head>
<body>
  <div id="first">
    <h3 data-example="yes">hi</h3>
    <p>more text.</p>
  </div>
  <ol>
    <li class="super-special">This list item is special.</li>
    <li>This list item is not special.</li>
    <li class="special">This list item is also very special.</li>
  </ol>
  <div data-example="yes">bye</div>
</body>
</html>
"""

In [3]:
# Build the parse tree with Python's built-in "html.parser" backend.
soup = BeautifulSoup(markup=html, features="html.parser")

## How to navigate the `soup`?

**via Tags**

- parent / parents
- contents
- next_sibling / next_siblings
- previous_sibling / previous_siblings

**via Searching**
- find_parent / find_parents
- find_next_sibling / find_next_siblings
- find_previous_sibling / find_previous_siblings

### `contents` via Tags
- returns the direct children of a Tag as a list
- the whitespace between child tags shows up as separate `'\n'` string elements

In [9]:
# `.contents` lists the direct children of <body>, including the "\n"
# strings that sit between the tags.
data = soup.find("body").contents
print(data)

['\n', <div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>, '\n', <ol>
<li class="super-special">This list item is special.</li>
<li>This list item is not special.</li>
<li class="special">This list item is also very special.</li>
</ol>, '\n', <div data-example="yes">bye</div>, '\n']


In [6]:
# Index 0 of the children list is the '\n' string, so the first real tag sits at index 1.
body_children = soup.body.contents
data = body_children[1]
print(data)

<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>


In [7]:
# Same indexing one level down: child 1 of the first <div> is its <h3>.
first_div = soup.body.contents[1]
data = first_div.contents[1]
print(data)

<h3 data-example="yes">hi</h3>


In [8]:
# Read an attribute of the tag held in `data` by key.
data.attrs["data-example"]

'yes'

### via Searching
- e.g. `<div>` and `<ol>` are sibling Tags (siblings)
- They are on the same level

#### `next_sibling`

In [12]:
# Starting point for the sibling walk: the first tag inside <body>.
data_2 = soup.find("body").contents[1]
print(data_2)

<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>


In [13]:
# The immediate next sibling of the <div> is a '\n' string, so it takes
# two `.next_sibling` hops to land on the <ol>.
first_div = soup.body.contents[1]
newline_after_div = first_div.next_sibling
data_2 = newline_after_div.next_sibling
print(data_2)

<ol>
<li class="super-special">This list item is special.</li>
<li>This list item is not special.</li>
<li class="special">This list item is also very special.</li>
</ol>


**`find_next_sibling`** skips the `'\n'` newline strings and returns the next sibling Tag

In [19]:
# Look the starting tag up by its id attribute.
data = soup.find(attrs={"id": "first"})
print(data)

<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>


In [20]:
first_div = soup.find(id="first")
# With no arguments, find_next_sibling() returns the next sibling that is a tag.
data = first_div.find_next_sibling()
print(data)

<ol>
<li class="super-special">This list item is special.</li>
<li>This list item is not special.</li>
<li class="special">This list item is also very special.</li>
</ol>


In [21]:
# Two hops along the sibling chain: <div id="first"> -> <ol> -> last <div>.
first_div = soup.find(id="first")
ordered_list = first_div.find_next_sibling()
data = ordered_list.find_next_sibling()
print(data)

<div data-example="yes">bye</div>


**`find_previous_sibling`**

In [22]:
# select() returns every tag carrying a data-example attribute in document
# order; index 1 is the last <div>. Step back from it to the previous tag.
tagged = soup.select("[data-example]")
last_div = tagged[1]
data = last_div.find_previous_sibling()
print(data)

<ol>
<li class="super-special">This list item is special.</li>
<li>This list item is not special.</li>
<li class="special">This list item is also very special.</li>
</ol>


In [23]:
# Find the previous sibling that satisfies a condition.
# Passing a filter (here the tag name "ol") makes the search skip every
# preceding sibling that does not match; passing "div" instead would
# return <div id="first">.
data = soup.select("[data-example]")[1].find_previous_sibling("ol")
print(data)

<ol>
<li class="super-special">This list item is special.</li>
<li>This list item is not special.</li>
<li class="special">This list item is also very special.</li>
</ol>


#### `parent`

In [15]:
# Locate the <li> whose class is "super-special".
data_3 = soup.find(attrs={"class": "super-special"})
print(data_3)

<li class="super-special">This list item is special.</li>


In [16]:
special_item = soup.find(class_="super-special")
# `.parent` moves one level up the tree: from the <li> to its enclosing <ol>.
data_3 = special_item.parent
print(data_3)

<ol>
<li class="super-special">This list item is special.</li>
<li>This list item is not special.</li>
<li class="special">This list item is also very special.</li>
</ol>


In [17]:
# Two levels up: <li> -> <ol> -> <body>.
special_item = soup.find(class_="super-special")
enclosing_list = special_item.parent
data_3 = enclosing_list.parent
print(data_3)

<body>
<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>
<ol>
<li class="super-special">This list item is special.</li>
<li>This list item is not special.</li>
<li class="special">This list item is also very special.</li>
</ol>
<div data-example="yes">bye</div>
</body>


**`find_parent`** returns the closest parent Tag; given a filter, it keeps climbing until an ancestor matches

In [26]:
# With no arguments, find_parent() returns the immediate parent of the <h3>.
heading = soup.h3
data = heading.find_parent()
print(data)

<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>


In [27]:
# Passing a tag name makes find_parent() climb until it reaches a matching ancestor.
heading = soup.find("h3")
data = heading.find_parent(name="body")
print(data)

<body>
<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>
<ol>
<li class="super-special">This list item is special.</li>
<li>This list item is not special.</li>
<li class="special">This list item is also very special.</li>
</ol>
<div data-example="yes">bye</div>
</body>
