In [1]:
from bs4 import BeautifulSoup

In [2]:
# Sample HTML document that serves as the parsing input for this notebook.
# It holds one "title" paragraph, two "story" paragraphs, and three
# <a class="sister"> links with ids link1..link3.
# NOTE(review): <body> and <html> are never closed in this markup, so the
# parser has to close them itself.
html_doc = """<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

In [3]:
# Parse html_doc into a BeautifulSoup object.
# The parser is named explicitly: when it is omitted, bs4 picks whichever
# parser happens to be installed and emits a GuessedAtParserWarning, so the
# resulting tree can differ from one machine to another. 'html.parser' ships
# with Python and needs no extra install.
soup = BeautifulSoup(html_doc, 'html.parser')
# Display the first <p> inside <body>.
soup.body.p

<p class="title"><b>The Dormouse's story</b></p>

In [4]:
# Inside the first <p> of <body>, locate the <b> tag and show its text.
soup.body.p.b.get_text()

"The Dormouse's story"

In [5]:
# Show the class attribute of the first <a> tag found in <body>.
soup.body.a.attrs['class']

['sister']

In [6]:
# Show the direct contents of <body>: its child tags together with the
# whitespace strings that sit between them.
body_tag = soup.body
body_tag.contents

['\n',
 <p class="title"><b>The Dormouse's story</b></p>,
 '\n\n',
 <p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>,
 '\n\n',
 <p class="story">...</p>,
 '\n']

In [7]:
# Print every descendant of <head>: the nested tags and also the text
# nodes inside them, not only the direct child tags.
head_tag = soup.head
for node in head_tag.descendants:
    print(node)

<title>The Dormouse's story</title>
The Dormouse's story


In [8]:
# Show the string held by the <title> tag inside <head>.
title_tag = soup.head.title
title_tag.string

"The Dormouse's story"

In [9]:
# Show the first <a> tag found inside <body>.
soup.body.find('a')

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

In [10]:
# Walk sideways from the first <a> to the next <a>. Two hops are needed
# because the first sibling is the ",\n" text between the two links.
first_link = soup.body.a
separator_text = first_link.next_sibling
separator_text.next_sibling

<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>

In [11]:
# Convert the content of the <b> tag to a built-in str and confirm its type.
b_string = soup.body.b.string
conteudo = str(b_string)
type(conteudo)

str

In [12]:
# Collect every <a> tag in the document.
soup.find_all(name='a')

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [13]:
# Collect every tag whose name appears in the given list (<a> or <b>).
wanted_tags = ['a', 'b']
soup.find_all(wanted_tags)

[<b>The Dormouse's story</b>,
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [18]:
# Collect every tag carrying the CSS class "story".
soup.find_all(attrs={'class': 'story'})

[<p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>,
 <p class="story">...</p>]

In [19]:
# Locate the tag with id="link2", then list everything that follows it at
# the same level (text nodes included).
tag_link2 = soup.find(id='link2')
[sibling for sibling in tag_link2.next_siblings]

[' and\n',
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>,
 ';\nand they lived at the bottom of a well.']