In [1]:
# Sample page used throughout the notebook. Adjacent string literals are
# joined by the parser, so the markup is one continuous line with no newline
# characters between the tags.
html = (
    '<!DOCTYPE html>'
    '<html>'
    '<head>'
    '<title>PrepInsta Website</title>'
    '</head>'
    '<body>'
    '<h1 class="mainHead">Welcome to PrepInsta</h1>'
    '<p>This is a great website for us to prepare</p>'
    '<p class="link">This is a <a href="https://prepinstaprime.com">Link</a> to PrepInsta website</p>'
    '<p>We wish you <b>All the best</b></p>'
    '<h2><a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a></h2>'
    '</body>'
    '</html>'
)

In [2]:
from bs4 import BeautifulSoup

In [3]:
# Build the parse tree with the 'html.parser' backend named explicitly.
data = BeautifulSoup(html, features='html.parser')

In [4]:
# prettify() renders the tree as an indented string, one tag or text node per line.
print(data.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   PrepInsta Website
  </title>
 </head>
 <body>
  <h1 class="mainHead">
   Welcome to PrepInsta
  </h1>
  <p>
   This is a great website for us to prepare
  </p>
  <p class="link">
   This is a
   <a href="https://prepinstaprime.com">
    Link
   </a>
   to PrepInsta website
  </p>
  <p>
   We wish you
   <b>
    All the best
   </b>
  </p>
  <h2>
   <a href="https://prepinsta.com/terms-and-conditions/">
    Terms and Conditions
   </a>
  </h2>
 </body>
</html>


### Hierarchical Relationship between Tags

In [5]:
# Attribute access returns the first matching tag in document order:
# here the <a> inside <p class="link">, not the one inside <h2>.
data.a

<a href="https://prepinstaprime.com">Link</a>

In [6]:
# Chaining tag names narrows the lookup: the first <a> inside the first <h2>.
data.h2.a

<a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a>

In [7]:
# Same element, reached by starting the chain one level higher at <body>.
data.body.h2.a

<a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a>

In [8]:
# Full path from the root <html> element down to the link.
data.html.body.h2.a

<a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a>

In [9]:
# .text gives the text inside the tag as a plain string, without the markup.
data.html.body.h2.a.text

'Terms and Conditions'

### Finding All Paragraphs' Text

In [13]:
# find_all collects every <p> tag in the document, in document order.
para_list = data.find_all('p')

In [14]:
# .string only has a value when the tag holds a single child node.
# The two paragraphs that mix text with a nested <a>/<b> tag therefore print None.
for para in para_list:
    print(para.string)

This is a great website for us to prepare
None
None


### Getting the text of a tag that contains other tags: `stripped_strings`

In [20]:
# stripped_strings is a lazy generator, so printing it directly shows only the
# generator object, not the text it would yield.
for para in para_list:
    print(para.stripped_strings)

<generator object PageElement.stripped_strings at 0x105662e40>
<generator object PageElement.stripped_strings at 0x105662e40>
<generator object PageElement.stripped_strings at 0x105662e40>


In [21]:
# list() drains the generator: one whitespace-stripped string per text node,
# including text that sits inside nested tags.
for para in para_list:
    print(list(para.stripped_strings))

['This is a great website for us to prepare']
['This is a', 'Link', 'to PrepInsta website']
['We wish you', 'All the best']


### Viewing the contents of an HTML tag

In [25]:
# .contents is the list of nodes sitting directly inside <html>.
contents = data.html.contents

In [23]:
# The <html> element itself; the <!DOCTYPE> declaration sits outside it.
data.html

<html><head><title>PrepInsta Website</title></head><body><h1 class="mainHead">Welcome to PrepInsta</h1><p>This is a great website for us to prepare</p><p class="link">This is a <a href="https://prepinstaprime.com">Link</a> to PrepInsta website</p><p>We wish you <b>All the best</b></p><h2><a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a></h2></body></html>

In [30]:
# .contents holds only the immediate children of the tag (here <head> and <body>),
# not the tags nested deeper inside them.
contents

[<head><title>PrepInsta Website</title></head>,
 <body><h1 class="mainHead">Welcome to PrepInsta</h1><p>This is a great website for us to prepare</p><p class="link">This is a <a href="https://prepinstaprime.com">Link</a> to PrepInsta website</p><p>We wish you <b>All the best</b></p><h2><a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a></h2></body>]

In [31]:
# Print each immediate child of <html> on its own line.
for content in contents:
    print(content)

<head><title>PrepInsta Website</title></head>
<body><h1 class="mainHead">Welcome to PrepInsta</h1><p>This is a great website for us to prepare</p><p class="link">This is a <a href="https://prepinstaprime.com">Link</a> to PrepInsta website</p><p>We wish you <b>All the best</b></p><h2><a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a></h2></body>


In [29]:
# Two immediate children: <head> and <body>.
print(len(contents))

2


In [32]:
# .children exposes the same immediate children as .contents, but as an
# iterator to loop over rather than a list.
children = data.html.children

In [33]:
# Iterating yields the same two nodes that .contents listed: <head>, then <body>.
for child in children:
    print(child)

<head><title>PrepInsta Website</title></head>
<body><h1 class="mainHead">Welcome to PrepInsta</h1><p>This is a great website for us to prepare</p><p class="link">This is a <a href="https://prepinstaprime.com">Link</a> to PrepInsta website</p><p>We wish you <b>All the best</b></p><h2><a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a></h2></body>


In [45]:
# Collect the tag name of every direct child of <body>.
# NOTE: `.children` is a one-shot iterator; the comprehension below consumes it,
# so `body_children` yields nothing if it is iterated again afterwards.
body_children = data.body.children
names_of_children = [node.name for node in body_children]

In [52]:
# .children is an iterator, not a list, so its repr shows no elements.
body_children

<list_iterator at 0x104aedb20>

In [40]:
# Tag names of <body>'s direct children, in document order.
names_of_children

['h1', 'p', 'p', 'p', 'h2']

In [41]:
# <body> has five direct children; tags nested deeper (<a>, <b>) are not counted.
len(names_of_children)

5

### Descendants of Multiple HTML Tags

In [55]:
# .descendants walks every node nested under <body>, at any depth, covering
# both tags and the text strings inside them. list() materialises the walk so
# it can be looped over and measured with len().
descendant_list = list(data.body.descendants)

In [57]:
# Show each descendant followed by a divider line, so that tags and the bare
# text nodes inside them are easy to tell apart.
for descendant in descendant_list:
    print(descendant, '-----------', sep='\n')

<h1 class="mainHead">Welcome to PrepInsta</h1>
-----------
Welcome to PrepInsta
-----------
<p>This is a great website for us to prepare</p>
-----------
This is a great website for us to prepare
-----------
<p class="link">This is a <a href="https://prepinstaprime.com">Link</a> to PrepInsta website</p>
-----------
This is a 
-----------
<a href="https://prepinstaprime.com">Link</a>
-----------
Link
-----------
 to PrepInsta website
-----------
<p>We wish you <b>All the best</b></p>
-----------
We wish you 
-----------
<b>All the best</b>
-----------
All the best
-----------
<h2><a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a></h2>
-----------
<a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a>
-----------
Terms and Conditions
-----------


In [58]:
# 16 nodes in total: 8 tags plus the 8 text strings they contain.
len(descendant_list)

16

### Finding the parent 

In [59]:
# .parent is the tag that directly encloses this one: the <p> wrapping <b>.
data.b.parent

<p>We wish you <b>All the best</b></p>

In [60]:
# The parent of <body> is the whole <html> element.
data.body.parent

<html><head><title>PrepInsta Website</title></head><body><h1 class="mainHead">Welcome to PrepInsta</h1><p>This is a great website for us to prepare</p><p class="link">This is a <a href="https://prepinstaprime.com">Link</a> to PrepInsta website</p><p>We wish you <b>All the best</b></p><h2><a href="https://prepinsta.com/terms-and-conditions/">Terms and Conditions</a></h2></body></html>

In [61]:
# .name gives just the tag name of the parent as a string.
data.body.parent.name

'html'