## Parsing a web page

In [None]:
from urllib import request
from urllib.error import HTTPError, URLError

# Web page that will be scraped
myurl = 'https://www.cnbc.com/2020/02/14/stanford-scientist-on-proven-habits-that-will-make-you-more-productive.html'

# urlopen() does not hand back a response for every outcome: it raises
# HTTPError for 4xx/5xx replies and URLError when the server cannot be
# reached, so those failures have to be caught here rather than detected
# through the status check further down. HTTPError is a subclass of URLError
# and therefore must be handled first. The timeout keeps a stalled server
# from blocking this cell forever.
try:
    response = request.urlopen(myurl, timeout=30)
except HTTPError as err:
    print('status:', err.code, '\n')
    print('Bad Status')
    quit()
except URLError as err:
    print('Connection failed:', err.reason)
    quit()

# The response object exposes the HTTP status code of the reply.
# A status of 200 means that the page downloaded successfully. A status
# starting with a 2 generally indicates success, and one starting with a
# 4 or a 5 indicates an error.
status = response.status
print('status:', status, '\n')
if status != 200:
    print('Bad Status')
    quit()

In [None]:
import sys

# The response body arrives as raw bytes and has to be decoded into a Python
# string. Decode with the charset the server declares in its Content-Type
# header, falling back to UTF-8 when the header does not name one.
try:
    charset = response.headers.get_content_charset() or 'utf8'
    html = response.read().decode(charset)
except ConnectionResetError:
    print('Connection Reset Error')
    quit()
except Exception as ex:
    # Last-resort handler for this cell: report what went wrong, then stop.
    message = f"An exception of type {type(ex).__name__} occurred. Arguments:\n{ex.args!r}"
    print(message)
    quit()
finally:
    # Release the connection whether or not the read succeeded.
    response.close()

print(html)


In [None]:
# Parse the downloaded document with the BeautifulSoup library so that
# individual tags (the p tags in particular) can be looked up and their text
# extracted. 'html.parser' selects the parser bundled with Python.
import bs4
soup = bs4.BeautifulSoup(markup=html, features='html.parser')

# prettify() renders the parsed document as nicely indented HTML, which is
# easier to read than the raw download.
print(soup.prettify())


In [None]:
# Collect every <p> tag in the parsed document.
p_tags = soup.find_all('p')

In [None]:
# Display every <p> tag that was found so the ones holding the wanted
# content can be picked out.
p_tags   # I need to target the p tags that have the content I'm looking for

In [None]:
# Number of <p> tags found, i.e. how many entries p_tags holds.
len(p_tags)

In [None]:
# Query again so this cell can be re-run on its own once `soup` exists.
p_tags = soup.find_all('p')

# Print the text of paragraphs 16 through 27. Slicing instead of indexing
# means a page with fewer than 28 paragraphs prints whatever is available
# rather than stopping with an IndexError.
for paragraph in p_tags[16:28]:
    print(paragraph.get_text())