- webbrowser - comes with python and opens a browser to specific page
- requests - downloads files and web pages from the internet
- bs4 (beautifulsoup) - parses HTML
- selenium - launches and controls a web browser; can fill forms and simulate clicks

In [1]:
import webbrowser
webbrowser.open('https://inventwithpython.com/')

True

In [2]:
import requests

In [3]:
res = requests.get('https://automatetheboringstuff.com/files/rj.txt')
type(res)

requests.models.Response

In [4]:
res.status_code == requests.codes.ok

True

In [6]:
len(res.text)

178978

In [7]:
print(res.text[:250])

The Project Gutenberg EBook of Romeo and Juliet, by William Shakespeare

This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever.  You may copy it, give it away or
re-use it under the terms of the Projec


In [8]:
res = requests.get('https://inventwithpython.com/page_that_does_not_exist')
res.raise_for_status()

HTTPError: 404 Client Error: Not Found for url: https://inventwithpython.com/page_that_does_not_exist

In [9]:
res = requests.get('https://automatetheboringstuff.com/files/rj.txt')
res.raise_for_status()
play_file = open('RomeoAndJuliet.txt', 'wb')
for chunk in res.iter_content(100000):
    play_file.write(chunk)
play_file.close()

In [10]:
import bs4

In [11]:
res = requests.get('https://nostartch.com')
res.raise_for_status()
no_starch_soup = bs4.BeautifulSoup(res.text, 'html.parser')
type(no_starch_soup)

bs4.BeautifulSoup

In [12]:
example_file = open('example.html')
example_soup = bs4.BeautifulSoup(example_file, 'html.parser')
type(example_soup)

bs4.BeautifulSoup

In [13]:
elems = example_soup.select('#author')
type(elems)

bs4.element.ResultSet

In [4]:
import lxml
import bs4
example_file = open('example.html')
example_soup = bs4.BeautifulSoup(example_file.read(), 'lxml')
elems = example_soup.select('#author')
type(elems) # book example returns list, my exec returns bs4.element.ResultSet; likely a recent update?

bs4.element.ResultSet

In [5]:
len(elems)

1

In [6]:
type(elems[0])

bs4.element.Tag

In [7]:
str(elems[0])

'<span id="author">Al Sweigart</span>'

In [8]:
elems[0]

<span id="author">Al Sweigart</span>

In [9]:
elems[0].getText()

'Al Sweigart'

In [10]:
elems[0].attrs

{'id': 'author'}

In [11]:
elems[0].text

'Al Sweigart'

In [12]:
elems[0].id

In [14]:
elems[0].attrs['id']

'author'

In [15]:
p_elems = example_soup.select('p')
str(p_elems[0])

'<p>download my <strong>Python</strong> book from <a href="https://inventwithpython.com">my website</a>.</p>'

In [16]:
p_elems[0].text

'download my Python book from my website.'

In [17]:
str(p_elems[1])

'<p class="slogan">learn python the easy way!</p>'

In [18]:
p_elems[1].text

'learn python the easy way!'

In [19]:
str(p_elems[2])

'<p>by <span id="author">Al Sweigart</span></p>'

In [20]:
p_elems[2].text

'by Al Sweigart'

In [22]:
span_elem = example_soup.select('span')[0]
str(span_elem)

'<span id="author">Al Sweigart</span>'

In [23]:
span_elem.get('id')

'author'

In [24]:
span_elem.get('some_nonexistent_attr') == None

True

In [25]:
span_elem.attrs

{'id': 'author'}

In [1]:
from selenium import webdriver

In [5]:
browser = webdriver.Firefox()
type(browser)

selenium.webdriver.firefox.webdriver.WebDriver

In [6]:
browser.get('https://inventwithpython.com')

In [8]:
try:
    elem = browser.find_element_by_class_name('cover-thumb') # book typo space
    print(f'found <{elem.tag_name}> element with that class name') # randomly reverting back to string interpolation?
except:
    print('was not able to find an element with that name')
# welp, looks like the find_element_by_* methods are deprecated; gotta look up find_element() doc
# from selenium.webdriver.common.by import By
# ID = "id"
# XPATH = "xpath"
# LINK_TEXT = "link text"
# PARTIAL_LINK_TEXT = "partial link text"
# NAME = "name"
# TAG_NAME = "tag name"
# CLASS_NAME = "class name"
# CSS_SELECTOR = "css selector"

found <img> element with that class name


  elem = browser.find_element_by_class_name('cover-thumb')


In [9]:
from selenium.webdriver.common.by import By
link_elem = browser.find_element(By.LINK_TEXT, 'Read Online for Free')
type(link_elem)
link_elem.click()

In [10]:
browser = webdriver.Firefox()
browser.get('https://login.metafilter.com')
user_elem = browser.find_element(By.ID, 'user_name')
user_elem.send_keys('asdf')
password_elem = browser.find_element(By.ID, 'user_pass')
password_elem.send_keys('qwer')
password_elem.submit()

In [11]:
from selenium.webdriver.common.keys import Keys

In [12]:
browser = webdriver.Firefox()
browser.get('https://nostarch.com')
html_elem = browser.find_element(By.TAG_NAME, 'html')
html_elem.send_keys(Keys.END)
html_elem.send_keys(Keys.HOME)

In [13]:
# browser.back()
# browser.forward()
# browser.refresh()
browser.quit()

## practice questions

1. webbrowser can open web pages in your browser, requests sends web requests through python directly, bs4 parses html, selenium simulates user input in a browser
2. `requests.models.Response`, `res.read()`
3. `res.raise_for_status()`
4. `res.status_code`
5. chunk by chunk in `res.itercontent(100000)`
6. F12 for me
7. inspector
8. `'#main'`
9. `'.highlight'`
10. `'div div'`
11. `'button[value="favorite"]'`
12. `spam.text`
13. `eggs = spam.attrs`
14. by importing the submodule `from selenium import webdriver` 
15. first returns only 1 element, second returns all that it finds
16. `.click()` and `.send_keys()`
17. `.submit()`
18. `browser.forward()`, `browser.back()`, `browser.refresh()`