In [1]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Driver Object

In [2]:
# filesystem path of the chromedriver executable handed to webdriver.Chrome
drv_loc = "/bin/chromedriver"

In [3]:
# start a Chrome browser session; the driver executable path is passed by
# keyword so the meaning of the argument is explicit
# NOTE(review): newer Selenium releases expect a Service object instead of a
# path argument -- confirm against the installed Selenium version
driver = webdriver.Chrome(executable_path=drv_loc)

In [4]:
# address of the page to load (chapter 1), split across two literals for readability
url = (
    'http://www.alice-in-wonderland.net/resources/chapters-script/'
    'alices-adventures-in-wonderland/chapter-1/'
)

In [5]:
# navigate the open browser session to the address stored in `url`
driver.get(url)

# Retrieve Website Elements

## XPath (XML Path Language)
XPath format:  
```xpath=//tagname[@attribute='value']```

In [6]:
# absolute xpath reference: the leading "/" anchors the path at the document
# root (without it the expression is a relative path that only resolves
# because the search happens to start from the document)
x_path = '/html/body/div/div/div/div/main/article/header/h1'

In [7]:
# retrieve the first element matching the xpath; the generic
# find_element(By.<strategy>, value) form is used because the
# find_element_by_* helpers are deprecated and missing from newer Selenium releases
driver.find_element(By.XPATH, x_path)

<selenium.webdriver.remote.webelement.WebElement (session="501c7c2d3dff679e3c3a55e94b4b06c1", element="4c18309b-ea87-4de6-9a3c-ab4196008c28")>

In [8]:
# text of the element matched by the xpath (generic By.XPATH locator instead
# of the deprecated find_element_by_xpath helper)
driver.find_element(By.XPATH, x_path).text

'Chapter 1: Down the Rabbit-Hole'

In [9]:
# relative xpath reference: "//" matches the tag at any depth in the document
x_path = '//h1'
# same lookup as before, via the generic By.XPATH locator (the
# find_element_by_xpath helper is deprecated)
driver.find_element(By.XPATH, x_path).text

'Chapter 1: Down the Rabbit-Hole'

In [10]:
# all "p" elements directly under main/article; find_elements returns a list
# (generic By.XPATH locator replaces the deprecated find_elements_by_xpath)
paragraphs = driver.find_elements(By.XPATH, '//main/article/p')
# first element text
paragraphs[0].text

'Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, `and what is the use of a book,’ thought Alice `without pictures or conversation?’'

In [11]:
# xpath selecting the chapter-menu anchors that carry an href attribute
chapter_path = (
    '//ul[@class="sub-menu-ul"]'  # menu container
    '/ul/li/a[@href]'             # anchor inside each nested list item
)

In [12]:
# chapter menu element text: collect every anchor matched by chapter_path
# (generic By.XPATH locator replaces the deprecated find_elements_by_xpath)
chapters = driver.find_elements(By.XPATH, chapter_path)
for chapter in chapters:
    # read the anchor's "text" attribute
    # NOTE(review): presumably used instead of .text because menu entries may
    # not be visible -- confirm
    print(chapter.get_attribute('text'))

Poem: "All in the golden afternoon"
Chapter 1: Down the Rabbit-Hole
Chapter 2: The Pool of Tears
Chapter 3: A Caucus-Race and a long Tale
Chapter 4: The Rabbit sends in a little Bill
Chapter 5: Advice from a Caterpillar
Chapter 6: Pig and Pepper
Chapter 7: A Mad Tea-Party
Chapter 8: The Queen's Croquet-Ground
Chapter 9: The Mock Turtle's Story
Chapter 10: The Lobster Quadrille
Chapter 11: Who stole the Tarts?
Chapter 12: Alice's Evidence
An Easter Greeting to every child who loves Alice
Christmas Greetings


In [13]:
# chapter menu element links: same anchors as the chapter titles, but print
# each anchor's href (generic By.XPATH locator replaces the deprecated
# find_elements_by_xpath helper)
links = driver.find_elements(By.XPATH, chapter_path)
for link in links:
    print(link.get_attribute('href'))

http://www.alice-in-wonderland.net/resources/chapters-script/alices-adventures-in-wonderland/all-in-the-golden-afternoon/
http://www.alice-in-wonderland.net/resources/chapters-script/alices-adventures-in-wonderland/chapter-1/
http://www.alice-in-wonderland.net/resources/chapters-script/alices-adventures-in-wonderland/chapter-2/
http://www.alice-in-wonderland.net/resources/chapters-script/alices-adventures-in-wonderland/chapter-3/
http://www.alice-in-wonderland.net/resources/chapters-script/alices-adventures-in-wonderland/chapter-4/
http://www.alice-in-wonderland.net/resources/chapters-script/alices-adventures-in-wonderland/chapter-5/
http://www.alice-in-wonderland.net/resources/chapters-script/alices-adventures-in-wonderland/chapter-6/
http://www.alice-in-wonderland.net/resources/chapters-script/alices-adventures-in-wonderland/chapter-7/
http://www.alice-in-wonderland.net/resources/chapters-script/alices-adventures-in-wonderland/chapter-8/
http://www.alice-in-wonderland.net/resources/c

In [14]:
# xpath element class contains: first anchor inside an "li" whose class
# attribute contains "current"; its href is returned
# (generic By.XPATH locator replaces the deprecated find_element_by_xpath)
driver.find_element(By.XPATH, '//li[contains(@class, "current")]/a').get_attribute("href")

'http://www.alice-in-wonderland.net/resources/'

In [15]:
# xpath element text contains: first "p" whose text includes "Alice"
# (generic By.XPATH locator replaces the deprecated find_element_by_xpath)
driver.find_element(By.XPATH, '//p[contains(text(), "Alice")]').text

'Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, `and what is the use of a book,’ thought Alice `without pictures or conversation?’'

In [16]:
# next element: following-sibling::p selects "p" siblings that come after a
# matched "p"; find_element returns the first such match
# (generic By.XPATH locator replaces the deprecated find_element_by_xpath)
driver.find_element(By.XPATH, '//article[@class="justify"]/p/following-sibling::p').text

'So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her.'

In [17]:
# parent of element: parent::article steps from the matched "p" back up to
# its enclosing "article"
# (generic By.XPATH locator replaces the deprecated find_element_by_xpath)
driver.find_element(By.XPATH, '//article[@class="justify"]/p/parent::article')

<selenium.webdriver.remote.webelement.WebElement (session="501c7c2d3dff679e3c3a55e94b4b06c1", element="64b83b85-6402-4ce4-b7cb-1a4a10414c22")>

## Webpage Interaction

### Enter Text Input

In [18]:
# maximize browser window
driver.maximize_window()
# locate the search field by its id attribute (generic By.ID locator replaces
# the deprecated find_element_by_id helper)
search = driver.find_element(By.ID, 'searchfield')
# clear any existing content of the field
search.clear()
# type the query into the field
search.send_keys('cheshire cat')
# pause 1 second before submitting
time.sleep(1)
# press Enter to submit the search
search.send_keys(Keys.ENTER)

In [19]:
# navigate back one step in the browser history using the WebDriver back
# command rather than running window.history.go(-1) through JavaScript:
# the script call returns immediately, so a following cell could run before
# the previous page has finished loading
driver.back()

In [20]:
# locate the link whose visible text is exactly "Home" (generic By.LINK_TEXT
# locator replaces the deprecated find_element_by_link_text helper)
button = driver.find_element(By.LINK_TEXT, 'Home')
# click it to navigate to the home page
button.click()

In [21]:
# scroll to bottom of page: keep scrolling until the reported page height
# stops changing between two consecutive scrolls
# the script scrolls to the current bottom and returns the page height
scroll_js = 'window.scrollTo(0, document.body.scrollHeight); return document.body.scrollHeight;'
lenOfPage = driver.execute_script(scroll_js)
while True:
    lastCount = lenOfPage
    # wait 1 second before measuring again
    time.sleep(1)
    lenOfPage = driver.execute_script(scroll_js)
    if lastCount == lenOfPage:
        # height unchanged since the previous scroll: bottom reached
        break

## Close Driver

In [22]:
# end the WebDriver session: quit() closes every browser window and shuts
# down the driver process, whereas close() only closes the current window
# and can leave the chromedriver process running
driver.quit()