### Screen Reading with Selenium

In [2]:
from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from selenium.webdriver.common.by import By

In [14]:
# Path to the local ChromeDriver executable.
# Download it from https://sites.google.com/chromium.org/driver/
path = "./chromedriver-win64/chromedriver.exe"
service = Service(executable_path=path)

# Start a Chrome session driven by that executable, open the page to scrape,
# and maximize the browser window.
browser = webdriver.Chrome(service=service)
browser.get('http://www.fairphone.com/we-are-fairphone/')
browser.maximize_window()

In [15]:
# Look up the first element matching `div.content` on the loaded page and
# print its text. find_element raises NoSuchElementException when nothing
# matches, which is what the recorded output below shows for this page.
content = browser.find_element(By.CSS_SELECTOR,'div.content')
print(content.text)

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"div.content"}
  (Session info: chrome=119.0.6045.160); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF794BA82B2+55298]
	(No symbol) [0x00007FF794B15E02]
	(No symbol) [0x00007FF7949D05AB]
	(No symbol) [0x00007FF794A1175C]
	(No symbol) [0x00007FF794A118DC]
	(No symbol) [0x00007FF794A4CBC7]
	(No symbol) [0x00007FF794A320EF]
	(No symbol) [0x00007FF794A4AAA4]
	(No symbol) [0x00007FF794A31E83]
	(No symbol) [0x00007FF794A0670A]
	(No symbol) [0x00007FF794A07964]
	GetHandleVerifier [0x00007FF794F20AAB+3694587]
	GetHandleVerifier [0x00007FF794F7728E+4048862]
	GetHandleVerifier [0x00007FF794F6F173+4015811]
	GetHandleVerifier [0x00007FF794C447D6+695590]
	(No symbol) [0x00007FF794B20CE8]
	(No symbol) [0x00007FF794B1CF34]
	(No symbol) [0x00007FF794B1D062]
	(No symbol) [0x00007FF794B0D3A3]
	BaseThreadInitThunk [0x00007FFBF0237344+20]
	RtlUserThreadStart [0x00007FFBF1AE26B1+33]


In [16]:
# Same selector, but with find_elements (plural): it returns a list of
# matches instead of raising, so the count can be printed even when
# nothing matches (the recorded output below is 0).
all_bubbles = browser.find_elements(By.CSS_SELECTOR,'div.content')
print(len(all_bubbles))

# Print the text of each match, one per line (no iterations when the list is empty).
for bubble in all_bubbles:
    print(bubble.text)

0


In [17]:
# Find the first <iframe> on the page and navigate the browser directly to
# its `src` URL, so that later lookups run against the framed document
# rather than the outer page.
iframe = browser.find_element(By.XPATH,'//iframe')
new_url = iframe.get_attribute('src')
browser.get(new_url)

In [18]:
# Now that the browser is on the iframe's own URL, retry the `div.content`
# lookup and print the text of every content bubble that is found.
all_bubbles = browser.find_elements(By.CSS_SELECTOR,'div.content')
for elem in all_bubbles:
    print(elem.text)

In [19]:
from selenium.common.exceptions import NoSuchElementException

# Build one record (dict) per content bubble found by the previous cell.
all_data = []

for elem in all_bubbles:
    # Required fields: a missing element here raises NoSuchElementException.
    # Lookups run in the order the keys are listed.
    elem_dict = {
        'full_name': elem.find_element(By.CSS_SELECTOR, 'div.fullname').text,
        'short_name': elem.find_element(By.CSS_SELECTOR, 'div.name').text,
        'text_content': elem.find_element(By.CSS_SELECTOR, 'div.twine-description').text,
        'timestamp': elem.find_element(By.CSS_SELECTOR, 'div.when').text,
        'original_link': elem.find_element(By.CSS_SELECTOR, 'div.when a').get_attribute('href'),
    }

    # The picture is optional: store None when the bubble has no image.
    try:
        elem_dict['picture'] = elem.find_element(By.CSS_SELECTOR, 'div.picture img').get_attribute('src')
    except NoSuchElementException:
        elem_dict['picture'] = None

    all_data.append(elem_dict)

In [21]:
# We got no data: the site has since been updated and these class names are
# no longer used, so the list below is empty.
all_data

[]

In [19]:
# the cleaned-up and simplified script
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium import webdriver

def find_text_element(html_element, element_css):
    """Return the text of the first descendant matching a CSS selector.

    Args:
        html_element: Object exposing ``find_element(by, value)`` (a WebDriver
            or WebElement) used as the search root.
        element_css: CSS selector of the element to look up.

    Returns:
        The matched element's ``text``, or None when no element matches.
    """
    try:
        match = html_element.find_element(By.CSS_SELECTOR, element_css)
        return match.text
    except NoSuchElementException:
        # A missing element is expected for optional fields; report it as None.
        return None

def find_attr_element(html_element, element_css, attr):
    """Return an attribute of the first descendant matching a CSS selector.

    Args:
        html_element: Object exposing ``find_element(by, value)`` (a WebDriver
            or WebElement) used as the search root.
        element_css: CSS selector of the element to look up.
        attr: Name of the attribute to read from the matched element.

    Returns:
        Whatever ``get_attribute(attr)`` returns for the matched element, or
        None when no element matches the selector.
    """
    try:
        match = html_element.find_element(By.CSS_SELECTOR, element_css)
        return match.get_attribute(attr)
    except NoSuchElementException:
        # A missing element is expected for optional fields; report it as None.
        return None

def get_browser():
    """Start Chrome through the local ChromeDriver executable.

    Returns:
        A ``webdriver.Chrome`` instance whose service points at
        ``./chromedriver-win64/chromedriver.exe``.
    """
    driver_service = Service(executable_path="./chromedriver-win64/chromedriver.exe")
    return webdriver.Chrome(service=driver_service)

def main():
    """Scrape the content bubbles from the Fairphone page and print them.

    Opens a Chrome session via get_browser(), collects every
    ``div.twine-item-border`` element, and builds one dict per bubble with the
    keys ``full_name``, ``short_name``, ``text_content``, ``timestamp``,
    ``original_link`` and ``picture``. A field whose element is missing is
    stored as None. The browser is closed even if scraping fails part-way,
    and the collected list is printed afterwards.

    Raises:
        NoSuchElementException: if a bubble has no ``div.content`` child.
    """
    browser = get_browser()
    try:
        # NOTE(review): the exploratory cells loaded a different URL and had to
        # follow an <iframe> before any bubbles were reachable; confirm that
        # this URL serves the bubbles directly.
        browser.get('http://www.fairphone.com/fairphone/')

        all_data = []
        # Let element lookups poll for up to 10 seconds so that late-loading
        # bubbles have a chance to appear.
        browser.implicitly_wait(10)
        try:
            all_bubbles = browser.find_elements(By.CSS_SELECTOR, 'div.twine-item-border')
        except WebDriverException:
            # Retry once with a shorter wait if the first lookup errors out.
            browser.implicitly_wait(5)
            all_bubbles = browser.find_elements(By.CSS_SELECTOR, 'div.twine-item-border')

        for elem in all_bubbles:
            # Fixed selector typo: was 'div.conent', which can never match.
            content = elem.find_element(By.CSS_SELECTOR, 'div.content')
            all_data.append({
                'full_name': find_text_element(content, 'div.fullname'),
                'short_name': find_attr_element(content, 'div.name', 'innerHTML'),
                'text_content': find_text_element(content, 'div.twine-description'),
                'timestamp': find_attr_element(elem, 'div.when a abbr.timeago', 'title'),
                'original_link': find_attr_element(elem, 'div.when a', 'data-href'),
                # Fixed key typo: was 'picutre'.
                'picture': find_attr_element(content, 'div.picture img', 'src'),
            })
    finally:
        # Always release the Chrome session, even when scraping raised.
        browser.quit()
    print(all_data)

# Run the scraper; it prints the collected list of bubble dicts.
main()

[]
