Skip to content

Commit

Permalink
finding dd tags
Browse files Browse the repository at this point in the history
  • Loading branch information
lexie11 committed Jul 17, 2019
1 parent 0086e42 commit 2e1c36e
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion scraper.py
Expand Up @@ -8,15 +8,21 @@

# Download the Companies House overview page for company 04503188 as raw HTML.
html = scraperwiki.scrape("https://beta.companieshouse.gov.uk/company/04503188")

# Parse the HTML string into an lxml element tree and keep its root in `root`.
root = lxml.html.fromstring(html)

# Find the page's <title> element(s) using a CSS selector.
name = root.cssselect('title')

for company in name:
    # Debug aid: print(lxml.html.tostring(company)) dumps the raw element.
    # text_content() gathers all text inside the element (never None, unlike
    # .text); strip() drops the surrounding layout whitespace.
    print(company.text_content().strip())

# Find every <dd> (definition-list value) on the page using a CSS selector.
# NOTE(review): this selects ALL <dd> elements, not only the address one --
# narrow the selector once the address element's id/class is confirmed.
address = root.cssselect('dd')

for companyaddress in address:
    # .text would stop at the first child tag and is None when the <dd>
    # starts with nested markup, so use text_content() for the full value.
    print(companyaddress.text_content().strip())

# # # Write out to the sqlite database using scraperwiki library
# scraperwiki.sqlite.save(unique_keys=['name'], data={"name": "susan", "occupation": "software developer"})
Expand Down

0 comments on commit 2e1c36e

Please sign in to comment.