Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
seananaidu committed Feb 19, 2017
1 parent 36c7385 commit 3fd3a9e
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion scraper.py
Expand Up @@ -19,7 +19,7 @@
root = lxml.etree.fromstring(xmldata)

# # To print all of the pdf in xml:
# print lxml.etree.tostring(root, pretty_print=True)
print lxml.etree.tostring(root, pretty_print=True)

# # To print a page of the pdf in xml:

Expand All @@ -28,6 +28,7 @@
print "There are",len(pages),"pages"

# # For each page in the document and for each element in a page
"""
for page in pages[3:4]:
for el in page:
# # If the element is tagged as text, print out that text and its attribute
Expand All @@ -41,6 +42,7 @@
elif int(el.attrib['left']) < 760: data['Colonies_renov'] = el.text
elif int(el.attrib['left']) < 900: data['Percent_renov'] = el.text
print data
"""

# # Write out to the sqlite database using scraperwiki library
# scraperwiki.sqlite.save(unique_keys=[],table_name = 'Colonies_2015_Jan_Mar', data=data)
Expand Down

0 comments on commit 3fd3a9e

Please sign in to comment.