Permalink
Browse files

Added snippet

  • Loading branch information...
1 parent 0bf5caf commit e0877599205b602b08bc309b909cbc8d792448f4 @lyndsysimon committed Jan 15, 2013
Showing with 16 additions and 0 deletions.
  1. +16 −0 scraping/get_all_ids_on_page.py
@@ -0,0 +1,16 @@
+from bs4 import BeautifulSoup as BS
+import requests as r
+
+def get_ids_from_page(page):
+ response = r.get(page)
+ soup = BS(response.content).body
+
+ return sorted([x.get('id') for x in soup.find_all() if x.get('id') is not None])
+
+if __name__ == '__main__':
+ # In response to the question at the URL below - in short "How do I get the
+ # ids from all objects on a page in Python?"
+ ids = get_ids_from_page('http://stackoverflow.com/questions/14347086/')
+
+ for val in ids:
+ print val

0 comments on commit e087759

Please sign in to comment.