Permalink
Browse files

added a function to parse the XML w/ ElementTree, although I should probably just write an XSLT
  • Loading branch information...
1 parent 1072b1e commit ffbaf925c5ea5ae64e15fe45d649e611190ab609 @mromanello committed Feb 6, 2012
Showing with 19,890 additions and 39,776 deletions.
  1. +19,875 −19,875 bios.txt
  2. +0 −19,895 data/bios.txt
  3. +15 −6 script.py
View
39,750 bios.txt
19,875 additions, 19,875 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
19,895 data/bios.txt
0 additions, 19,895 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
21 script.py
@@ -1,5 +1,9 @@
import urllib
import random
+from xml.etree.ElementTree import ElementTree,fromstring
+
+global input_file
+input_file = "bios.txt"
def format_perseus_uri(i_string):
prefix = "http://data.perseus.org/people/smith:"
@@ -8,19 +12,24 @@ def format_perseus_uri(i_string):
def get(url):
print "...fetching <%s>"%url
return urllib.urlopen(url).read()
-
+
+def parse_xml(input):
+ tree = fromstring(input)
+ el = tree.find(".//head//persName")
+ print list(el.iter("surname"))[0].text
+
try:
- f = open("./bios.txt","r")
+ f = open(input_file,"r")
data = f.read().split("\n")
random.shuffle(data)
f.close()
print "There are %i Smith IDs in the input list..."%len(data)
- print get(format_perseus_uri("caesar-1"))
-
- for n in range(10):
+
+ for n in range(1):
print data[n]
test_url = format_perseus_uri(data[n])
- print "<%s>"%get(test_url)
+ xml = get(test_url)
+ print parse_xml(xml)
except IOError:
print "this time didn't work"

0 comments on commit ffbaf92

Please sign in to comment.