Permalink
Browse files

Merge branch 'master' of gist.github.com:acc6cff6f822e6a01829

  • Loading branch information...
2 parents 0f4976d + cbc3e9f commit d1c53d4051388f56a963cece25c06ea1f4ef22ef @mromanello committed Feb 7, 2012
Showing with 15 additions and 6 deletions.
  1. +15 −6 script.py
View
@@ -1,5 +1,9 @@
import urllib
import random
+from xml.etree.ElementTree import ElementTree,fromstring
+
+global input_file
+input_file = "bios.txt"
def format_perseus_uri(i_string):
prefix = "http://data.perseus.org/people/smith:"
@@ -8,19 +12,24 @@ def format_perseus_uri(i_string):
def get(url):
    """Announce the fetch on stdout, then return the body read from *url*."""
    print("...fetching <%s>" % url)
    response = urllib.urlopen(url)
    return response.read()
-
+
def parse_xml(input):
    """Return the surname found in the first ``head//persName`` of an XML string.

    input -- XML document as a string (parsed with ``fromstring``).

    Returns the text of the first ``surname`` element below the first
    ``persName`` located under a ``head`` element, or None when the document
    has no such ``persName`` or that element has no ``surname``.  The value
    is returned rather than printed so that a caller can print or reuse it.

    Malformed XML still raises the parser's error from ``fromstring``.
    """
    tree = fromstring(input)
    el = tree.find(".//head//persName")
    if el is None:
        # No matching element: report "not found" instead of failing on None.
        return None
    # next() with a default avoids an IndexError when there is no surname.
    surname = next(el.iter("surname"), None)
    if surname is None:
        return None
    return surname.text
+
try:
- f = open("./bios.txt","r")
+ f = open(input_file,"r")
data = f.read().split("\n")
random.shuffle(data)
f.close()
print "There are %i Smith IDs in the input list..."%len(data)
- print get(format_perseus_uri("caesar-1"))
-
- for n in range(10):
+
+ for n in range(1):
print data[n]
test_url = format_perseus_uri(data[n])
- print "<%s>"%get(test_url)
+ xml = get(test_url)
+ print parse_xml(xml)
except IOError:
print "this time didn't work"

0 comments on commit d1c53d4

Please sign in to comment.