Skip to content

Commit

Permalink
rename: format -> template
Browse files Browse the repository at this point in the history
  • Loading branch information
slaypni committed Feb 13, 2013
1 parent 7f15d67 commit e7dc8c6
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 14 deletions.
10 changes: 5 additions & 5 deletions xml2data/testsuite/test_xml2data.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def unpatch_urllib2(self):
restore()

url = 'http://hp.vector.co.jp/authors/VA038583/'
format = """
template = """
{'apps': [html body div#doc div#main-container div.section:first-child
div.goods-container div.goods @ {
'name': div.top span.name $text,
Expand Down Expand Up @@ -63,20 +63,20 @@ def unpatch_urllib2(self):
def test_urlload(self):
self.patch_urllib2()

data = urlload(self.url, self.format)
data = urlload(self.url, self.template)
self.assertEqual(data, self.answer)

self.unpatch_urllib2()

def test_load(self):
data = load(StringIO(_PUNILABO_HTML), self.format)
data = load(StringIO(_PUNILABO_HTML), self.template)
self.assertEqual(data, self.answer)

def test_loads(self):
data = loads(_PUNILABO_HTML, self.format)
data = loads(_PUNILABO_HTML, self.template)
self.assertEqual(data, self.answer)

data = loads(_PUNILABO_HTML.decode('shift-jis'), self.format)
data = loads(_PUNILABO_HTML.decode('shift-jis'), self.template)
self.assertEqual(data, self.answer)


Expand Down
18 changes: 9 additions & 9 deletions xml2data/xml2data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import chardet


def urlload(url, format, param=None):
def urlload(url, template, param=None):
res = urllib2.urlopen(url, param)
document = ''.join(res)
encoding = ''
Expand All @@ -19,35 +19,35 @@ def urlload(url, format, param=None):
encoding = m.group('encoding') if m is not None else ''
encoding = encoding or chardet.detect(document)['encoding']
document = document.decode(encoding, 'ignore')
return Parser.parse(format, document)
return Parser.parse(template, document)


def load(s, format):
return loads(''.join(s), format)
def load(s, template):
return loads(''.join(s), template)


def loads(s, format):
def loads(s, template):
if isinstance(s, str):
encoding = chardet.detect(s)['encoding']
s = s.decode(encoding)
return Parser.parse(format, s)
return Parser.parse(template, s)


class Parser:

@classmethod
def parse(cls, format, document=None):
def parse(cls, template, document=None):
xml = document
if xml is not None and not isinstance(xml, etree.ElementBase):
xml = etree.parse(StringIO(document), etree.HTMLParser())
(d, c) = cls._parse(format, xml)
(d, c) = cls._parse(template, xml)
if c.lstrip() != '': # if non-parsed data remain
raise Xml2DataSyntaxError()
return d

@classmethod
def _parse(cls, content, xml=None):
""" parse a given content of the format.
""" parse a given content of the template.
it returns a tuple (parsed data, remaining content).
"""
c = content.lstrip()
Expand Down

0 comments on commit e7dc8c6

Please sign in to comment.