Permalink
Browse files

Only lowercase attribute names on tokens with an "unprefixed tag name" (also used when dropping duplicates)
  • Loading branch information...
1 parent df8aa3f commit 90eba7b8d15b18f0893fce75a8e7b08e6957ccf5 t.broyer committed Mar 15, 2008
Showing with 42 additions and 26 deletions.
  1. +29 −19 python/src/html5lib/html5parser.py
  2. +13 −7 python/src/html5lib/tokenizer.py
View
48 python/src/html5lib/html5parser.py
@@ -176,25 +176,35 @@ def parseError(self, errorcode="XXX-undefined-error", datavars={}):
def normalizeToken(self, token):
""" HTML5 specific normalizations to the token stream """
- if token["type"] == "EmptyTag":
- # When a solidus (/) is encountered within a tag name what happens
- # depends on whether the current tag name matches that of a void
- # element. If it matches a void element atheists did the wrong
- # thing and if it doesn't it's wrong for everyone.
-
- if token["name"] not in voidElements and ":" not in token["name"][1:-1]:
- self.parseError("incorrectly-placed-solidus")
-
- token["type"] = "StartTag"
- if ":" in token["name"][1:-1]:
- self.tokenizer.tokenQueue.append({"type":"EndTag","name":token["name"]})
-
- if token["type"] == "StartTag":
- token["data"] = dict(token["data"][::-1])
- # XXXTB: lowercase attribute names (and don't do it in the tokenizer)
-
- if token["type"] in ("StartTag", "EndTag") and ":" not in token["name"][1:-1]:
- token["name"] = token["name"].translate(asciiUpper2Lower)
+ if token["type"] in ("StartTag", "EmptyTag"):
+ if ":" not in token["name"][1:-1]:
+ # Lowercase only "unprefixed tag names"
+ token["name"] = token["name"].translate(asciiUpper2Lower)
+ token["data"] = dict([(name.translate(asciiUpper2Lower),value) for name,value in token["data"][::-1]])
+ else:
+ lowercasedAttributeNames = []
+ attrDict = {}
+ for name,value in token["data"]:
+ lowercaseName = name.translate(asciiUpper2Lower)
+ if lowercaseName not in lowercasedAttributeNames:
+ attrDict[name] = value
+ token["data"] = attrDict
+
+ if token["type"] == "EmptyTag":
+ # When a solidus (/) is encountered within a tag name what happens
+ # depends on whether the current tag name matches that of a void
+ # element or is a "prefixed tag name".
+ if ":" in token["name"][1:-1]:
+ # Process both a Start and an End tag
+ save = self.tokenizer.contentModelFlag
+ self.phase.processStartTag(token["name"], token["data"])
+ self.tokenizer.contentModelFlag = save
+ token["data"] = {}
+ token["type"] = "EndTag"
+ else:
+ if token["name"] not in voidElements:
+ self.parseError("incorrectly-placed-solidus")
+ token["type"] = "StartTag"
return token
View
20 python/src/html5lib/tokenizer.py
@@ -526,13 +526,19 @@ def attributeNameState(self):
# Attributes are not dropped at this stage. That happens when the
# start tag token is emitted so values can still be safely appended
# to attributes, but we do want to report the parse error in time.
- self.currentToken["data"][-1][0] = (
- self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
- for name, value in self.currentToken["data"][:-1]:
- if self.currentToken["data"][-1][0] == name:
- self.tokenQueue.append({"type": "ParseError", "data":
- "duplicate-attribute"})
- break
+ if ":" in self.currentToken["name"]:
+ for name, value in self.currentToken["data"][:-1]:
+ if self.currentToken["data"][-1][0] == name:
+ self.tokenQueue.append({"type": "ParseError", "data":
+ "duplicate-attribute"})
+ break
+ else:
+ lowercaseName = self.currentToken["data"][-1][0].translate(asciiUpper2Lower)
+ for name, value in self.currentToken["data"][:-1]:
+ if lowercaseName == name.translate(asciiUpper2Lower):
+ self.tokenQueue.append({"type": "ParseError", "data":
+ "duplicate-attribute"})
+ break
# XXX Fix for above XXX
if emitToken:
self.emitCurrentToken()

0 comments on commit 90eba7b

Please sign in to comment.