Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Only lowercase attribute names on tokens with an "unprefixed tag name" (also used when dropping duplicates)
  • Loading branch information...
commit 90eba7b8d15b18f0893fce75a8e7b08e6957ccf5 1 parent df8aa3f
t.broyer authored
Showing with 42 additions and 26 deletions.
  1. +29 −19 python/src/html5lib/html5parser.py
  2. +13 −7 python/src/html5lib/tokenizer.py
View
48 python/src/html5lib/html5parser.py
@@ -176,25 +176,35 @@ def parseError(self, errorcode="XXX-undefined-error", datavars={}):
def normalizeToken(self, token):
""" HTML5 specific normalizations to the token stream """
- if token["type"] == "EmptyTag":
- # When a solidus (/) is encountered within a tag name what happens
- # depends on whether the current tag name matches that of a void
- # element. If it matches a void element atheists did the wrong
- # thing and if it doesn't it's wrong for everyone.
-
- if token["name"] not in voidElements and ":" not in token["name"][1:-1]:
- self.parseError("incorrectly-placed-solidus")
-
- token["type"] = "StartTag"
- if ":" in token["name"][1:-1]:
- self.tokenizer.tokenQueue.append({"type":"EndTag","name":token["name"]})
-
- if token["type"] == "StartTag":
- token["data"] = dict(token["data"][::-1])
- # XXXTB: lowercase attribute names (and don't do it in the tokenizer)
-
- if token["type"] in ("StartTag", "EndTag") and ":" not in token["name"][1:-1]:
- token["name"] = token["name"].translate(asciiUpper2Lower)
+ if token["type"] in ("StartTag", "EmptyTag"):
+ if ":" not in token["name"][1:-1]:
+ # Lowercase only "unprefixed tag names"
+ token["name"] = token["name"].translate(asciiUpper2Lower)
+ token["data"] = dict([(name.translate(asciiUpper2Lower),value) for name,value in token["data"][::-1]])
+ else:
+ lowercasedAttributeNames = []
+ attrDict = {}
+ for name,value in token["data"]:
+ lowercaseName = name.translate(asciiUpper2Lower)
+ if lowercaseName not in lowercasedAttributeNames:
+ attrDict[name] = value
+ token["data"] = attrDict
+
+ if token["type"] == "EmptyTag":
+ # When a solidus (/) is encountered within a tag name what happens
+ # depends on whether the current tag name matches that of a void
+ # element or is a "prefixed tag name".
+ if ":" in token["name"][1:-1]:
+ # Process both a Start and an End tag
+ save = self.tokenizer.contentModelFlag
+ self.phase.processStartTag(token["name"], token["data"])
+ self.tokenizer.contentModelFlag = save
+ token["data"] = {}
+ token["type"] = "EndTag"
+ else:
+ if token["name"] not in voidElements:
+ self.parseError("incorrectly-placed-solidus")
+ token["type"] = "StartTag"
return token
View
20 python/src/html5lib/tokenizer.py
@@ -526,13 +526,19 @@ def attributeNameState(self):
# Attributes are not dropped at this stage. That happens when the
# start tag token is emitted so values can still be safely appended
# to attributes, but we do want to report the parse error in time.
- self.currentToken["data"][-1][0] = (
- self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
- for name, value in self.currentToken["data"][:-1]:
- if self.currentToken["data"][-1][0] == name:
- self.tokenQueue.append({"type": "ParseError", "data":
- "duplicate-attribute"})
- break
+ if ":" in self.currentToken["name"]:
+ for name, value in self.currentToken["data"][:-1]:
+ if self.currentToken["data"][-1][0] == name:
+ self.tokenQueue.append({"type": "ParseError", "data":
+ "duplicate-attribute"})
+ break
+ else:
+ lowercaseName = self.currentToken["data"][-1][0].translate(asciiUpper2Lower)
+ for name, value in self.currentToken["data"][:-1]:
+ if lowercaseName == name.translate(asciiUpper2Lower):
+ self.tokenQueue.append({"type": "ParseError", "data":
+ "duplicate-attribute"})
+ break
# XXX Fix for above XXX
if emitToken:
self.emitCurrentToken()
Please sign in to comment.
Something went wrong with that request. Please try again.