Fix bug 819912 - Duplicate heading IDs

mdn · Mar 6, 2014 · f281169 · f281169
1 parent 20eed50
commit f281169
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 1 deletion.
diff --git a/apps/wiki/content.py b/apps/wiki/content.py
@@ -555,7 +555,8 @@ def __iter__(self):
             buffer.append(token)
             if 'StartTag' == token['type']:
                 attrs = dict(token['data'])
-                if 'id' in attrs:
+                # Heading IDs are skipped here and (re)evaluated in pass 2
+                if 'id' in attrs and token['name'] not in HEAD_TAGS:
                     self.known_ids.add(attrs['id'])
                 if 'name' in attrs:
                     self.known_ids.add(attrs['name'])
@@ -609,8 +610,17 @@ def __iter__(self):
                 slug = self.slugify(u''.join(text))
                 if not slug:
                     slug = self.gen_id()
+                else:
+                    # Create unique slug for heading tags with the same content
+                    start_inc = 2
+                    slug_base = slug
+                    while slug in self.known_ids:
+                        slug = '{0}_{1}'.format(slug_base, start_inc)
+                        start_inc += 1
+
                 attrs['id'] = slug
                 start['data'] = attrs.items()
+                self.known_ids.add(slug)
 
                 # Finally, emit the tokens we scooped up for the header.
                 yield start

diff --git a/apps/wiki/tests/test_content.py b/apps/wiki/tests/test_content.py
@@ -82,6 +82,38 @@ def test_section_ids(self):
             ok_(id not in seen_ids)
             seen_ids.add(id)
 
+    def test_incremented_section_ids(self):
+
+        doc_src = """
+        <h1 class="header1">Header One</h1>
+        <h1>Header One</h1>
+        <h1>Header One</h1>
+        <h1>Header Two</h1>
+        <h1 name="someId">Header Two</h1>
+        """
+
+        result_src = (wiki.content
+                      .parse(doc_src)
+                      .injectSectionIDs()
+                      .serialize())
+
+        expected = """
+        <h1 class="header1" id="Header_One">Header One</h1>
+        <h1 id="Header_One_2">Header One</h1>
+        <h1 id="Header_One_3">Header One</h1>
+        <h1 id="Header_Two">Header Two</h1>
+        <h1 id="someId" name="someId">Header Two</h1>
+        """
+
+        eq_(result_src, expected)
+
+        # Ensure a re-run is stable: _2, _3 suffixes must not be renumbered
+        result_src = (wiki.content
+                      .parse(expected)
+                      .injectSectionIDs()
+                      .serialize())
+        eq_(result_src, expected)
+
     def test_simple_implicit_section_extract(self):
         doc_src = """
             <h1 id="s1">Head 1</h1>