Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate search index entries for all headings, not just H1 and H2 #644

Merged
merged 3 commits into from
Jun 22, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions mkdocs/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

class SearchIndex(object):
"""
Search index is a collection of pages and sections (H1 and H2
Search index is a collection of pages and sections (heading
tags and their following content are sections).
"""

Expand All @@ -26,9 +26,9 @@ def _find_toc_by_id(self, toc, id_):
for toc_item in toc:
if toc_item.url[1:] == id_:
return toc_item
for toc_sub_item in toc_item.children:
if toc_sub_item.url[1:] == id_:
return toc_sub_item
toc_item_r = self._find_toc_by_id(toc_item.children, id_)
if toc_item_r is not None:
return toc_item_r
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This if statement could be simplified with return toc_item_r

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

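That wouldn't work, because an unconditional `return toc_item_r` would end the loop as soon as the recursion into the first item's children comes back with None (for instance, on reaching the first empty list of children), so the remaining siblings would never be searched. For example: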

() recursing [u'#_1', u'#_2'] for dnsmasq
(_1) recursing [u'#capwap-split-mac', u'#local-switching-vs-overlay-tunnels', u'#vlans-vlan-profiles-overlayvlanwla-tunnels'] for dnsmasq
(capwap-split-mac) recursing [] for dnsmasq
(_1) recursion returned <type 'NoneType'>
() recursion returned <type 'NoneType'>

So _find_toc_by_id returns None.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, sorry. Good point. I totally misread it.


def _add_entry(self, title, text, loc):
"""
Expand All @@ -44,7 +44,7 @@ def _add_entry(self, title, text, loc):
def add_entry_from_context(self, page, content, toc):
"""
Create a set of entries in the index for a page. One for
the page itself and then one for each of it's H1 and H2
the page itself and then one for each of its' heading
tags.
"""

Expand Down Expand Up @@ -144,7 +144,7 @@ def __eq__(self, other):
class ContentParser(HTMLParser):
"""
Given a block of HTML, group the content under the preceding
H1 or H2 tags which can then be used for creating an index
heading tags which can then be used for creating an index
for that section.
"""

Expand All @@ -161,8 +161,8 @@ def __init__(self, *args, **kwargs):
def handle_starttag(self, tag, attrs):
"""Called at the start of every HTML tag."""

# We only care about the opening tag for H1 and H2.
if tag not in ("h1", "h2"):
# We only care about the opening tag for headings.
if tag not in (["h%d" % x for x in range(1, 7)]):
return

# We are dealing with a new header, create a new section
Expand All @@ -178,8 +178,8 @@ def handle_starttag(self, tag, attrs):
def handle_endtag(self, tag):
"""Called at the end of every HTML tag."""

# We only care about the opening tag for H1 and H2.
if tag not in ("h1", "h2"):
# We only care about the opening tag for headings.
if tag not in (["h%d" % x for x in range(1, 7)]):
return

self.is_header_tag = False
Expand All @@ -191,7 +191,7 @@ def handle_data(self, data):

if self.section is None:
# This means we have some content at the start of the
# HTML before we reach a H1 or H2. We don't actually
# HTML before we reach a heading tag. We don't actually
# care about that content as it will be added to the
# overall page entry in the search. So just skip it.
return
Expand Down
11 changes: 8 additions & 3 deletions mkdocs/tests/search_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def test_find_toc_by_id(self):
self.assertEqual(toc_item2.title, "Heading 2")

toc_item3 = index._find_toc_by_id(toc, "heading-3")
self.assertEqual(toc_item3, None)
self.assertEqual(toc_item3.url, "#heading-3")
self.assertEqual(toc_item3.title, "Heading 3")

def test_create_search_index(self):

Expand Down Expand Up @@ -123,7 +124,7 @@ def test_create_search_index(self):
index = search.SearchIndex()
index.add_entry_from_context(page, html_content, toc)

self.assertEqual(len(index._entries), 3)
self.assertEqual(len(index._entries), 4)

loc = page.abs_url

Expand All @@ -136,5 +137,9 @@ def test_create_search_index(self):
self.assertEqual(index._entries[1]['location'], "{0}#heading-1".format(loc))

self.assertEqual(index._entries[2]['title'], "Heading 2")
self.assertEqual(strip_whitespace(index._entries[2]['text']), "Content2Heading3Content3")
self.assertEqual(strip_whitespace(index._entries[2]['text']), "Content2")
self.assertEqual(index._entries[2]['location'], "{0}#heading-2".format(loc))

self.assertEqual(index._entries[3]['title'], "Heading 3")
self.assertEqual(strip_whitespace(index._entries[3]['text']), "Content3")
self.assertEqual(index._entries[3]['location'], "{0}#heading-3".format(loc))