Skip to content

Commit

Permalink
[doctools] Report the number of topics with more than 5 words
Browse files Browse the repository at this point in the history
Currently 185 of roughly 496 topics have more than 5 words.  Not too bad.

We still need to clean up broken links and orphaned topics.
  • Loading branch information
Andy C committed Dec 4, 2023
1 parent fccc52f commit 6624570
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 35 deletions.
9 changes: 9 additions & 0 deletions doc/ref/chap-special-var.md
Expand Up @@ -204,8 +204,17 @@ The compadjust builtin uses this variable.

### REPLY

OSH read sets this:

read < myfile

### _reply

YSH read sets this:

read --line < myfile

read --all < myfile

## Functions

Expand Down
1 change: 1 addition & 0 deletions doc/ref/toc-ysh.md
Expand Up @@ -223,6 +223,7 @@ X [External Lang] BEGIN END when (awk)
[YSH Tracing] SHX_indent SHX_punct SHX_pid_str
[History] YSH_HISTFILE
[Platform] OILS_VERSION
[read] _reply
X [Wok] _filename _line
X [Builtin Sub] _buffer
```
Expand Down
20 changes: 6 additions & 14 deletions doctools/help_gen.py
Expand Up @@ -366,9 +366,10 @@ def HelpTopics(s):
class DocNode(object):
"""To visualize doc structure."""

def __init__(self, name, attrs=None):
def __init__(self, name, attrs=None, text=None):
self.name = name
self.attrs = attrs # for h2 and h3 links
self.text = text
self.children = []


Expand Down Expand Up @@ -432,7 +433,8 @@ def CardsFromChapters(out_dir, tag_level, paths):
cur_h2_node = h2
elif tag == 'h3':
name = html_lib.PrettyHref(heading, preserve_anchor_case=True)
h3 = DocNode(name, attrs=attrs)
# attach text so we can see which topics have empty bodies
h3 = DocNode(name, attrs=attrs, text=text)
cur_h2_node.children.append(h3)

if tag != tag_level:
Expand Down Expand Up @@ -461,8 +463,6 @@ def CardsFromChapters(out_dir, tag_level, paths):

root_node.children.append(page_node)

# 89 sections, 257 topics/cards
# Also want stats about which ones are done
num_sections = sum(len(child.children) for child in root_node.children)

log('%d chapters -> (doctools/make_help) -> %d <h3> cards from %d <h2> sections to %s',
Expand Down Expand Up @@ -555,7 +555,7 @@ def main(argv):
cc_prefix = argv[4]
pages = argv[5:]

topic_dict, debug_info = CardsFromChapters(out_dir, 'h3', pages)
topic_dict, _ = CardsFromChapters(out_dir, 'h3', pages)

# Write topic dict as Python and C++

Expand Down Expand Up @@ -606,27 +606,19 @@ def TopicMetadata():

elif filename.endswith('.html'):
assert filename.startswith('chap-'), path

# . CardsFromChapters() on chap-*, which gives you debug_Info above
chapters.append(path)

else:
raise RuntimeError('Expected toc-* or chap-*, got %r' % filename)

# out_dir=None so we don't write anything
topics, chap_tree = CardsFromChapters(None, 'h3', chapters)

#log('%d chapters: %s', len(chapters), chapters[:5])
log('%d topics: %s', len(topics), topics.keys()[:10])

#pprint.pprint(index_debug_info)
log('')

ref_check.Check(all_toc_nodes, chap_tree)


# TODO: check all docs
# 3. Ref Check

else:
raise RuntimeError('Invalid action %r' % action)

Expand Down
45 changes: 24 additions & 21 deletions doctools/ref_check.py
Expand Up @@ -84,14 +84,16 @@ def Check(all_toc_nodes, chap_tree):
if 0:
PrintTree(chap_tree, sys.stdout)

num_chapters = 0
num_sections = 0
num_topics = 0

num_topics_written = 0

chap_topics = collections.defaultdict(list) # topic_id -> list of chapters

min_words = 5 # arbitrary

for chap in chap_tree.children:
num_chapters += 1

for section in chap.children:
num_sections += 1
Expand All @@ -108,15 +110,23 @@ def Check(all_toc_nodes, chap_tree):
chap_topics[topic_id].append(chap.name)
link_to.add((chap.name, topic.name))

# split by whitespace
num_words = len(topic.text.split())
if num_words > min_words:
num_topics_written += 1
elif num_words > 1:
log('short: %r', topic.text)

log('%d in link_to set: %s', len(link_to), sorted(link_to)[:10])
log('')

num_sections = sum(len(child.children) for child in chap_tree.children)
num_sections = sum(len(child.children) for child in chap_tree.children)
num_chapters = len(chap_tree.children)

log('Chapter stats')
log(' num chapters = %d', num_chapters)
log(' num_sections = %d', num_sections)
log(' num_topics = %d', num_topics)
log(' topics with more than %d words = %d', min_words, num_topics_written)

chap_topic_set = set(chap_topics)
log(' num unique topics = %d', len(chap_topic_set))
Expand All @@ -127,25 +137,18 @@ def Check(all_toc_nodes, chap_tree):
assert 'j8-escape' in index_topic_set
assert 'j8-escape' in chap_topic_set

broken = link_from - link_to
log('%d Broken Links:', len(broken))
for pair in sorted(broken):
log(' %s', pair)
log('')

orphaned = link_to - link_from
log('%d Orphaned Topics:', len(orphaned))
for pair in sorted(orphaned):
log(' %s', pair)
log('')


if 0:
not_linked_to = chap_topic_set - index_topic_set
broken = link_from - link_to
log('%d Broken Links:', len(broken))
for pair in sorted(broken):
log(' %s', pair)
log('')

orphaned = link_to - link_from
log('%d Orphaned Topics:', len(orphaned))
for pair in sorted(orphaned):
log(' %s', pair)
log('')
log('%d topics not linked to:', len(not_linked_to))
for topic_id in not_linked_to:
log(' %s in %s', topic_id, chap_topics[topic_id])

log('')
log('Topics in multiple chapters:')
Expand Down

0 comments on commit 6624570

Please sign in to comment.