Add descriptor plugin for description column while browsing.

Fixes bug 1177125.
mozilla · Jul 18, 2016 · 91cef85 · 91cef85
2 parents 270077c + 35fbef5
commit 91cef85
Show file tree

Hide file tree

Showing 21 changed files with 347 additions and 35 deletions.
diff --git a/docs/source/development.rst b/docs/source/development.rst
@@ -203,6 +203,7 @@ collection of subcomponents which do the actual work:
 
 .. digraph:: plugin
 
+   "Plugin" -> "FolderToIndex";
    "Plugin" -> "TreeToIndex" -> "FileToIndex";
    "Plugin" -> "FileToSkim";
    "Plugin" -> "filters";
@@ -248,8 +249,14 @@ manually:
     .. autoclass:: dxr.plugins.Plugin
        :members:
 
-Actual plugin functionality is implemented within tree indexers, file
-indexers, filters, and skimmers.
+Actual plugin functionality is implemented within file indexers, tree indexers,
+folder indexers, filters, and skimmers.
+
+Folder Indexers
+===============
+
+.. autoclass:: dxr.indexers.FolderToIndex
+   :members:
 
 Tree Indexers
 =============

diff --git a/dxr/app.py b/dxr/app.py
@@ -358,6 +358,15 @@ def browse(tree, path=''):
                             frozen['generated_date'])
 
 
+def concat_plugin_headers(plugin_list):
+    """Return a list of the concatenation of all browse_headers in the
+    FolderToIndexes of the given plugin list.
+
+    """
+    return list(chain.from_iterable(p.folder_to_index.browse_headers
+                                    for p in plugin_list if p.folder_to_index))
+
+
 def _browse_folder(tree, path, config):
     """Return a rendered folder listing for folder ``path``.
 
@@ -378,14 +387,15 @@ def item_or_list(item):
 
     frozen = frozen_config(tree)
 
+    plugin_headers = concat_plugin_headers(plugins_named(frozen['enabled_plugins']))
     files_and_folders = filtered_query(
         frozen['es_alias'],
         FILE,
         filter={'folder': path},
         sort=[{'is_folder': 'desc'}, 'name'],
         size=1000000,
         include=['name', 'modified', 'size', 'link', 'path', 'is_binary',
-                 'is_folder'])
+                 'is_folder'] + plugin_headers)
 
     if not files_and_folders:
         raise NotFound
@@ -403,6 +413,7 @@ def item_or_list(item):
         generated_date=frozen['generated_date'],
         google_analytics_key=config.google_analytics_key,
         paths_and_names=_linked_pathname(path, tree),
+        plugin_headers=plugin_headers,
         filters=filter_menu_items(
             plugins_named(frozen['enabled_plugins'])),
         # Autofocus only at the root of each tree:
@@ -416,6 +427,7 @@ def item_or_list(item):
              f['name'],
              decode_es_datetime(item_or_list(f['modified'])) if 'modified' in f else None,
              f.get('size'),
+             [f.get(h, [''])[0] for h in plugin_headers],
              url_for('.browse', tree=tree, path=f.get('link', f['path'])[0]))
             for f in files_and_folders])
 

diff --git a/dxr/build.py b/dxr/build.py
@@ -378,8 +378,9 @@ def unicode_contents(path, encoding_guess):  # TODO: Make accessible to TreeToIn
 
 
 def unignored(folder, ignore_paths, ignore_filenames, want_folders=False):
-    """Return an iterable of absolute paths to unignored source tree files or
-    the folders that contain them.
+    """Return an iterable of Unicode absolute paths to unignored source
+    tree files or the folders that contain them. Skip any paths that cannot
+    be decoded as UTF-8.
 
     Returned files include both binary and text ones.
 
@@ -410,15 +411,21 @@ def raise_(exc):
                 if any(fnmatchcase("/" + path.replace(os.sep, "/"), e) for e in ignore_paths):
                     continue  # Ignore the file.
 
-                yield join(root, f)
+                try:
+                    yield join(root, f).decode('utf-8')
+                except UnicodeDecodeError:
+                    pass
 
         # Exclude folders that match an ignore pattern.
         # os.walk listens to any changes we make in `folders`.
         folders[:] = _unignored_folders(
             folders, rel_path, ignore_filenames, ignore_paths)
         if want_folders:
             for f in folders:
-                yield join(root, f)
+                try:
+                    yield join(root, f).decode('utf-8')
+                except UnicodeDecodeError:
+                    pass
 
 def index_file(tree, tree_indexers, path, es, index):
     """Index a single file into ES, and build a static HTML representation of it.
@@ -446,10 +453,6 @@ def index_file(tree, tree_indexers, path, es, index):
     # Just like index_folders, if the path is not in UTF-8, then elasticsearch
     # will not accept the path, so just move on.
     rel_path = relpath(path, tree.source_folder)
-    try:
-        rel_path = rel_path.decode('utf-8')
-    except UnicodeDecodeError:
-        return
     is_text = isinstance(contents, unicode)
     is_link = islink(path)
     # Index by line if the contents are text and the path is not a symlink.
@@ -572,7 +575,7 @@ def index_chunk(tree,
                        open_log(tree.log_folder,
                                 'index-chunk-%s.log' % worker_number))
                 for path in paths:
-                    log and log.write('Starting %s.\n' % path)
+                    log and log.write('Starting %s.\n' % path.encode('utf-8'))
                     index_file(tree, tree_indexers, path, es, index)
                 log and log.write('Finished chunk.\n')
             finally:
@@ -587,26 +590,19 @@ def index_chunk(tree,
 
 def index_folders(tree, index, es):
     """Index the folder hierarchy into ES."""
+    folder_indexers = [(p.name, p.folder_to_index)
+                       for p in tree.enabled_plugins if p.folder_to_index]
     with aligned_progressbar(unignored(tree.source_folder,
                                        tree.ignore_paths,
                                        tree.ignore_filenames,
                                        want_folders=True),
                      show_eta=False,  # never even close
                      label='Indexing folders') as folders:
         for folder in folders:
-            rel_path = relpath(folder, tree.source_folder)
-            # If the path is not in UTF-8, then elasticsearch will not
-            # accept the path, so just move on.
-            try:
-                rel_path = rel_path.decode('utf-8')
-            except UnicodeDecodeError:
-                continue
-            superfolder_path, folder_name = split(rel_path)
-            es.index(index, FILE, {
-                'path': [rel_path],  # array for consistency with non-folder file docs
-                'folder': superfolder_path,
-                'name': folder_name,
-                'is_folder': True})
+            needles = {'is_folder': True}
+            for name, folder_to_index in folder_indexers:
+                needles.update(dict(folder_to_index(name, tree, folder).needles()))
+            es.index(index, FILE, needles)
 
 
 def index_files(tree, tree_indexers, index, pool, es):
@@ -640,7 +636,7 @@ def path_chunks(tree):
             result = future.result()
             if result:
                 formatted_tb, type, value, path = result
-                print 'A worker failed while indexing %s:' % path
+                print 'A worker failed while indexing %s:' % path.encode('utf-8')
                 print formatted_tb
                 # Abort everything if anything fails:
                 raise type, value  # exits with non-zero

diff --git a/dxr/indexers.py b/dxr/indexers.py
@@ -65,6 +65,21 @@ def plugin_config(self):
         return getattr(self.tree, self.plugin_name)
 
 
+class FolderToIndex(PluginConfig):
+    """The FolderToIndex generates needles for folders and provides an
+    optional list of headers to display in the browse view as `browse_headers`.
+    """
+    browse_headers = []
+
+    def __init__(self, plugin_name, tree, path):
+        self.plugin_name = plugin_name
+        self.tree = tree
+        self.path = path
+
+    def needles(self):
+        return []
+
+
 class TreeToIndex(PluginConfig):
     """A TreeToIndex performs build environment setup and teardown and serves
     as a repository for scratch data that should persist across an entire

diff --git a/dxr/plugins/__init__.py b/dxr/plugins/__init__.py
@@ -41,6 +41,7 @@ class Plugin(object):
     """
     def __init__(self,
                  filters=None,
+                 folder_to_index=None,
                  tree_to_index=None,
                  file_to_skim=None,
                  mappings=None,
@@ -50,6 +51,7 @@ def __init__(self,
                  config_schema=None):
         """
         :arg filters: A list of filter classes
+        :arg folder_to_index: A :class:`FolderToIndex` subclass
         :arg tree_to_index: A :class:`TreeToIndex` subclass
         :arg file_to_skim: A :class:`FileToSkim` subclass
         :arg mappings: Additional Elasticsearch mapping definitions for all the
@@ -98,6 +100,7 @@ def __init__(self,
         # we can parallelize even better. OTOH, there are probably a LOT of
         # files in any time-consuming tree, so we already have a perfectly
         # effective and easier way to parallelize.
+        self.folder_to_index = folder_to_index
         self.tree_to_index = tree_to_index
         self.file_to_skim = file_to_skim
         self.mappings = mappings or {}
@@ -138,6 +141,7 @@ class called ``FileToIndex`` (if there is one) when ``file_to_index()``
                     file_to_index_class=namespace.get('FileToIndex'))
 
         return cls(filters=filters_from_namespace(namespace),
+                   folder_to_index=namespace.get('FolderToIndex'),
                    tree_to_index=tree_to_index,
                    file_to_skim=namespace.get('FileToSkim'),
                    mappings=namespace.get('mappings'),

diff --git a/dxr/plugins/core.py b/dxr/plugins/core.py
@@ -4,7 +4,7 @@
 from datetime import datetime
 from itertools import chain
 from os import stat
-from os.path import relpath, splitext, realpath, basename
+from os.path import relpath, splitext, realpath, basename, split
 import re
 
 from flask import url_for
@@ -86,6 +86,7 @@
                 'type': 'boolean',
                 'index': 'no'
             },
+            'description': UNINDEXED_STRING,
 
             # Sidebar nav links:
             'links': {
@@ -437,6 +438,17 @@ def __init__(self, term, enabled_plugins):
         super(RefFilter, self).__init__(term, enabled_plugins, lambda f: f.is_reference)
 
 
+class FolderToIndex(dxr.indexers.FolderToIndex):
+    def needles(self):
+        rel_path = relpath(self.path, self.tree.source_folder)
+        superfolder_path, folder_name = split(rel_path)
+        return [
+            ('path', [rel_path]),  # array for consistency with non-folder file docs
+            ('folder', superfolder_path),
+            ('name', folder_name)
+        ]
+
+
 class TreeToIndex(dxr.indexers.TreeToIndex):
     def environment(self, vars):
         vars['source_folder'] = self.tree.source_folder