Add command fuzzy matching

The command 'openstack user' throws an error with no helpful message even though 'openstack user list' works, which is really bad UX. This patch adds fuzzy matching to print the most similar commands when the user mistypes a command. It uses the Damerau-Levenshtein algorithm to find the best similarity, borrowing from Git's implementation of the algorithm (git/git@8af84da). $ openstack user openstack: 'user' is not an openstack command. See 'openstack --help'. Did you mean one of these? user create user delete user list user password set user set user show consumer create consumer delete consumer list consumer set consumer show Change-Id: Id8732504c0b36177319fc33fae7e630b7b714be7 Closes-Bug: 1462192
openstack · Jul 24, 2015 · 3396764 · 3396764
1 parent b39e2f2
commit 3396764
Show file tree

Hide file tree

Showing 3 changed files with 157 additions and 5 deletions.
diff --git a/cliff/app.py b/cliff/app.py
@@ -9,9 +9,11 @@
 import logging.handlers
 import os
 import sys
+import operator
 
 from .complete import CompleteCommand
 from .help import HelpAction, HelpCommand
+from .utils import damerau_levenshtein, COST
 
 # Make sure the cliff library has a logging handler
 # in case the app developer doesn't set up logging.
@@ -287,14 +289,60 @@ def interact(self):
         self.interpreter.cmdloop()
         return 0
 
+    def get_fuzzy_matches(self, cmd):
+        """Return the known commands that most closely resemble ``cmd``.
+
+        Every command whose name starts with ``cmd`` gets score 0. Every
+        other command is scored by the weighted Damerau-Levenshtein
+        distance between ``cmd`` and the command's first word, plus one.
+        The result holds all score-0 commands followed by every command
+        that shares the lowest remaining score.
+
+        :param cmd: the unrecognized command word typed by the user
+        :returns: list of command names ordered by score, then by name;
+            an empty list when the command manager has no commands
+        """
+
+        sep = ' ' if self.command_manager.convert_underscores else '_'
+
+        scored = []
+        for candidate in (k[0] for k in self.command_manager):
+            if candidate.startswith(cmd):
+                # Give prefix match a very good score
+                scored.append((candidate, 0))
+                continue
+            # Only the first word of the candidate is compared; the +1
+            # is added on top of the distance for non-prefix candidates.
+            prefix = candidate.split(sep)[0]
+            distance = damerau_levenshtein(cmd, prefix, COST)
+            scored.append((candidate, distance + 1))
+
+        # A single bounded pass replaces the two index-driven loops, which
+        # ran past the end of the list when it was empty, when every entry
+        # scored 0, or when every remaining entry tied for the best score.
+        matches = []
+        best_similarity = 0
+        ranked = sorted(scored, key=operator.itemgetter(1, 0))
+        for candidate, score in ranked:
+            if score > best_similarity:
+                if best_similarity:
+                    # Everything at the best non-zero score is collected.
+                    break
+                # First score above the prefix matches: this is the best
+                # similarity, so keep collecting while the score equals it.
+                best_similarity = score
+            matches.append(candidate)
+
+        return matches
+
     def run_subcommand(self, argv):
         try:
             subcommand = self.command_manager.find_command(argv)
         except ValueError as err:
-            if self.options.debug:
-                raise
+            # If there was no exact match, try to find a fuzzy match
+            the_cmd = argv[0]
+            fuzzy_matches = self.get_fuzzy_matches(the_cmd)
+            if fuzzy_matches:
+                article = 'a'
+                if self.NAME[0] in 'aeiou':
+                    article = 'an'
+                self.stdout.write('%s: \'%s\' is not %s %s command. '
+                                  'See \'%s --help\'.\n'
+                                  % (self.NAME, the_cmd, article,
+                                      self.NAME, self.NAME))
+                self.stdout.write('Did you mean one of these?\n')
+                for match in fuzzy_matches:
+                    self.stdout.write('  %s\n' % match)
             else:
-                self.LOG.error(err)
+                if self.options.debug:
+                    raise
+                else:
+                    self.LOG.error(err)
             return 2
         cmd_factory, cmd_name, sub_argv = subcommand
         kwargs = {}

diff --git a/cliff/tests/test_app.py b/cliff/tests/test_app.py
@@ -3,8 +3,7 @@
 try:
     from StringIO import StringIO
 except ImportError:
-    # Probably python 3, that test won't be run so ignore the error
-    pass
+    from io import StringIO
 import sys
 
 import nose
@@ -13,6 +12,7 @@
 from cliff.app import App
 from cliff.command import Command
 from cliff.commandmanager import CommandManager
+from cliff.tests import utils
 
 
 def make_app(**kwargs):
@@ -412,3 +412,19 @@ def test_unknown_cmd_debug():
         app.run(['--debug', 'hell']) == 2
     except ValueError as err:
         assert "['hell']" in ('%s' % err)
+
+
+def test_list_matching_commands():
+    """An unknown command prints the error line and the suggestions."""
+    captured = StringIO()
+    manager = utils.TestCommandManager(utils.TEST_NAMESPACE)
+    app = App('testing', '1', manager, stdout=captured)
+    app.NAME = 'test'
+    try:
+        assert app.run(['t']) == 2
+    except SystemExit:
+        pass
+    output = captured.getvalue()
+    expected_fragments = (
+        "test: 't' is not a test command. See 'test --help'.",
+        'Did you mean one of these?',
+        'three word command\n  two words\n',
+    )
+    for fragment in expected_fragments:
+        assert fragment in output
diff --git a/cliff/utils.py b/cliff/utils.py
@@ -0,0 +1,88 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Each edit operation is assigned its own cost:
+#  'w' means swap operation, the cost is 0;
+#  's' means substitution operation, the cost is 2;
+#  'a' means insertion operation, the cost is 1;
+#  'd' means deletion operation, the cost is 3;
+# A smaller total cost means a better similarity.
+COST = {'w': 0, 's': 2, 'a': 1, 'd': 3}
+
+
+def damerau_levenshtein(s1, s2, cost):
+    """Return the weighted Damerau-Levenshtein distance from s1 to s2.
+
+    The distance is the cheapest total cost of single-character edits
+    (insertion, deletion, substitution, or a swap of two adjacent
+    characters) that turn ``s1`` into ``s2``, with each kind of edit
+    priced by ``cost``.
+
+    The table of distances between every prefix of ``s1`` and every prefix
+    of ``s2`` is filled in row by row. Only three rows are kept at a time:
+    the row being filled, the row before it, and the row before that
+    (needed for the swap case).
+
+    :param s1: the source string
+    :param s2: the target string
+    :param cost: mapping with the keys 'w' (swap), 's' (substitution),
+        'a' (insertion) and 'd' (deletion)
+    :returns: 0 when the strings are equal, otherwise the total edit cost
+
+    More details:
+        https://en.wikipedia.org/wiki/Levenshtein_distance
+        https://github.com/git/git/commit/8af84dadb142f7321ff0ce8690385e99da8ede2f
+    """
+
+    if s1 == s2:
+        return 0
+
+    n1, n2 = len(s1), len(s2)
+
+    # One empty side: the answer is pure insertions or pure deletions.
+    if not n1:
+        return n2 * cost['a']
+    if not n2:
+        return n1 * cost['d']
+
+    ins_w = cost['a']
+    del_w = cost['d']
+    sub_w = cost['s']
+
+    previous = [col * ins_w for col in range(n2 + 1)]
+    current = list(previous)
+    two_back = list(previous)
+
+    for i, ch1 in enumerate(s1):
+        # Column 0: delete the first i + 1 characters of s1.
+        current[0] = (i + 1) * del_w
+
+        for j, ch2 in enumerate(s2):
+            best = min(
+                previous[j] + (ch1 != ch2) * sub_w,  # substitution
+                current[j] + ins_w,                  # insertion
+                previous[j + 1] + del_w,             # deletion
+            )
+
+            # swap: the last two characters on each side are transposed
+            if i and j and s1[i - 1] == ch2 and ch1 == s2[j - 1]:
+                best = min(best, two_back[j - 1] + cost['w'])
+
+            current[j + 1] = best
+
+        # Rotate the rows; the oldest one is reused as the next scratch row.
+        two_back, previous, current = previous, current, two_back
+
+    return previous[-1]