Skip to content

Commit

Permalink
Add command fuzzy matching
Browse files Browse the repository at this point in the history
The command 'openstack user' throws an error with no helpful message even though 'openstack user list' works,
which is really bad UX. This patch adds fuzzy matching to print the most similar commands when the user mistypes
a command.
Use the Damerau-Levenshtein algorithm to find the best similarity. It borrows from Git's
implementation of the algorithm: git/git@8af84da


$ openstack user
openstack: 'user' is not an openstack command. See 'openstack --help'.
Did you mean one of these?
  user create
  user delete
  user list
  user password set
  user set
  user show
  consumer create
  consumer delete
  consumer list
  consumer set
  consumer show

Change-Id: Id8732504c0b36177319fc33fae7e630b7b714be7
Closes-Bug: 1462192
  • Loading branch information
kafka committed Jul 24, 2015
1 parent b39e2f2 commit 3396764
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 5 deletions.
54 changes: 51 additions & 3 deletions cliff/app.py
Expand Up @@ -9,9 +9,11 @@
import logging.handlers
import os
import sys
import operator

from .complete import CompleteCommand
from .help import HelpAction, HelpCommand
from .utils import damerau_levenshtein, COST

# Make sure the cliff library has a logging handler
# in case the app developer doesn't set up logging.
Expand Down Expand Up @@ -287,14 +289,60 @@ def interact(self):
self.interpreter.cmdloop()
return 0

def get_fuzzy_matches(self, cmd):
    """Return the known commands that most closely resemble ``cmd``.

    Every command whose full name starts with ``cmd`` is scored 0. Any
    other command is scored by the Damerau-Levenshtein distance between
    ``cmd`` and the first word of the command name, plus one, so it
    always ranks behind a prefix match.

    :param cmd: the unrecognised command word typed by the user
    :returns: a list containing all prefix matches followed by all
        commands sharing the lowest non-zero score, ordered by score and
        then by name; an empty list when no commands are registered
    """
    # Word separator used inside command names.
    sep = ' ' if self.command_manager.convert_underscores else '_'

    scored = []
    for candidate in sorted(k[0] for k in self.command_manager):
        if candidate.startswith(cmd):
            # Give prefix match a very good score
            scored.append((candidate, 0))
            continue
        first_word = candidate.split(sep)[0]
        scored.append(
            (candidate, damerau_levenshtein(cmd, first_word, COST) + 1))
    scored.sort(key=operator.itemgetter(1, 0))

    # Lowest non-zero score, if any. Selecting by score instead of walking
    # the list with a bare index avoids running off the end when there are
    # no commands, only prefix matches, or when the best group is last.
    best_similarity = next((score for _, score in scored if score), None)

    return [name for name, score in scored
            if not score or score == best_similarity]

def run_subcommand(self, argv):
try:
subcommand = self.command_manager.find_command(argv)
except ValueError as err:
if self.options.debug:
raise
# If there was no exact match, try to find a fuzzy match
the_cmd = argv[0]
fuzzy_matches = self.get_fuzzy_matches(the_cmd)
if fuzzy_matches:
article = 'a'
if self.NAME[0] in 'aeiou':
article = 'an'
self.stdout.write('%s: \'%s\' is not %s %s command. '
'See \'%s --help\'.\n'
% (self.NAME, the_cmd, article,
self.NAME, self.NAME))
self.stdout.write('Did you mean one of these?\n')
for match in fuzzy_matches:
self.stdout.write(' %s\n' % match)
else:
self.LOG.error(err)
if self.options.debug:
raise
else:
self.LOG.error(err)
return 2
cmd_factory, cmd_name, sub_argv = subcommand
kwargs = {}
Expand Down
20 changes: 18 additions & 2 deletions cliff/tests/test_app.py
Expand Up @@ -3,8 +3,7 @@
try:
from StringIO import StringIO
except ImportError:
# Probably python 3, that test won't be run so ignore the error
pass
from io import StringIO
import sys

import nose
Expand All @@ -13,6 +12,7 @@
from cliff.app import App
from cliff.command import Command
from cliff.commandmanager import CommandManager
from cliff.tests import utils


def make_app(**kwargs):
Expand Down Expand Up @@ -412,3 +412,19 @@ def test_unknown_cmd_debug():
app.run(['--debug', 'hell']) == 2
except ValueError as err:
assert "['hell']" in ('%s' % err)


def test_list_matching_commands():
    """Running the unknown command 't' prints fuzzy-match suggestions."""
    captured = StringIO()
    manager = utils.TestCommandManager(utils.TEST_NAMESPACE)
    app = App('testing', '1', manager, stdout=captured)
    app.NAME = 'test'
    try:
        # An unknown command is expected to yield exit status 2.
        assert app.run(['t']) == 2
    except SystemExit:
        pass
    text = captured.getvalue()
    expected_fragments = (
        "test: 't' is not a test command. See 'test --help'.",
        'Did you mean one of these?',
        'three word command\n two words\n',
    )
    for fragment in expected_fragments:
        assert fragment in text
88 changes: 88 additions & 0 deletions cliff/utils.py
@@ -0,0 +1,88 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Cost table for the edit operations used when comparing strings.
# A smaller total cost means the two strings are more similar.
COST = {
    'w': 0,  # swap operation
    's': 2,  # substitution operation
    'a': 1,  # insertion operation
    'd': 3,  # deletion operation
}


def damerau_levenshtein(s1, s2, cost):
    """Return the weighted Damerau-Levenshtein distance from s1 to s2.

    The distance is the minimum total cost of single-character edits
    (insertion, deletion, substitution, or swap of two adjacent
    characters) that turn ``s1`` into ``s2``.

    The distances between every prefix of ``s1`` and every prefix of
    ``s2`` are computed row by row with dynamic programming. Only three
    rows are kept at any time: the one being filled, the previous one,
    and the one before that (needed for the swap case).

    :param s1: source string
    :param s2: target string
    :param cost: mapping with the keys 'w' (swap), 's' (substitution),
        'a' (insertion) and 'd' (deletion) giving each operation's cost
    :returns: the minimum total edit cost

    More details:
    https://en.wikipedia.org/wiki/Levenshtein_distance
    https://github.com/git/git/commit/8af84dadb142f7321ff0ce8690385e99da8ede2f
    """
    if s1 == s2:
        return 0

    rows = len(s1)
    cols = len(s2)

    # One side empty: everything is inserted or everything is deleted.
    if rows == 0:
        return cols * cost['a']
    if cols == 0:
        return rows * cost['d']

    # Distances from the empty prefix of s1: j insertions.
    prev = [j * cost['a'] for j in range(cols + 1)]
    current = list(prev)
    prev_prev = list(prev)

    for i, ch1 in enumerate(s1):
        # Distance to the empty prefix of s2: i + 1 deletions.
        current[0] = (i + 1) * cost['d']

        for j, ch2 in enumerate(s2):
            options = [
                # substitution (free when the characters already match)
                prev[j] + (ch1 != ch2) * cost['s'],
                # insertion
                current[j] + cost['a'],
                # deletion
                prev[j + 1] + cost['d'],
            ]

            # swap: the last two characters of both prefixes are
            # transposed
            if i > 0 and j > 0 and s1[i - 1] == ch2 and ch1 == s2[j - 1]:
                options.append(prev_prev[j - 1] + cost['w'])

            current[j + 1] = min(options)

        # Rotate the buffers; the oldest row is reused for the next pass.
        prev_prev, prev, current = prev, current, prev_prev

    return prev[-1]

0 comments on commit 3396764

Please sign in to comment.