Skip to content

Commit

Permalink
[ADT][NFC] Early bail out for ComputeEditDistance
Browse files Browse the repository at this point in the history
The minimum bound on the number of edits is the size difference between the 2 arrays.
If MaxEditDistance is smaller than this, we can bail out early without needing to traverse any of the arrays.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D127070
  • Loading branch information
njames93 committed Jun 8, 2022
1 parent d484797 commit 638b0fb
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 0 deletions.
9 changes: 9 additions & 0 deletions llvm/include/llvm/ADT/edit_distance.h
Expand Up @@ -61,6 +61,15 @@ unsigned ComputeMappedEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
typename ArrayRef<T>::size_type m = FromArray.size();
typename ArrayRef<T>::size_type n = ToArray.size();

if (MaxEditDistance) {
// If the difference in size between the 2 arrays is larger than the max
// distance allowed, we can bail out as we will always need more than
// MaxEditDistance insertions or removals.
typename ArrayRef<T>::size_type AbsDiff = m > n ? m - n : n - m;
if (AbsDiff > MaxEditDistance)
return MaxEditDistance + 1;
}

const unsigned SmallBufferSize = 64;
unsigned SmallBuffer[SmallBufferSize];
std::unique_ptr<unsigned[]> Allocated;
Expand Down
1 change: 1 addition & 0 deletions llvm/unittests/ADT/CMakeLists.txt
Expand Up @@ -22,6 +22,7 @@ add_llvm_unittest(ADTTests
DenseSetTest.cpp
DepthFirstIteratorTest.cpp
DirectedGraphTest.cpp
EditDistanceTest.cpp
EnumeratedArrayTest.cpp
EquivalenceClassesTest.cpp
FallibleIteratorTest.cpp
Expand Down
63 changes: 63 additions & 0 deletions llvm/unittests/ADT/EditDistanceTest.cpp
@@ -0,0 +1,63 @@
//===- llvm/unittests/ADT/EditDistanceTest.cpp - Edit distance tests ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/edit_distance.h"
#include "gtest/gtest.h"
#include <cstdlib>

using namespace llvm;

namespace {

/// Pair of values produced by one editDistanceAndMaps() call. The field order
/// matters: the helper builds this struct with positional brace
/// initialization ({NumMaps, EditDist}).
struct Result {
/// Number of times the mapping functor passed to
/// llvm::ComputeMappedEditDistance was invoked.
unsigned NumMaps;
/// Value returned by llvm::ComputeMappedEditDistance.
unsigned EditDist;
};
} // namespace

/// Computes the mapped edit distance between the characters of \p A and \p B
/// while counting how many times the mapping functor is called.
///
/// The functor is an identity mapping, so it does not influence the computed
/// distance; its only purpose is to let tests observe whether the
/// implementation looked at any element of the inputs.
///
/// \param A               Source string.
/// \param B               Target string.
/// \param MaxEditDistance Limit forwarded unchanged to
///                        llvm::ComputeMappedEditDistance (defaults to 0).
/// \returns The functor invocation count together with the distance reported
///          by llvm::ComputeMappedEditDistance.
static Result editDistanceAndMaps(StringRef A, StringRef B,
                                  unsigned MaxEditDistance = 0) {
  Result Outcome = {0, 0};

  // Identity mapping that bumps the invocation counter on every call.
  auto CountingIdentity = [&Outcome](const char Ch) {
    ++Outcome.NumMaps;
    return Ch;
  };

  const auto From = makeArrayRef(A.data(), A.size());
  const auto To = makeArrayRef(B.data(), B.size());

  Outcome.EditDist = llvm::ComputeMappedEditDistance(
      From, To, CountingIdentity, true, MaxEditDistance);
  return Outcome;
}

// Verifies the early exit in ComputeMappedEditDistance: when the size
// difference between the inputs exceeds MaxEditDistance, the result is
// MaxEditDistance + 1 and the mapping functor is never invoked.
TEST(EditDistance, VerifyShortCircuit) {
  const StringRef Short = "Hello";
  const StringRef Long = "HelloWorld";

  // A limit equal to the size difference (5) must not trigger the early exit.
  const Result AtLimit = editDistanceAndMaps(Short, Long, 5);
  EXPECT_EQ(AtLimit.EditDist, 5U);
  EXPECT_GT(AtLimit.NumMaps, 0U);

  // With the default MaxEditDistance of 0 the early-exit check is skipped.
  const Result NoLimit = editDistanceAndMaps(Short, Long);
  EXPECT_EQ(NoLimit.EditDist, 5U);
  EXPECT_GT(NoLimit.NumMaps, 0U);

  // Limit of 4 is below the size difference: bail out with 4 + 1, no maps.
  const Result Grow4 = editDistanceAndMaps(Short, Long, 4);
  EXPECT_EQ(Grow4.EditDist, 5U);
  EXPECT_EQ(Grow4.NumMaps, 0U);

  // Same limit with the arguments swapped (longer string first).
  const Result Shrink4 = editDistanceAndMaps(Long, Short, 4);
  EXPECT_EQ(Shrink4.EditDist, 5U);
  EXPECT_EQ(Shrink4.NumMaps, 0U);

  // Limit of 1: bail out with 1 + 1, no maps.
  const Result Grow1 = editDistanceAndMaps(Short, Long, 1);
  EXPECT_EQ(Grow1.EditDist, 2U);
  EXPECT_EQ(Grow1.NumMaps, 0U);

  // Limit of 1 with the arguments swapped.
  const Result Shrink1 = editDistanceAndMaps(Long, Short, 1);
  EXPECT_EQ(Shrink1.EditDist, 2U);
  EXPECT_EQ(Shrink1.NumMaps, 0U);
}

0 comments on commit 638b0fb

Please sign in to comment.