Skip to content

Commit

Permalink
Fix quadratic runtime when updating region highlighter matches
Browse files Browse the repository at this point in the history
Running %sYeti<ret>casdf on file
[example.journal.txt](#4685 (comment))
can cause noticeable lag.  This is because we insert text at 6000
selections, which means we need to update highlighters in those lines.
The runtime for updating range highlighters is quadratic in the
number of selections: for each selection, we call on_new_range(),
which calls add_matches(), which calls std::rotate(), which needs
needs linear time.

Fix the quadratic runtime by updating all ranges in the same loop.
This means that we no longer need to use std::rotate() for every
single range; instead we can just use one call to std::inplace_merge()
for all ranges (since ranges are already sorted).

The most natural implementation would use coroutines to implement a
generator that supplies the ranges to the above loop.  Unfortunately
clang < 14 does not support coroutines when using -stdlib=libstdc++
(there is only experimental support with -stdlib=libc++). Use a
temporary vector for now.  Another alternative is to an iterator with
a custom destructor.

I used this script to benchmark the improvements.
(In hindsight I could have just used "-ui json" instead of tmux).

	#!/bin/sh
	set -ex
	N=${1:-100}
	kak=${2:-./kak.opt}
	for i in $(seq "$N")
	do
		echo -n "\
	2022-02-06 * Earth
	    expense:electronics:audio    116.7 USD
	    liability:card              -116.7 USD

	2022-02-06 * Blue Yeti USB Microphone
	    expense:electronics:audio    116.7 USD
	    liability:card              -116.7 USD

	"
	done > big-journal.ledger
	echo > .empty-tmux.conf 'set -sg escape-time 5'
	test_tmux() {
		tmux -S .tmux-socket -f .empty-tmux.conf "$@"
	}
	test_tmux new-session -d "$kak" big-journal.ledger
	test_tmux send-keys '%sYeti' Enter c 1234567890
	sleep .2
	test_tmux send-keys Escape
	while ! test_tmux capture-pane -p | grep 123
	do
		sleep .1
	done
	test_tmux send-keys ':wq' Enter
	while test_tmux ls
	do
		sleep .1
	done
	rm -f .tmux-socket .empty-tmux.conf

The average runtimes for this script show an improvement as the input
file grows:

	         kak.old  kak.new
	N=10000    1.142    0.897
	N=20000    2.879    1.400

Detailed results:

	$ hyperfine -w 1 './bench.sh 10000 ./kak.opt.'{old,new}
	Benchmark 1: ./bench.sh 10000 ./kak.opt.old
	  Time (mean ± σ):      1.142 s ±  0.072 s    [User: 0.252 s, System: 0.059 s]
	  Range (min … max):    1.060 s …  1.242 s    10 runs

	Benchmark 2: ./bench.sh 10000 ./kak.opt.new
	  Time (mean ± σ):     897.2 ms ±  19.3 ms    [User: 241.6 ms, System: 57.4 ms]
	  Range (min … max):   853.9 ms … 923.6 ms    10 runs

	Summary
	  './bench.sh 10000 ./kak.opt.new' ran
	    1.27 ± 0.09 times faster than './bench.sh 10000 ./kak.opt.old'
	$ hyperfine -w 1 './bench.sh 20000 ./kak.opt.'{old,new}
	Benchmark 1: ./bench.sh 20000 ./kak.opt.old
	  Time (mean ± σ):      2.879 s ±  0.065 s    [User: 0.553 s, System: 0.126 s]
	  Range (min … max):    2.768 s …  2.963 s    10 runs

	Benchmark 2: ./bench.sh 20000 ./kak.opt.new
	  Time (mean ± σ):      1.400 s ±  0.018 s    [User: 0.428 s, System: 0.083 s]
	  Range (min … max):    1.374 s …  1.429 s    10 runs

	Summary
	  './bench.sh 20000 ./kak.opt.new' ran
	    2.06 ± 0.05 times faster than '../repro.sh 20000 ./kak.opt.old'
  • Loading branch information
krobelus committed Sep 10, 2022
1 parent b1cd9f9 commit 4c6b44d
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 64 deletions.
69 changes: 35 additions & 34 deletions src/highlighters.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2210,7 +2210,7 @@ struct RegionsHighlighter : public Highlighter
m_regexes.insert({key, Regex{str, flags}});
}

void add_matches(const Buffer& buffer, LineRange range, Cache& cache) const
void add_matches(const Buffer& buffer, const LineRangeList& ranges, Cache& cache) const
{
for (auto& [key, regex] : m_regexes)
cache.matches[key];
Expand All @@ -2226,42 +2226,42 @@ struct RegionsHighlighter : public Highlighter
for (auto& [key, regex] : m_regexes)
matchers.push_back(Matcher{cache.matches.get(key), regex});

for (auto line = range.begin; line < range.end; ++line)
for (LineRange range : ranges)
{
const StringView l = buffer[line];
const auto flags = RegexExecFlags::NotEndOfLine; // buffer line already ends with \n

for (auto& [matches, regex, pivot, vm] : matchers)
for (auto line = range.begin; line < range.end; ++line)
{
auto extra_flags = RegexExecFlags::None;
auto pos = l.begin();
while (vm.exec(pos, l.end(), l.begin(), l.end(), flags | extra_flags))
const StringView l = buffer[line];
const auto flags = RegexExecFlags::NotEndOfLine; // buffer line already ends with \n

for (auto& [matches, regex, pivot, vm] : matchers)
{
ConstArrayView<const char*> captures = vm.captures();
const bool with_capture = regex.mark_count() > 0 and captures[2] != nullptr and
captures[1] - captures[0] < std::numeric_limits<uint16_t>::max();
matches.push_back({
line,
(int)(captures[0] - l.begin()),
(int)(captures[1] - l.begin()),
(uint16_t)(with_capture ? captures[2] - captures[0] : 0),
(uint16_t)(with_capture ? captures[3] - captures[2] : 0)
});
pos = captures[1];

extra_flags = (captures[0] == captures[1]) ? RegexExecFlags::NotInitialNull : RegexExecFlags::None;
auto extra_flags = RegexExecFlags::None;
auto pos = l.begin();
while (vm.exec(pos, l.end(), l.begin(), l.end(), flags | extra_flags))
{
ConstArrayView<const char*> captures = vm.captures();
const bool with_capture = regex.mark_count() > 0 and captures[2] != nullptr and
captures[1] - captures[0] < std::numeric_limits<uint16_t>::max();
matches.push_back({
line,
(int)(captures[0] - l.begin()),
(int)(captures[1] - l.begin()),
(uint16_t)(with_capture ? captures[2] - captures[0] : 0),
(uint16_t)(with_capture ? captures[3] - captures[2] : 0)
});
pos = captures[1];

extra_flags = (captures[0] == captures[1]) ? RegexExecFlags::NotInitialNull : RegexExecFlags::None;
}
}
}
}

for (auto& [matches, regex, pivot, vm] : matchers)
{
auto pos = std::lower_bound(matches.begin(), matches.begin() + pivot, range.begin,
[](const RegexMatch& m, LineCount l) { return m.line < l; });
kak_assert(pos == matches.begin() + pivot or pos->line >= range.end); // We should not have had matches for range

// Move new matches into position.
std::rotate(pos, matches.begin() + pivot, matches.end());
std::inplace_merge(matches.begin(), matches.begin() + pivot, matches.end(),
[](const auto& lhs, const auto& rhs) { return lhs.line < rhs.line; });
}
}

Expand Down Expand Up @@ -2315,7 +2315,7 @@ struct RegionsHighlighter : public Highlighter
add_regex(region->m_recurse, region->match_capture());
}

add_matches(buffer, range, cache);
add_matches(buffer, {range}, cache);
cache.ranges.reset(range);
cache.buffer_timestamp = buffer_timestamp;
cache.regions_timestamp = m_regions_timestamp;
Expand All @@ -2333,12 +2333,13 @@ struct RegionsHighlighter : public Highlighter
modified = true;
}

cache.ranges.add_range(range, [&, this](const LineRange& range) {
if (range.begin == range.end)
return;
add_matches(buffer, range, cache);
modified = true;
});
auto touched_ranges = cache.ranges.add_range(range);
if (not touched_ranges.empty())
{
if (any_of(touched_ranges, [](auto range) { return range.begin != range.end; }))
modified = true;
add_matches(buffer, touched_ranges, cache);
}
return modified;
}
}
Expand Down
53 changes: 26 additions & 27 deletions src/line_modification.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,29 +145,31 @@ void LineRangeSet::update(ConstArrayView<LineModification> modifs)
erase(remove_if(*this, [](auto& r) { return r.begin >= r.end; }), end());
}

void LineRangeSet::add_range(LineRange range, FunctionRef<void (LineRange)> on_new_range)
LineRangeList LineRangeSet::add_range(LineRange range)
{
LineRangeList touched_ranges;
auto insert_at = std::lower_bound(begin(), end(), range.begin,
[](LineRange range, LineCount line) { return range.end < line; });
if (insert_at == end() or insert_at->begin > range.end)
on_new_range(range);
touched_ranges.push_back(range);
else
{
auto pos = range.begin;
auto it = insert_at;
for (; it != end() and it->begin <= range.end; ++it)
{
if (pos < it->begin)
on_new_range({pos, it->begin});
touched_ranges.push_back({pos, it->begin});

range = LineRange{std::min(range.begin, it->begin), std::max(range.end, it->end)};
pos = it->end;
}
insert_at = erase(insert_at, it);
if (pos < range.end)
on_new_range({pos, range.end});
touched_ranges.push_back({pos, range.end});
}
insert(insert_at, range);
return touched_ranges;
}

void LineRangeSet::remove_range(LineRange range)
Expand Down Expand Up @@ -264,64 +266,61 @@ UnitTest test_line_modifications{[]()
}};

UnitTest test_line_range_set{[]{
auto expect = [](ConstArrayView<LineRange> ranges) {
return [it = ranges.begin(), end = ranges.end()](LineRange r) mutable {
kak_assert(it != end);
kak_assert(r == *it++);
};
auto expect = [](ConstArrayView<LineRange> actual, ConstArrayView<LineRange> expected) {
kak_assert(actual == expected);
};

{
LineRangeSet ranges;
ranges.add_range({0, 5}, expect({{0, 5}}));
ranges.add_range({10, 15}, expect({{10, 15}}));
ranges.add_range({5, 10}, expect({{5, 10}}));
expect(ranges.add_range({0, 5}), {{0, 5}});
expect(ranges.add_range({10, 15}), {{10, 15}});
expect(ranges.add_range({5, 10}), {{5, 10}});
kak_assert((ranges.view() == ConstArrayView<LineRange>{{0, 15}}));
ranges.add_range({5, 10}, expect({}));
expect(ranges.add_range({5, 10}), {});
ranges.remove_range({3, 8});
kak_assert((ranges.view() == ConstArrayView<LineRange>{{0, 3}, {8, 15}}));
}
{
LineRangeSet ranges;
ranges.add_range({0, 7}, expect({{0, 7}}));
ranges.add_range({9, 15}, expect({{9, 15}}));
ranges.add_range({5, 10}, expect({{7, 9}}));
expect(ranges.add_range({0, 7}), {{0, 7}});
expect(ranges.add_range({9, 15}), {{9, 15}});
expect(ranges.add_range({5, 10}), {{7, 9}});
kak_assert((ranges.view() == ConstArrayView<LineRange>{{0, 15}}));
}
{
LineRangeSet ranges;
ranges.add_range({0, 7}, expect({{0, 7}}));
ranges.add_range({11, 15}, expect({{11, 15}}));
ranges.add_range({5, 10}, expect({{7, 10}}));
expect(ranges.add_range({0, 7}), {{0, 7}});
expect(ranges.add_range({11, 15}), {{11, 15}});
expect(ranges.add_range({5, 10}), {{7, 10}});
kak_assert((ranges.view() == ConstArrayView<LineRange>{{0, 10}, {11, 15}}));
ranges.remove_range({8, 13});
kak_assert((ranges.view() == ConstArrayView<LineRange>{{0, 8}, {13, 15}}));
}
{
LineRangeSet ranges;
ranges.add_range({0, 5}, expect({{0, 5}}));
ranges.add_range({10, 15}, expect({{10, 15}}));
expect(ranges.add_range({0, 5}), {{0, 5}});
expect(ranges.add_range({10, 15}), {{10, 15}});
ranges.update(ConstArrayView<LineModification>{{3, 3, 3, 1}, {11, 9, 2, 4}});
kak_assert((ranges.view() == ConstArrayView<LineRange>{{0, 3}, {8, 9}, {13, 15}}));
}
{
LineRangeSet ranges;
ranges.add_range({0, 5}, expect({{0, 5}}));
expect(ranges.add_range({0, 5}), {{0, 5}});
ranges.update(ConstArrayView<LineModification>{{2, 2, 2, 0}});
kak_assert((ranges.view() == ConstArrayView<LineRange>{{0, 3}}));
}
{
LineRangeSet ranges;
ranges.add_range({0, 5}, expect({{0, 5}}));
expect(ranges.add_range({0, 5}), {{0, 5}});
ranges.update(ConstArrayView<LineModification>{{2, 2, 0, 2}});
kak_assert((ranges.view() == ConstArrayView<LineRange>{{0, 2}, {4, 7}}));
}
{
LineRangeSet ranges;
ranges.add_range({0, 1}, expect({{0, 1}}));
ranges.add_range({5, 10}, expect({{5, 10}}));
ranges.add_range({15, 20}, expect({{15, 20}}));
ranges.add_range({25, 30}, expect({{25, 30}}));
expect(ranges.add_range({0, 1}), {{0, 1}});
expect(ranges.add_range({5, 10}), {{5, 10}});
expect(ranges.add_range({15, 20}), {{15, 20}});
expect(ranges.add_range({25, 30}), {{25, 30}});
ranges.update(ConstArrayView<LineModification>{{2, 2, 3, 0}});
kak_assert((ranges.view() == ConstArrayView<LineRange>{{0, 1}, {2, 7}, {12, 17}, {22, 27}}));
}
Expand Down
8 changes: 5 additions & 3 deletions src/line_modification.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ Vector<LineModification> compute_line_modifications(const Buffer& buffer, size_t

using LineRange = Range<LineCount>;

struct LineRangeSet : private Vector<LineRange, MemoryDomain::Highlight>
using LineRangeList = Vector<LineRange, MemoryDomain::Highlight>;

struct LineRangeSet : private LineRangeList
{
using Base = Vector<LineRange, MemoryDomain::Highlight>;
using Base = LineRangeList;
using Base::operator[];
using Base::begin;
using Base::end;
Expand All @@ -38,7 +40,7 @@ struct LineRangeSet : private Vector<LineRange, MemoryDomain::Highlight>
void reset(LineRange range) { Base::operator=({range}); }

void update(ConstArrayView<LineModification> modifs);
void add_range(LineRange range, FunctionRef<void (LineRange)> on_new_range);
LineRangeList add_range(LineRange range);
void remove_range(LineRange range);
};

Expand Down

0 comments on commit 4c6b44d

Please sign in to comment.