Skip to content

Commit 67346a7

Browse files
authored
[Bug #21449] Fix Set#divide{|a,b|} using Union-find structure (#13680)
* [Bug #21449] Fix Set#divide{|a,b|} using Union-find structure Implements Union-find structure with path compression. Since divide{|a,b|} calls the given block n**2 times in the worst case, there is no need to implement union-by-rank or union-by-size optimization. * Avoid internal arrays from being modified from block passed to Set#divide Internal arrays can be modified from yielded block through ObjectSpace. Freeze readonly array, use ALLOCV_N instead of mutable array.
1 parent db6f397 commit 67346a7

File tree

2 files changed

+67
-69
lines changed

2 files changed

+67
-69
lines changed

set.c

Lines changed: 63 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -843,66 +843,72 @@ set_i_classify(VALUE set)
843843
return args[0];
844844
}
845845

846-
struct set_divide_args {
847-
VALUE self;
848-
VALUE set_class;
849-
VALUE final_set;
850-
VALUE hash;
851-
VALUE current_set;
852-
VALUE current_item;
853-
unsigned long ni;
854-
unsigned long nj;
855-
};
846+
// Union-find with path compression
847+
static long
848+
set_divide_union_find_root(long *uf_parents, long index, long *tmp_array)
849+
{
850+
long root = uf_parents[index];
851+
long update_size = 0;
852+
while (root != index) {
853+
tmp_array[update_size++] = index;
854+
index = root;
855+
root = uf_parents[index];
856+
}
857+
for (long j = 0; j < update_size; j++) {
858+
long idx = tmp_array[j];
859+
uf_parents[idx] = root;
860+
}
861+
return root;
862+
}
856863

857-
static VALUE
858-
set_divide_block0(RB_BLOCK_CALL_FUNC_ARGLIST(j, arg))
859-
{
860-
struct set_divide_args *args = (struct set_divide_args *)arg;
861-
if (args->nj > args->ni) {
862-
VALUE i = args->current_item;
863-
if (RTEST(rb_yield_values(2, i, j)) && RTEST(rb_yield_values(2, j, i))) {
864-
VALUE hash = args->hash;
865-
if (args->current_set == Qnil) {
866-
VALUE set = rb_hash_aref(hash, j);
867-
if (set == Qnil) {
868-
VALUE both[2] = {i, j};
869-
set = set_s_create(2, both, args->set_class);
870-
rb_hash_aset(hash, i, set);
871-
rb_hash_aset(hash, j, set);
872-
set_i_add(args->final_set, set);
873-
}
874-
else {
875-
set_i_add(set, i);
876-
rb_hash_aset(hash, i, set);
877-
}
878-
args->current_set = set;
879-
}
880-
else {
881-
set_i_add(args->current_set, j);
882-
rb_hash_aset(hash, j, args->current_set);
864+
static void
865+
set_divide_union_find_merge(long *uf_parents, long i, long j, long *tmp_array)
866+
{
867+
long root_i = set_divide_union_find_root(uf_parents, i, tmp_array);
868+
long root_j = set_divide_union_find_root(uf_parents, j, tmp_array);
869+
if (root_i != root_j) uf_parents[root_j] = root_i;
870+
}
871+
872+
static VALUE
873+
set_divide_arity2(VALUE set)
874+
{
875+
VALUE tmp, uf;
876+
long size, *uf_parents, *tmp_array;
877+
VALUE set_class = rb_obj_class(set);
878+
VALUE items = set_i_to_a(set);
879+
rb_ary_freeze(items);
880+
size = RARRAY_LEN(items);
881+
tmp_array = ALLOCV_N(long, tmp, size);
882+
uf_parents = ALLOCV_N(long, uf, size);
883+
for (long i = 0; i < size; i++) {
884+
uf_parents[i] = i;
885+
}
886+
for (long i = 0; i < size - 1; i++) {
887+
VALUE item1 = RARRAY_AREF(items, i);
888+
for (long j = i + 1; j < size; j++) {
889+
VALUE item2 = RARRAY_AREF(items, j);
890+
if (RTEST(rb_yield_values(2, item1, item2)) &&
891+
RTEST(rb_yield_values(2, item2, item1))) {
892+
set_divide_union_find_merge(uf_parents, i, j, tmp_array);
883893
}
884894
}
885895
}
886-
args->nj++;
887-
return j;
888-
}
889-
890-
static VALUE
891-
set_divide_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, arg))
892-
{
893-
struct set_divide_args *args = (struct set_divide_args *)arg;
894-
VALUE hash = args->hash;
895-
args->current_set = rb_hash_aref(hash, i);
896-
args->current_item = i;
897-
args->nj = 0;
898-
rb_block_call(args->self, id_each, 0, 0, set_divide_block0, arg);
899-
if (args->current_set == Qnil) {
900-
VALUE set = set_s_create(1, &i, args->set_class);
901-
rb_hash_aset(hash, i, set);
902-
set_i_add(args->final_set, set);
903-
}
904-
args->ni++;
905-
return i;
896+
VALUE final_set = set_s_create(0, 0, rb_cSet);
897+
VALUE hash = rb_hash_new();
898+
for (long i = 0; i < size; i++) {
899+
VALUE v = RARRAY_AREF(items, i);
900+
long root = set_divide_union_find_root(uf_parents, i, tmp_array);
901+
VALUE set = rb_hash_aref(hash, LONG2FIX(root));
902+
if (set == Qnil) {
903+
set = set_s_create(0, 0, set_class);
904+
rb_hash_aset(hash, LONG2FIX(root), set);
905+
set_i_add(final_set, set);
906+
}
907+
set_i_add(set, v);
908+
}
909+
ALLOCV_END(tmp);
910+
ALLOCV_END(uf);
911+
return final_set;
906912
}
907913

908914
static void set_merge_enum_into(VALUE set, VALUE arg);
@@ -936,19 +942,7 @@ set_i_divide(VALUE set)
936942
RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
937943

938944
if (rb_block_arity() == 2) {
939-
VALUE final_set = set_s_create(0, 0, rb_cSet);
940-
struct set_divide_args args = {
941-
.self = set,
942-
.set_class = rb_obj_class(set),
943-
.final_set = final_set,
944-
.hash = rb_hash_new(),
945-
.current_set = 0,
946-
.current_item = 0,
947-
.ni = 0,
948-
.nj = 0
949-
};
950-
rb_block_call(set, id_each, 0, 0, set_divide_block, (VALUE)&args);
951-
return final_set;
945+
return set_divide_arity2(set);
952946
}
953947

954948
VALUE values = rb_hash_values(set_i_classify(set));

test/ruby/test_set.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,10 @@ def test_divide
781781
ret.each { |s| n += s.size }
782782
assert_equal(set.size, n)
783783
assert_equal(set, ret.flatten)
784+
785+
set = Set[2,12,9,11,13,4,10,15,3,8,5,0,1,7,14]
786+
ret = set.divide { |a,b| (a - b).abs == 1 }
787+
assert_equal(2, ret.size)
784788
end
785789

786790
def test_freeze

0 commit comments

Comments
 (0)