Skip to content

Commit 5bd63d6

Browse files
committed
array.c: replace sort! method implementation
- Use heap sort (O(1) auxiliary space) instead of merge sort (O(n) auxiliary space) for better space complexity. - Implement the method in C for better performance. As a result, simple sorting now consumes far less memory and is faster. Because it is implemented in C, fiber context switching is not allowed from within the comparison, but we consider the risk minimal (no one switches context in a comparison, right?).
1 parent b521d08 commit 5bd63d6

File tree

2 files changed

+71
-72
lines changed

2 files changed

+71
-72
lines changed

mrblib/array.rb

Lines changed: 0 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -198,78 +198,6 @@ def delete(key, &block)
198198
ret
199199
end
200200

201-
##
202-
# call-seq:
203-
# array.sort! -> self
204-
# array.sort! {|a, b| ... } -> self
205-
#
206-
# Sort all elements and replace +self+ with these
207-
# elements.
208-
def sort!(&block)
  # Explicit work stack instead of recursion. Two kinds of entries:
  #   [lo, hi]        -> sort self[lo..hi]
  #   [lo, split, hi] -> merge sorted self[lo...split] with self[split..hi]
  pending = [ [ 0, self.size - 1 ] ]
  while !pending.empty?
    lo, second, third = pending.pop
    if third.nil?
      # two-element entry: a "sort this range" request
      hi = second
      next unless lo < hi
      if lo + 1 == hi
        # exactly two elements: compare once and swap if out of order
        a = self[lo]
        b = self[hi]
        order = block ? block.call(a, b) : (a <=> b)
        if order.nil?
          raise ArgumentError, "comparison of #{a.inspect} and #{b.inspect} failed"
        end
        if order > 0
          self[lo] = b
          self[hi] = a
        end
      else
        # Pushed in reverse order of execution: the left half is sorted
        # first, then the right half, and finally both are merged.
        split = ((lo + hi + 1) / 2).floor
        pending.push [ lo, split, hi ]
        pending.push [ split, hi ]
        pending.push [ lo, (split - 1) ] if lo < split - 1
      end
    else
      # three-element entry: a "merge" request
      split = second
      hi = third
      head = self[lo, split - lo]
      head_len = head.size

      # Assigning an element to itself is harmless but detaches +head+ from
      # any storage it shares with +self+, which avoids a large memory copy
      # inside the merge loop and gives a huge performance gain.
      head[0] = head[0]

      taken = 0       # next unread index in +head+ (left run)
      cursor = split  # next unread index in the right run (still in self)
      out = lo        # next index of self to be written
      while out <= hi
        # left run exhausted: the rest of the right run is already in place
        break if taken >= head_len
        if cursor > hi
          # right run exhausted: copy the remainder of the left run
          self[out, head_len - taken] = head[taken, head_len - taken]
          break
        end
        a = head[taken]
        b = self[cursor]
        order = block ? block.call(a, b) : (a <=> b)
        if order.nil?
          raise ArgumentError, "comparison of #{a.inspect} and #{b.inspect} failed"
        end
        if order <= 0
          # ties take the left element first, keeping the merge stable
          self[out] = a
          taken += 1
        else
          self[out] = b
          cursor += 1
        end
        out += 1
      end
    end
  end
  self
end
272-
273201
##
274202
# call-seq:
275203
# array.sort -> new_array

src/array.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,76 @@ mrb_ary_delete(mrb_state *mrb, mrb_value self)
14091409
return ret;
14101410
}
14111411

1412+
static mrb_bool
1413+
sort_cmp(mrb_state *mrb, mrb_value *p, mrb_int a, mrb_int b, mrb_value blk)
1414+
{
1415+
mrb_int cmp;
1416+
1417+
if (mrb_nil_p(blk)) {
1418+
cmp = mrb_cmp(mrb, p[a], p[b]);
1419+
}
1420+
else {
1421+
mrb_value c = mrb_funcall_id(mrb, blk, MRB_SYM(call), 2, p[a], p[b]);
1422+
if (mrb_nil_p(c) || !mrb_fixnum_p(c)) {
1423+
mrb_raisef(mrb, E_ARGUMENT_ERROR, "comparison of %!v and %!v failed", p[a], p[b]);
1424+
}
1425+
cmp = mrb_fixnum(c);
1426+
}
1427+
return cmp > 0;
1428+
}
1429+
1430+
static void
1431+
heapify(mrb_state *mrb, mrb_value *a, mrb_int index, mrb_int size, mrb_value blk)
1432+
{
1433+
mrb_int max = index;
1434+
mrb_int left_index = 2 * index + 1;
1435+
mrb_int right_index = left_index + 1;
1436+
if (left_index < size && sort_cmp(mrb, a, left_index, max, blk)) {
1437+
max = left_index;
1438+
}
1439+
if (right_index < size && sort_cmp(mrb, a, right_index, max, blk)) {
1440+
max = right_index;
1441+
}
1442+
if (max != index) {
1443+
mrb_value tmp = a[max];
1444+
a[max] = a[index];
1445+
a[index] = tmp;
1446+
heapify(mrb, a, max, size, blk);
1447+
}
1448+
}
1449+
1450+
/*
1451+
* call-seq:
1452+
* array.sort! -> self
1453+
* array.sort! {|a, b| ... } -> self
1454+
*
1455+
* Sort all elements and replace +self+ with these
1456+
* elements.
1457+
*/
1458+
static mrb_value
1459+
mrb_ary_sort_bang(mrb_state *mrb, mrb_value ary)
1460+
{
1461+
mrb_value blk;
1462+
1463+
mrb_int n = RARRAY_LEN(ary);
1464+
if (n < 2) return ary;
1465+
1466+
ary_modify(mrb, mrb_ary_ptr(ary));
1467+
mrb_get_args(mrb, "&", &blk);
1468+
1469+
mrb_value *a = RARRAY_PTR(ary);
1470+
for (mrb_int i = n / 2 - 1; i > -1; i--) {
1471+
heapify(mrb, a, i, n, blk);
1472+
}
1473+
for (mrb_int i = n - 1; i > 0; i--) {
1474+
mrb_value tmp = a[0];
1475+
a[0] = a[i];
1476+
a[i] = tmp;
1477+
heapify(mrb, a, 0, i, blk);
1478+
}
1479+
return ary;
1480+
}
1481+
14121482
void
14131483
mrb_init_array(mrb_state *mrb)
14141484
{
@@ -1446,6 +1516,7 @@ mrb_init_array(mrb_state *mrb)
14461516
mrb_define_method_id(mrb, a, MRB_SYM(unshift), mrb_ary_unshift_m, MRB_ARGS_ANY()); /* 15.2.12.5.30 */
14471517
mrb_define_method_id(mrb, a, MRB_SYM(to_s), mrb_ary_to_s, MRB_ARGS_NONE());
14481518
mrb_define_method_id(mrb, a, MRB_SYM(inspect), mrb_ary_to_s, MRB_ARGS_NONE());
1519+
mrb_define_method_id(mrb, a, MRB_SYM_B(sort), mrb_ary_sort_bang, MRB_ARGS_NONE());
14491520

14501521
mrb_define_method_id(mrb, a, MRB_SYM(__ary_eq), mrb_ary_eq, MRB_ARGS_REQ(1));
14511522
mrb_define_method_id(mrb, a, MRB_SYM(__ary_cmp), mrb_ary_cmp, MRB_ARGS_REQ(1));

0 commit comments

Comments
 (0)