Permalink
Browse files

Use Bentley & McIlroy's qsort for sorting strings and blocks

Currently, R3 uses platform-specific code for sorting.
This may increase the effort necessary to port the interpreter to new platforms.

This implementation brings the following advantages:

- The sorting algorithm is stable, solving cc#1152
- Sort results will be consistent across a wide range of platforms
- Porting to new platforms becomes easier
- It is a battle-tested solution: this is how the problem was solved in, e.g., OpenBSD, FreeBSD, Apple's operating systems, and Android

The commit makes the following changes:

- Bentley & McIlroy's qsort is added as src/core/f-qsort.c.
- The source is unmodified except for a reference to where the source was downloaded from and two parts marked "commented-out"
- An attribution notice is added to NOTICE.
- The additional source is listed in src/tools/file-base.r.
- The default bundled (Linux/x86, 0.4.4) makefile is regenerated so that it builds the new f-qsort.c.
  • Loading branch information...
1 parent 4d9840f commit dd1136278c467ed41f10dff99f822cfcdc632088 @ladislav ladislav committed Sep 26, 2013
Showing with 232 additions and 19 deletions.
  1. +3 −0 NOTICE
  2. +22 −19 make/makefile
  3. +206 −0 src/core/f-qsort.c
  4. +1 −0 src/tools/file-base.r
View
@@ -32,3 +32,6 @@ This file is part of the Independent JPEG Group's software.
dtoa:
The author of this software is David M. Gay.
Copyright (c) 1991, 2000, 2001 by Lucent Technologies.
+
+qsort:
+Copyright (c) 1992, 1993 The Regents of the University of California.
View
@@ -1,4 +1,4 @@
-# REBOL Makefile -- Generated by make-make.r (do not edit) on 18-Aug-2013/15:25:30-7:00
+# REBOL Makefile -- Generated by make-make.r (do not edit) on 5-Oct-2013/20:20:10+2:00
# This makefile is intentional kept simple to make builds possible on
# a wider range of target platforms.
@@ -133,24 +133,24 @@ OBJS = objs/a-constants.o objs/a-globals.o objs/a-lib.o objs/b-boot.o \
objs/c-function.o objs/c-port.o objs/c-task.o objs/c-word.o \
objs/d-crash.o objs/d-dump.o objs/d-print.o objs/f-blocks.o \
objs/f-deci.o objs/f-dtoa.o objs/f-enbase.o objs/f-extension.o \
- objs/f-math.o objs/f-modify.o objs/f-random.o objs/f-round.o \
- objs/f-series.o objs/f-stubs.o objs/l-scan.o objs/l-types.o \
- objs/m-gc.o objs/m-pools.o objs/m-series.o objs/n-control.o \
- objs/n-data.o objs/n-io.o objs/n-loop.o objs/n-math.o \
- objs/n-sets.o objs/n-strings.o objs/n-system.o objs/p-clipboard.o \
- objs/p-console.o objs/p-dir.o objs/p-dns.o objs/p-event.o \
- objs/p-file.o objs/p-net.o objs/s-cases.o objs/s-crc.o \
- objs/s-file.o objs/s-find.o objs/s-make.o objs/s-mold.o \
- objs/s-ops.o objs/s-trim.o objs/s-unicode.o objs/t-bitset.o \
- objs/t-block.o objs/t-char.o objs/t-datatype.o objs/t-date.o \
- objs/t-decimal.o objs/t-event.o objs/t-function.o objs/t-gob.o \
- objs/t-image.o objs/t-integer.o objs/t-logic.o objs/t-map.o \
- objs/t-money.o objs/t-none.o objs/t-object.o objs/t-pair.o \
- objs/t-port.o objs/t-string.o objs/t-time.o objs/t-tuple.o \
- objs/t-typeset.o objs/t-utype.o objs/t-vector.o objs/t-word.o \
- objs/u-bmp.o objs/u-compress.o objs/u-dialect.o objs/u-gif.o \
- objs/u-jpg.o objs/u-md5.o objs/u-parse.o objs/u-png.o \
- objs/u-sha1.o objs/u-zlib.o
+ objs/f-math.o objs/f-modify.o objs/f-qsort.o objs/f-random.o \
+ objs/f-round.o objs/f-series.o objs/f-stubs.o objs/l-scan.o \
+ objs/l-types.o objs/m-gc.o objs/m-pools.o objs/m-series.o \
+ objs/n-control.o objs/n-data.o objs/n-io.o objs/n-loop.o \
+ objs/n-math.o objs/n-sets.o objs/n-strings.o objs/n-system.o \
+ objs/p-clipboard.o objs/p-console.o objs/p-dir.o objs/p-dns.o \
+ objs/p-event.o objs/p-file.o objs/p-net.o objs/s-cases.o \
+ objs/s-crc.o objs/s-file.o objs/s-find.o objs/s-make.o \
+ objs/s-mold.o objs/s-ops.o objs/s-trim.o objs/s-unicode.o \
+ objs/t-bitset.o objs/t-block.o objs/t-char.o objs/t-datatype.o \
+ objs/t-date.o objs/t-decimal.o objs/t-event.o objs/t-function.o \
+ objs/t-gob.o objs/t-image.o objs/t-integer.o objs/t-logic.o \
+ objs/t-map.o objs/t-money.o objs/t-none.o objs/t-object.o \
+ objs/t-pair.o objs/t-port.o objs/t-string.o objs/t-time.o \
+ objs/t-tuple.o objs/t-typeset.o objs/t-utype.o objs/t-vector.o \
+ objs/t-word.o objs/u-bmp.o objs/u-compress.o objs/u-dialect.o \
+ objs/u-gif.o objs/u-jpg.o objs/u-md5.o objs/u-parse.o \
+ objs/u-png.o objs/u-sha1.o objs/u-zlib.o
HOST = objs/host-main.o objs/host-args.o objs/host-device.o objs/host-stdio.o \
objs/dev-net.o objs/dev-dns.o objs/host-lib.o objs/host-readline.o \
@@ -255,6 +255,9 @@ objs/f-math.o: $R/f-math.c
objs/f-modify.o: $R/f-modify.c
$(CC) $R/f-modify.c $(RFLAGS) -o objs/f-modify.o
+objs/f-qsort.o: $R/f-qsort.c
+ $(CC) $R/f-qsort.c $(RFLAGS) -o objs/f-qsort.o
+
objs/f-random.o: $R/f-random.c
$(CC) $R/f-random.c $(RFLAGS) -o objs/f-random.o
View
@@ -0,0 +1,206 @@
+/* This file was downloaded from
+ * https://raw.github.com/android/platform_bionic/master/libc/upstream-freebsd/lib/libc/stdlib/qsort.c
+ */
+
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+/* commented out by L.M.
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+*/
+
+#include <stdlib.h>
+
/*
 * Local modifications relative to the downloaded source:
 *  - byte counts are carried as size_t (not int) through swapfunc/vecswap,
 *    so large elements or large runs of equal keys are not truncated;
 *  - the sort recurses on the smaller partition and iterates on the larger
 *    one, which bounds the recursion depth by log2(n);
 *  - a zero element size returns immediately instead of looping forever;
 *  - the alignment test no longer subtracts a null pointer;
 *  - helper macros are fully parenthesized and statement-safe.
 */

/*
 * Comparison callback type.  When I_AM_QSORT_R is defined the callback
 * receives the caller-supplied context pointer as its first argument.
 */
#ifdef I_AM_QSORT_R
typedef int cmp_t(void *, const void *, const void *);
#else
typedef int cmp_t(const void *, const void *);
#endif
static inline char *med3(char *, char *, char *, cmp_t *, void *);
static inline void swapfunc(char *, char *, size_t, int);

/* Smaller of two values; both arguments are evaluated more than once. */
#define min(a, b)	((a) < (b) ? (a) : (b))

/*
 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
 */

/* Exchange n bytes between parmi and parmj, one TYPE-sized unit at a time. */
#define swapcode(TYPE, parmi, parmj, n) do {		\
	size_t i = (n) / sizeof (TYPE);			\
	TYPE *pi = (TYPE *) (parmi);			\
	TYPE *pj = (TYPE *) (parmj);			\
	while (i-- > 0) {				\
		TYPE t = *pi;				\
		*pi++ = *pj;				\
		*pj++ = t;				\
	}						\
} while (0)

/*
 * Select the swap strategy for base address a and element size es:
 *   0 - a is long-aligned and es == sizeof(long): swap a single long
 *   1 - a is long-aligned and es is a multiple of sizeof(long): swap longs
 *   2 - anything else: swap byte by byte
 * The address is converted to size_t only to inspect its low bits.
 */
#define SWAPINIT(a, es)							\
	swaptype = ((size_t)(a) % sizeof(long) ||			\
	    (es) % sizeof(long)) ? 2 : ((es) == sizeof(long) ? 0 : 1)

/* Exchange n bytes between a and b using the strategy in swaptype. */
static inline void
swapfunc(char *a, char *b, size_t n, int swaptype)
{
	if (swaptype <= 1)
		swapcode(long, a, b, n);
	else
		swapcode(char, a, b, n);
}

/* Exchange two elements; relies on `es` and `swaptype` being in scope. */
#define swap(a, b) do {						\
	if (swaptype == 0) {					\
		long t = *(long *)(a);				\
		*(long *)(a) = *(long *)(b);			\
		*(long *)(b) = t;				\
	} else							\
		swapfunc((a), (b), es, swaptype);		\
} while (0)

/* Exchange two byte ranges of length n; does nothing when n is zero. */
#define vecswap(a, b, n) do {					\
	if ((n) > 0)						\
		swapfunc((a), (b), (n), swaptype);		\
} while (0)

/*
 * CMP invokes the comparison callback; RECURSE sorts a sub-range with the
 * same element size and callback.  Both rely on `cmp` (and, for RECURSE,
 * `es` and `thunk`) being in scope.
 */
#ifdef I_AM_QSORT_R
#define CMP(t, x, y)		(cmp((t), (x), (y)))
#define RECURSE(base, cnt)	qsort_r((base), (cnt), es, thunk, cmp)
#else
#define CMP(t, x, y)		(cmp((x), (y)))
#define RECURSE(base, cnt)	qsort((base), (cnt), es, cmp)
#endif

/* Return whichever of a, b, c points at the median element. */
static inline char *
med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk)
{
	(void)thunk;	/* only used when I_AM_QSORT_R is defined */
	return CMP(thunk, a, b) < 0 ?
	    (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a))
	    : (CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c));
}

/* Insertion-sort n elements of es bytes each, starting at base. */
static inline void
insertion_sort(char *base, size_t n, size_t es, cmp_t *cmp, void *thunk,
    int swaptype)
{
	char *pl, *pm;

	(void)thunk;	/* only used when I_AM_QSORT_R is defined */
	if (n < 2)
		return;
	for (pm = base + es; pm < base + n * es; pm += es)
		for (pl = pm;
		    pl > base && CMP(thunk, pl - es, pl) > 0;
		    pl -= es)
			swap(pl, pl - es);
}

/*
 * Sort the n elements of es bytes each that start at a, in ascending order
 * as defined by cmp (negative, zero or positive for less, equal, greater).
 * The array is sorted in place.  Nothing is done when es is zero.
 */
#ifdef I_AM_QSORT_R
void
qsort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
#else
#define thunk NULL
void
qsort(void *a, size_t n, size_t es, cmp_t *cmp)
#endif
{
	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
	size_t d, lt, gt;
	int cmp_result;
	int swaptype, swap_cnt;

	/*
	 * With zero-sized elements the scan pointers below would never
	 * advance, so there is nothing meaningful to reorder.
	 */
	if (es == 0)
		return;

loop:
	SWAPINIT(a, es);
	swap_cnt = 0;
	if (n < 7) {
		insertion_sort(a, n, es, cmp, thunk, swaptype);
		return;
	}

	/* Pivot: middle element, median of 3, or median of three medians. */
	pm = (char *)a + (n / 2) * es;
	if (n > 7) {
		pl = a;
		pn = (char *)a + (n - 1) * es;
		if (n > 40) {
			d = (n / 8) * es;
			pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
			pm = med3(pm - d, pm, pm + d, cmp, thunk);
			pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
		}
		pm = med3(pl, pm, pn, cmp, thunk);
	}
	swap(a, pm);	/* the pivot now lives in the first slot */

	/*
	 * Partition.  Elements equal to the pivot are parked at the two
	 * ends ([a, pa) and (pd, end]) while pb and pc scan towards each
	 * other.
	 */
	pa = pb = (char *)a + es;
	pc = pd = (char *)a + (n - 1) * es;
	for (;;) {
		while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pa, pb);
				pa += es;
			}
			pb += es;
		}
		while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pc, pd);
				pd -= es;
			}
			pc -= es;
		}
		if (pb > pc)
			break;
		swap(pb, pc);
		swap_cnt = 1;
		pb += es;
		pc -= es;
	}
	if (swap_cnt == 0) {	/* no swaps were needed: use insertion sort */
		insertion_sort(a, n, es, cmp, thunk, swaptype);
		return;
	}

	/* Move the parked "equal" runs from the ends into the middle. */
	pn = (char *)a + n * es;
	d = min((size_t)(pa - (char *)a), (size_t)(pb - pa));
	vecswap(a, pb - d, d);
	d = min((size_t)(pd - pc), (size_t)(pn - pd) - es);
	vecswap(pb, pn - d, d);

	lt = (size_t)(pb - pa);		/* bytes of elements below the pivot */
	gt = (size_t)(pd - pc);		/* bytes of elements above the pivot */

	/*
	 * Recurse on the smaller side and loop on the larger one, so each
	 * nested call handles at most half of its parent's range.
	 */
	if (lt <= gt) {
		if (lt > es)
			RECURSE(a, lt / es);
		if (gt > es) {
			a = pn - gt;
			n = gt / es;
			goto loop;
		}
	} else {
		if (gt > es)
			RECURSE(pn - gt, gt / es);
		if (lt > es) {
			n = lt / es;
			goto loop;
		}
	}
}
@@ -38,6 +38,7 @@ core: [
f-extension.c
f-math.c
f-modify.c
+ f-qsort.c
f-random.c
f-round.c
f-series.c

0 comments on commit dd11362

Please sign in to comment.