Skip to content

Commit

Permalink
Chez Scheme: add pbchunks support
Browse files Browse the repository at this point in the history
As of commit 4480e64, Chez Scheme can support Racket CS on a
platform where there's no native-code backend and where code
generation beyond libffi's is disallowed. Then again, since
interpreted mode is a factor of 20 slower, it's not so practical.

This commit makes a no-native-backend Racket CS somewhat more practical
by adding a "pbchunks" mode that compiles some bytecode to C. That
compilation path makes sense for the static code that is built into
the Racket CS executable, which includes the layers from Chez Scheme
primitives through the Racket expander. This strategy is analogous to
the "cify" mode of non-JIT Racket BC, which compiles the Racket macro
expander to C.

One difference between pbchunks and cify is that Racket BC's
primitives are already implemented in C, so cify applies only to the
expander's implementation. Compilation with pbchunks applies even to
the implementation of Scheme primitives. (Neither applies to
Racket-level code that is run via no-JIT Racket BC with cify or
no-native-backend Racket CS with pbchunks.) That difference explains
the relative performance I find on my machines for `racket -cl
racket/base`:

                                 pure
                 native       interpreted       cify/pbchunks
  Racket CS       x1             x20                x4
  Racket BC       x1              x5                x1

The x4 is not great, but the end result is that building a Racket
distribution with pbchunks Racket CS feels at least practical, while
pure interpreted Racket CS took longer than I was willing to wait.
  • Loading branch information
mflatt committed Jan 29, 2022
1 parent 6c5ce5d commit e3929bc
Show file tree
Hide file tree
Showing 50 changed files with 2,907 additions and 907 deletions.
2 changes: 1 addition & 1 deletion .makefile
Expand Up @@ -344,7 +344,7 @@ RACKET_FOR_BOOTFILES = $(RACKET)
RACKET_FOR_BUILD = $(RACKET)

# This branch name changes each time the pb boot files are updated:
PB_BRANCH == circa-8.4.0.4-2
PB_BRANCH == circa-8.4.0.5-1
PB_REPO = https://github.com/racket/pb

# Set to empty for Git before v1.7.10:
Expand Down
14 changes: 7 additions & 7 deletions Makefile
Expand Up @@ -47,7 +47,7 @@ RACKETCS_SUFFIX =
RACKET =
RACKET_FOR_BOOTFILES = $(RACKET)
RACKET_FOR_BUILD = $(RACKET)
PB_BRANCH = circa-8.4.0.4-2
PB_BRANCH = circa-8.4.0.5-1
PB_REPO = https://github.com/racket/pb
SINGLE_BRANCH_FLAG = --single-branch
EXTRA_REPOS_BASE =
Expand Down Expand Up @@ -310,19 +310,19 @@ maybe-fetch-pb-as-is:
echo done
fetch-pb-from:
mkdir -p racket/src/ChezScheme/boot
if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q $(SINGLE_BRANCH_FLAG) -b circa-8.4.0.4-2 $(PB_REPO) racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-8.4.0.4-2:remotes/origin/circa-8.4.0.4-2 ; fi
cd racket/src/ChezScheme/boot/pb && git remote set-branches origin circa-8.4.0.4-2
cd racket/src/ChezScheme/boot/pb && git checkout -q circa-8.4.0.4-2
if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q $(SINGLE_BRANCH_FLAG) -b circa-8.4.0.5-1 $(PB_REPO) racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-8.4.0.5-1:remotes/origin/circa-8.4.0.5-1 ; fi
cd racket/src/ChezScheme/boot/pb && git remote set-branches origin circa-8.4.0.5-1
cd racket/src/ChezScheme/boot/pb && git checkout -q circa-8.4.0.5-1
pb-fetch:
$(MAKE) fetch-pb EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" PB_REPO="$(PB_REPO)" SINGLE_BRANCH_FLAG="$(SINGLE_BRANCH_FLAG)"
pb-build:
cd racket/src/ChezScheme && racket rktboot/main.rkt --machine pb
pb-stage:
cd racket/src/ChezScheme/boot/pb && git branch circa-8.4.0.4-2
cd racket/src/ChezScheme/boot/pb && git checkout circa-8.4.0.4-2
cd racket/src/ChezScheme/boot/pb && git branch circa-8.4.0.5-1
cd racket/src/ChezScheme/boot/pb && git checkout circa-8.4.0.5-1
cd racket/src/ChezScheme/boot/pb && git add . && git commit --amend -m "new build"
pb-push:
cd racket/src/ChezScheme/boot/pb && git push -u origin circa-8.4.0.4-2
cd racket/src/ChezScheme/boot/pb && git push -u origin circa-8.4.0.5-1
win-cs-base:
IF "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-bc-then-cs-base SETUP_BOOT_MODE=--boot WIN32_BUILD_LEVEL=bc PLAIN_RACKET=racket\racketbc DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETBC_SUFFIX="$(RACKETBC_SUFFIX)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)"
IF not "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-just-cs-base SETUP_BOOT_MODE=--chain DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" RACKET_FOR_BUILD="$(RACKET_FOR_BUILD)"
Expand Down
2 changes: 1 addition & 1 deletion pkgs/base/info.rkt
Expand Up @@ -14,7 +14,7 @@

;; In the Racket source repo, this version should change only when
;; "racket_version.h" changes:
(define version "8.4.0.4")
(define version "8.4.0.5")

(define deps `("racket-lib"
["racket" #:version ,version]))
Expand Down
2 changes: 1 addition & 1 deletion racket/src/ChezScheme/BUILDING
Expand Up @@ -61,7 +61,7 @@ and then trying again with `./configure`. In the former case, you can
use "auto.bootquick" instead of "<machine type>.bootquick".

If you plan to build on multiple different machines and you don't have
pbb bboot files, then it may be a good idea to generate pb boot files
pbb boot files, then it may be a good idea to generate pb boot files
via Racket:

racket rktboot/main.rkt --machine pb
Expand Down
86 changes: 81 additions & 5 deletions racket/src/ChezScheme/IMPLEMENTATION.md
Expand Up @@ -63,7 +63,7 @@ form. The build scripts do not convert boot files to vfasl format.
Chez Scheme assigns a `machine-type` name to each platform it runs on.
The `machine-type` name carries three pieces of information:

* *whether the system threaded*: A `t` indicates that it is, and an
* *whether the system is threaded*: `t` indicates that it is, and an
absence indicates that it's not threaded;

* *the hardware platform*: `i3` for x86, `a6` for x86_64, `arm32` for
Expand All @@ -74,16 +74,18 @@ The `machine-type` name carries three pieces of information:

When you run "configure", it looks for boot and header files as the
directory "boot/*machine-type*". (If it doesn't find them, then
configuration cannot continue.)
configuration cannot continue.) For information on `pb` machine types,
see "Portable Bytecode" below.

The supported machine types are listed in "cmacros.ss" and reflected
by a "boot/*machine-type*" directory for boot and headers files and a
combination of "s/*kind*.def" files to describe the platform. There
may also be a "s/Mf-*machine-type*" makefile to select relevant files
in "s", a "c/Mf-*machine-type*" makefile for configuration in "c", and
a "mats/Mf-*machine-type*" makefile to configure testing, but Unix
machine types are handled by Mf-unix and variables configured in the
"configure" and "workarea" scripts.
a "mats/Mf-*machine-type*" makefile to configure testing. Files for
Unix machine types can be generated from "s/unix.def" or "s/tunix.def"
and "c/Mf-unix" with variables configured by the "configure" and
"workarea" scripts.

The "workarea" script in the root of the Chez Scheme project is used
to generate a subdirectory with the appropriate contents to build for
Expand Down Expand Up @@ -1269,6 +1271,80 @@ value, then there must be some function that provides the value. If
you need the target machine's value, then it must be accessed using
`constant`.

# Portable Bytecode

The "portable bytecode" virtual machine uses a 32-bit instruction set
that is interpreted by a loop defined in "c/pb.c", where many of the
instruction implementations are in "c/pb.h". The instruction set is
custom, but inspired by Arm64. Of course, since the instructions are
interpreted, it does not run nearly as fast as the native code that Chez
Scheme normally generates, but it runs fast enough to be useful for
bootstrapping a Chez Scheme build from one portable set of boot files.
The pb machine type is also potentially useful in a setting that
disallows code generation or where there's not yet a machine-code
backend for Chez Scheme.

A `machine-type` name for a pb build follows a variant of the normal
conventions:

* *whether the system is threaded*: `t` indicates that it is threaded;

* `pb`;

* *word size*: `64`, `32`, or blank for basic; and

* *endianness*: `l` for little-endian, `b` for big-endian, or blank
for basic.

Compiled files (including boot files) for a basic pb build work on all
platforms, while compiled files for a non-basic pb build have a
specific word size and endianness for improved performance. Run
"configure" with `--pb` for a basic build, or run "configure" with
`--pbarch` for a non-basic build.

A basic build can work on all platforms because it assumes a 64-bit
representation of Scheme values. On a 32-bit platform, the kernel is
compiled to use a 64-bit integer type for `ptr`, even though the high
half of a `ptr` value will always be zeros. The `TO_VOIDP` and
`TO_PTR` macros used in the kernel tell a C compiler that conversions
between 64-bit `ptr`s and (potentially) 32-bit pointers are
intentional. A basic build also avoids a compile-time assumption of
endianness, turning any such Scheme-level decisions into a run-time
branch. Bytecode instructions are stored as little endian in compiled
code for a basic build; on a big-endian machine, the kernel rewrites
instruction bytes to big-endian form while loading a fasl file, so the
interpreter can decode instructions in native order.

For a non-basic build, fragments of static Scheme code can be turned
into C code to compile and plug back into the kernel. These fragments
are called *pbchunks*. The `pbchunk-convert-file` function takes
compiled Scheme code (as a boot or fasl file), generates C code for
the chunks, and generates revised compiled code that contains
references to the chunks via `pb-chunk` instructions. Calling the
registration function in the generated C code registers chunks with
the kernel as targets for `pb-chunk` instructions. Each chunk has a
static index, so the revised compiled Scheme code must be used with
exactly the C chunks that are generated at the same time; when
multiple sets of chunks are used together, each needs to be created
with non-overlapping index ranges. Orchestrating the generation of
chunk files and linking/loading them into a kernel executable is
currently outside the scope of the Chez Scheme build scripts.

A `pb-chunk` instruction's payload is two integers: a 16-bit *index*
and an 8-bit *subindex*. The *index* selects a registered C chunk
function. The *subindex* is passed as the third argument to that
function. Meanwhile, the first two arguments to the chunk C function
are the machine state *ms* that lives in a thread context and the
address *ip* of the `pb-chunk` instruction. The pb virtual registers
are accessed via *ms*. The *ip* argument is useful for constructing
relative addresses, such as the address of code that contains a
relocatable reference. A C chunk function returns the address of pb
code to jump to. A chunk function might return an address of Scheme
function code to call that function, or it might return the address of
code to go back to running in interpreted mode for the same code
object where it started; that is, general jumps and bailing out of
chunk mode are implemented in the same way.

# Changing the Version Number

To change the version number:
Expand Down
4 changes: 3 additions & 1 deletion racket/src/ChezScheme/c/Mf-base
Expand Up @@ -44,7 +44,8 @@ kernelsrc=statics.c segment.c alloc.c symbol.c intern.c gcwrapper.c gc-011.c gc-

kernelobj=${kernelsrc:%.c=%.$o} ${mdobj}

kernelhdr=system.h types.h version.h globals.h externs.h segment.h atomic.h gc.c sort.h thread.h config.h compress-io.h itest.c nocurses.h popcount.h
kernelhdr=system.h types.h version.h globals.h externs.h segment.h atomic.h gc.c sort.h thread.h config.h \
compress-io.h itest.c nocurses.h popcount.h pb.h

mainsrc=main.c

Expand Down Expand Up @@ -76,6 +77,7 @@ gc-011.o gc-ocd.o: ${Include}/gc-ocd.inc
gc-oce.o: ${Include}/gc-oce.inc
gc-par.o: ${Include}/gc-par.inc
gcwrapper.o: ${Include}/heapcheck.inc
pb.o: pb.h

../zlib/zlib.h ../zlib/zconf.h: ../zlib/configure.log

Expand Down
6 changes: 3 additions & 3 deletions racket/src/ChezScheme/c/atomic.h
Expand Up @@ -34,7 +34,7 @@
#endif

#if !defined(PTHREADS)
# define CAS_ANY_FENCE(a, old, new) ((*(a) == (old)) ? (*(a) = (new), 1) : 0)
/* Non-PTHREADS build: plain compare-then-store through a `ptr`-sized slot.
   Evaluates to 1 if `*a` matched `old` and was replaced by `new`, else 0.
   Both the load and the store must go through `(ptr *)(a)`; casting `a` to
   the value type `ptr` and dereferencing that is not a valid lvalue. */
# define CAS_ANY_FENCE(a, old, new) ((*(ptr *)(a) == TO_PTR(old)) ? (*(ptr *)(a) = TO_PTR(new), 1) : 0)
#elif defined(__arm64__) || defined(__aarch64__)
FORCEINLINE int CAS_LOAD_ACQUIRE(volatile void *addr, void *old_val, void *new_val) {
long ret;
Expand Down Expand Up @@ -97,7 +97,7 @@ FORCEINLINE int S_cas_any_fence(int load_acquire, volatile void *addr, void *old
# define CAS_LOAD_ACQUIRE(a, old, new) S_cas_any_fence(1, a, old, new)
# define CAS_STORE_RELEASE(a, old, new) S_cas_any_fence(0, a, old, new)
#elif (__GNUC__ >= 5) || defined(__clang__)
# define CAS_ANY_FENCE(a, old, new) __sync_bool_compare_and_swap((ptr *)(a), (ptr)(old), (ptr)(new))
# define CAS_ANY_FENCE(a, old, new) __sync_bool_compare_and_swap((ptr *)(a), TO_PTR(old), TO_PTR(new))
#elif defined(_MSC_VER)
# if ptr_bits == 64
# define CAS_ANY_FENCE(a, old, new) (_InterlockedCompareExchange64((__int64 *)(a), (__int64)(new), (__int64)(old)) == (__int64)(old))
Expand All @@ -120,7 +120,7 @@ FORCEINLINE int S_cas_any_fence(volatile void *addr, void *old_val, void *new_va
}
# define CAS_ANY_FENCE(a, old, new) S_cas_any_fence(a, old, new)
#else
# define CAS_ANY_FENCE(a, old, new) ((*(a) == (old)) ? (*(a) = (new), 1) : 0)
# define CAS_ANY_FENCE(a, old, new) ((*(ptr *)(a) == TO_PTR(old)) ? (*(ptr *)(a) = TO_PTR(new), 1) : 0)
#endif

#ifdef CAS_ANY_FENCE
Expand Down
2 changes: 1 addition & 1 deletion racket/src/ChezScheme/c/externs.h
Expand Up @@ -127,7 +127,7 @@ extern int S_fasl_intern_rtd(ptr *x);
#ifdef X86_64
extern void x86_64_set_popcount_present PROTO((ptr code));
#endif
#ifdef PORTABLE_BYTECODE_BIGENDIAN
#ifdef PORTABLE_BYTECODE_SWAPENDIAN
extern void S_swap_dounderflow_header_endian PROTO((ptr code));
#endif

Expand Down
23 changes: 14 additions & 9 deletions racket/src/ChezScheme/c/fasl.c
Expand Up @@ -276,7 +276,7 @@ static U32 adjust_delay_inst PROTO((U32 delay_inst, U32 *old_call_addr, U32 *new
static INT sparc64_set_lit_only PROTO((void *address, uptr item, I32 destreg));
static void sparc64_set_literal PROTO((void *address, uptr item));
#endif /* SPARC64 */
#ifdef PORTABLE_BYTECODE_BIGENDIAN
#ifdef PORTABLE_BYTECODE_SWAPENDIAN
static void swap_code_endian(octet *code, uptr len);
#endif

Expand Down Expand Up @@ -1071,7 +1071,7 @@ static void faslin(ptr tc, ptr *x, ptr t, ptr *pstrbuf, faslFile f) {
S_G.profile_counters = Scons(S_weak_cons(co, pinfos), S_G.profile_counters);
}
code_bytesin((octet *)&CODEIT(co, 0), n, f);
#ifdef PORTABLE_BYTECODE_BIGENDIAN
#ifdef PORTABLE_BYTECODE_SWAPENDIAN
swap_code_endian((octet *)&CODEIT(co, 0), n);
#endif
m = uptrin(f);
Expand Down Expand Up @@ -1984,7 +1984,7 @@ static void sparc64_set_literal(address, item) void *address; uptr item; {
}
#endif /* SPARC64 */

#ifdef PORTABLE_BYTECODE_BIGENDIAN
#ifdef PORTABLE_BYTECODE_SWAPENDIAN
typedef struct {
octet *code;
uptr size;
Expand Down Expand Up @@ -2028,6 +2028,13 @@ static void swap_code_endian(octet *code, uptr len)
octet b = code[1];
octet c = code[2];
octet d = code[3];
#if fasl_endianness_is_little
octet le_a = a, le_b = b, le_c = c, le_d = d;
# define le_tag_offset -8
#else
octet le_a = d, le_b = c, le_c = b, le_d = a;
# define le_tag_offset -1
#endif
code[0] = d;
code[1] = c;
code[2] = b;
Expand All @@ -2036,9 +2043,9 @@ static void swap_code_endian(octet *code, uptr len)
code += 4;
len -= 4;

if (a == pb_adr) {
if (le_a == pb_adr) {
/* delta can be negative for a mvlet-error reinstall of the return address */
iptr delta = (((iptr)d << (ptr_bits - 8)) >> (ptr_bits - 20)) + ((iptr)c << 4) + (b >> 4);
iptr delta = 4*((((iptr)le_d << (ptr_bits - 8)) >> (ptr_bits - 20)) + ((iptr)le_c << 4) + (le_b >> 4));
if (delta > 0) {
/* after a few more instructions, we'll hit
a header where 64-bit values needs to be
Expand All @@ -2049,15 +2056,13 @@ static void swap_code_endian(octet *code, uptr len)

if ((uptr)delta > len)
S_error_abort("swap endian: delta goes past end");
if (delta & 0x3)
S_error_abort("swap endian: delta is not a multiple of 4");

if (after_rpheader[-8] & 0x1)
if (after_rpheader[le_tag_offset] & 0x1)
header_size = size_rp_compact_header;
else
header_size = size_rp_header;
rpheader = after_rpheader - header_size;

if (rpheader_stack_pos == rpheader_stack_size) {
int new_size = (2 * rpheader_stack_size) + 16;
rpheader_t *new_stack;
Expand Down
2 changes: 1 addition & 1 deletion racket/src/ChezScheme/c/ffi.c
Expand Up @@ -355,7 +355,7 @@ void S_ffi_call(ptr types, ptr proc, ptr *arena) {
ffi_cif *cif = make_cif(types);
iptr len = Svector_length(types), i;
iptr n_args = len - ARG_TYPE_START_INDEX;
void *rvalue, **args = TO_VOIDP((uptr)TO_PTR(arena) + (n_args * 8));
void *rvalue, **args = TO_VOIDP((uptr)TO_PTR(arena) + ((n_args+1) * 8));

if (Svector_ref(types, RET_IS_ARG_INDEX) != Sfalse) {
rvalue = TO_VOIDP(*arena);
Expand Down

0 comments on commit e3929bc

Please sign in to comment.