diff --git a/Makefile b/Makefile index 8903033ba..a3e3284c8 100644 --- a/Makefile +++ b/Makefile @@ -468,6 +468,8 @@ else ifeq ($(HAVE_LIGHTREC), 1) FLAGS += -DHAVE_WIN_SHM + #For lightrec to get number of cpus, mingw should provide pthread_num_processors_np like pthread-win32 + FLAGS += -DPTW32_VERSION endif endif diff --git a/Makefile.common b/Makefile.common index 3fbc7731c..389b3af75 100644 --- a/Makefile.common +++ b/Makefile.common @@ -183,11 +183,7 @@ ifeq ($(HAVE_LIGHTREC), 1) ifeq ($(THREADED_RECOMPILER), 0) FLAGS += -DENABLE_THREADED_COMPILER=0 else - ifeq ($(DEBUG), 0) - FLAGS += -DENABLE_THREADED_COMPILER=1 - else - FLAGS += -DENABLE_THREADED_COMPILER=0 - endif + FLAGS += -DENABLE_THREADED_COMPILER=1 endif ifneq (,$(findstring win,$(platform))) @@ -197,8 +193,8 @@ ifeq ($(HAVE_LIGHTREC), 1) INCFLAGS += -I$(DEPS_DIR)/lightning/include \ -I$(DEPS_DIR)/lightrec \ - -I$(CORE_DIR)/lightning-lightrec-include \ - -include $(CORE_DIR)/lightning-lightrec-include/debug.h + -I$(CORE_DIR)/include \ + -include $(CORE_DIR)/include/debug.h endif ifneq ($(HAVE_GRIFFIN), 1) @@ -375,12 +371,11 @@ ifeq ($(HAVE_LIGHTREC), 1) $(DEPS_DIR)/lightrec/memmanager.c \ $(DEPS_DIR)/lightrec/optimizer.c \ $(DEPS_DIR)/lightrec/reaper.c \ - $(DEPS_DIR)/lightrec/regcache.c + $(DEPS_DIR)/lightrec/regcache.c \ + $(DEPS_DIR)/lightrec/tlsf/tlsf.c ifeq ($(THREADED_RECOMPILER), 1) - ifeq ($(DEBUG), 0) - SOURCES_C += $(DEPS_DIR)/lightrec/recompiler.c - endif + SOURCES_C += $(DEPS_DIR)/lightrec/recompiler.c endif ifeq ($(LIGHTREC_LOG_LEVEL), 4) diff --git a/deps/lightning/.gitignore b/deps/lightning/.gitignore index 62ca42aa8..bc7e97126 100644 --- a/deps/lightning/.gitignore +++ b/deps/lightning/.gitignore @@ -1,4 +1,15 @@ +/build-aux +* + +*.o +*.lo +*.la + +.libs/ +.deps/ +*/.libs/ +*/.deps/ + autom4te.cache aclocal.m4 depcomp @@ -20,14 +31,14 @@ missing size stamp-h1 test-driver -check/.deps -doc/.deps -lib/.deps + m4/libtool.m4 m4/lt~obsolete.m4 m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 
-doc/mdate-sh -doc/texinfo.tex + lightning.pc +include/lightning.h + +build-aux/ diff --git a/deps/lightning/.gitrepo b/deps/lightning/.gitrepo index cae50ca34..f709a4be3 100644 --- a/deps/lightning/.gitrepo +++ b/deps/lightning/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://git.savannah.gnu.org/git/lightning.git branch = master - commit = 876c1043bec5bfd594482b40700c84693e40d0eb + commit = ccb8bd77a46c2123c634fb3115b0023165793975 parent = 9f797430963d9cf0fcef7d963466f9cac7026de2 method = merge cmdver = 0.4.3 diff --git a/deps/lightning/ChangeLog b/deps/lightning/ChangeLog index 9964207e0..0d8f68b88 100644 --- a/deps/lightning/ChangeLog +++ b/deps/lightning/ChangeLog @@ -1,3 +1,211 @@ +2022-11-09 Paulo Andrade + + * configure.ac: Add new --enable-devel-strong-type-checking + option. + * include/lightning.h.in: Rework to not need to know if + PACKED_STACK is defined, and add a new argument to _jit_arg, + _jit_putarg{r,i}, _jit_pusharg{r,i} and _jit_ret{r,i} to have + the same code path if PACKED_STACK is defined or not, and also + to implement STRONG_TYPE_CHECK enabled with the new + --enable-devel-strong-type-checking. + * include/lightning/jit_private.h: Add new macros to add assertions + for STRONG_TYPE_CHECK and avoid pasting tokens in jit_inc_synth* + when the token is not a static known value. + * lib/jit_aarch64.c: The first implementation of the new code, + working correctly in Apple M1 and with and without STRONG_TYPE_CHECK + in Linux. + +2022-11-08 Paulo Andrade + + Add support for packed stack arguments as used by Apple M1 + aarch64 cpus. This requires a major redesign in how Lightning + works, because contrary to all other supported ports, in this + case arguments must be truncated and sign/zero extended if + passed in registers, but when receiving the argument, there + is no need to truncate and sign/zero extend. + Return values are also treated this way. The callee must + truncate sign/zero extend, not the caller. 
+ check/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS. + check/all.tst: Implement paired arg/getarg/pusharg/putarg/ret + codes to validate they do not generate assertions. + * check/allocar.tst, check/call.tst, check/fib.tst, check/put.tst, + check/stack.tst: Update to pass in all build types. + check/lightning.c: Add new codes for extra codes to handle + packed stack. + * configure.ac: Add a preprocessor define to know if packed stack + need is required. This is not really used, as it was moved to + jit_aarch64.h. + * doc/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS. + * doc/rpn.c: Update to pass in all build types. + include/lightning.h.in: Add new codes and reorder enum. + * include/lightning/jit_aarch64.h: Detect condition of needing + a packed stack. + * lib/jit_aarch64-sz.c: Regenerate. + * lib/jit_aarch64.c: Major updates for packed stack. + * lib/jit_names.c: Updates for debug output. + * lib/lightning.c: Update for new codes. + +2022-10-31 Marc Nieper-Wißkirchen + + Add new skip instruction. + * .gitignore: Update from Gnulib. + * check/Makefile.am: Add tests. + * check/lightning.c: Handle skip instructions. + * check/protect.c: Rewrite with skip. + * check/skip.ok: New test. + * check/skip.tst: New test. + * doc/body.texi: Document the skip instruction. + * include/lightning.h.in: Add the skip instruction. + * lib/jit_aarch64-sz.c: Update for skip instruction. + * lib/jit_aarch64.c: Implement skip instruction. + * lib/jit_alpha-sz.c: Update for skip instruction. + * lib/jit_alpha.c: Implement skip instruction. + * lib/jit_arm-sz.c: Update for skip instruction. + * lib/jit_arm.c: Implement skip instruction. + * lib/jit_hppa-sz.c: Update for skip instruction. + * lib/jit_hppa.c: Implement skip instruction. + * lib/jit_ia64-sz.c: Update for skip instruction. + * lib/jit_ia64.c: Implement skip instruction. + * lib/jit_loongarch-sz.c: Update for skip instruction. + * lib/jit_loongarch.c: Implement skip instruction. 
+ * lib/jit_mips-sz.c: Update for skip instruction. + * lib/jit_mips.c: Implement skip instruction. + * lib/jit_names.c: Update for skip instruction. + * lib/jit_ppc-sz.c: Update for skip instruction. + * lib/jit_ppc.c: Implement skip instruction. + * lib/jit_riscv-sz.c: Update for skip instruction. + * lib/jit_riscv.c: Implement skip instruction. + * lib/jit_s390-sz.c: Update for skip instruction. + * lib/jit_s390.c: Implement skip instruction. + * lib/jit_size.c: Treat align and skip in a special way. + * lib/jit_sparc-sz.c: Update for skip instruction. + * lib/jit_sparc.c: Implement skip instruction. + * lib/jit_x86-sz.c: Update for skip instruction. + * lib/jit_x86.c: Implement skip instruction. + * lib/lightning.c: Classify skip instruction. + +2022-10-30 Marc Nieper-Wißkirchen + + Add user-visible functions jit_protect and jit_unprotect. + * check/Makefile.am: Add test for jit_protect and jit_unprotect. + * check/protect.c: New test. + * doc/body.texi: Add documentation for jit_protect and + jit_unprotect. + * include/lightning.h.in: Add prototypes for jit_protect and + jit_unprotect. + * include/lightning/jit_private.h: Add a field to store the size + of the protected memory. + * lib/lightning.c: Remember the size of the protected memory and + implement the two new functions. + +2022-10-12 Paulo Andrade + + * include/lightning/jit_loongarch.h, lib/jit_loongarch-cpu.c, + lib/jit_loongarch-fpu.c, lib/jit_loongarch-sz.c, lib/jit_loongarch.c: + New files implementing the first version of the new loongarch port. + * check/float.tst: Add preprocessor checks for NaN and +-Inf + values converted to integers for loongarch. + * configure.ac: Add check and conditionals for new architecture. + * include/lightning.h.in, check/lightning.c, + include/lightning/Makefile.am, include/lightning/jit_private.h, + lib/Makefile.am, lib/jit_size.c, lib/lightning.c: Update for new + port. 
+ +2022-10-05 Paulo Andrade + + * check/lightning.c: Remove -Dmacro=value from usage and attempt + to parse it. It was buggy and not properly implemented. Now + it pass any extra options to the generated jit. To pass any + option starting with '-' need to also use '--'. + * check/collatz.e: New sample file showing an example of jit + generation. + +2022-10-04 Paulo Andrade + + * include/lightning/jit_private.h: Add new flag to jit_block_t. + * lib/lightning.c: Rewrite register liveness and state at block + entry code to avoid a very expensive and non scaling code path. + Now it attempts to do as few as possible recomputations when + merging state of adjacent blocks, still doing one extra nop pass + (in the sense that it will not find any changes) to make sure the + logic is correct. + +2022-09-30 Paulo Andrade + + * include/lightning/jit_private.h: Implement new data structures + specific to riscv. + * lib/jit_disasm.c: Extra disassemble code for riscv constant pool. + * lib/jit_riscv-cpu.c: Modify movi to use constant pool if 3 or + more instructions are required to construct constant and modify + movi_p to use a pc relative load from a constant pool. + lib/jit_riscv-sz.c: Update for new constant pool code. Most + instructions that need 64 bit constants are significantly reduced. + * lib/jit_riscv.c: Implement most of the constant pool code. + * lib/jit_size.c: Update for estimate of code generation size. + * lib/lightning.c: Update for riscv specific code, and also make + sure to mprotect the constant pool as executable. + +2022-09-08 Paulo Andrade + + * lib/jit_fallback.c: Implement fallback compare and swap with + pthreads. + * check/Makefile.am: Update for new cas{r,i} simple test. + * check/catomic.c, check/catomic.ok: New test case for + simple compare and swap atomic operation. + * check/lightning.c: Add entries to be able to use + the new compare and swap atomic operation. Still missing + a general test, only the basic C version. 
+ * include/lightning.h.in: Include pthread.h, even if not + needing a fallback compare and swap. + * include/lightning/jit_private.h: Add support for a register pair + in second argument. Required by the new casr and casi operations. + * lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c, + lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_x86-cpu.c, + lib/jit_x86-sz.c, lib/jit_x86.c: Implement inline code for compare + and swap. + * lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c: Implement + inline code for compare and swap if cpu is armv7, otherwise, use + a fallback with pthreads. + * lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c, + lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c, + lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, + lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c, + lib/jit_riscv-cpu.c, lib/jit_riscv-sz.c, lib/jit_riscv.c, + lib/jit_s390-cpu.c, lib/jit_s390-sz.c, lib/jit_s390.c, + lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c: Implement + fallback compare and swap with pthreads. At least some of these + should be updated for inline code generation. + * lib/jit_names.c, lib/jit_print.c: lib/lightning.c: Update for the + new compare and swap operation. + * doc/body.texi: Add simple documentation of the compare and swap + new operation. + +2022-08-12 Marc Nieper-Wißkirchen + + Document jit_align. + * doc/body.texi: Add documentation for jit_align. + +2022-05-14 Paulo Andrade + + * include/lightning.h.in: Reorder jit_mov{n,z}r in instruction list. + * lib/jit_alpha.c, lib/jit_alpha-cpu.c, lib/jit_hppa.c, + lib/jit_hppa-cpu.c, lib/jit_ia64.c, lib/jit_ia64-cpu.c, + lib/jit_riscv.c, lib/jit_riscv-cpu.c, lib/jit_s390.c, + lib/jit_s390-cpu.c, lib/jit_sparc.c, lib/jit_sparc-cpu.c: + Implement fallback jit_mov{n,z}r. These are a somewhat cheap + implementation, but should be reviewed for the arches that already + have a proper conditional move. 
+ * lib/jit_arm-sz.c, lib/jit_mips-sz.c: Add missing maximum size + estimative and reorder. + * lib/jit_aarch64-sz.c, lib/jit_x86-sz.c, lib/jit_ppc-sz.c: + Reorder entry to match definition order. + * lib/jit_aarch64-sz.c, lib/jit_alpha-sz.c, lib/jit_hppa-sz.c, + lib/jit_ia64-sz.c, lib/jit_riscv-sz.c, lib/jit_s390-sz.c, + lib/jit_sparc-sz.c: Add heuristic value, basically the sum of + the cost of a movr + beqr. + * lib/jit_names.c: Add entries for debug output of mov{n,z}r. + * lib/lightning.c: Use proper bitmask in jit_classify. + 2021-04-03 Marc Nieper-Wißkirchen * check/Makefile.am: Add test for the live instruction. diff --git a/deps/lightning/README b/deps/lightning/README index ae36ea578..7e3df424b 100644 --- a/deps/lightning/README +++ b/deps/lightning/README @@ -1,3 +1,5 @@ GNU lightning is a library to aid in making portable programs that compile assembly code at run time. For more information, look at the info documentation. + +For help building lightning, see README-hacking. diff --git a/deps/lightning/README-hacking b/deps/lightning/README-hacking index 285f3c93a..cc6159855 100644 --- a/deps/lightning/README-hacking +++ b/deps/lightning/README-hacking @@ -22,6 +22,12 @@ for Debian-based systems such as Ubuntu: ** Building +If you intend to do development work with lightning, it's useful to build +lightning with its disassembler feature enabled. This optional feature +requires additional dependencies. On Ubuntu, this command should work: + + $ sudo apt-get install binutils-dev libiberty-dev zlib1g-dev + After getting the git sources, and installing the tools above, you can run $ ./bootstrap @@ -38,6 +44,10 @@ should output no difference. After that first time, running make should suffice. 
+To install lightning: + + $ sudo make install + ** Gnulib This distribution also uses Gnulib (https://www.gnu.org/software/gnulib) to diff --git a/deps/lightning/THANKS b/deps/lightning/THANKS index 0e0f1a943..d5737afbd 100644 --- a/deps/lightning/THANKS +++ b/deps/lightning/THANKS @@ -19,3 +19,4 @@ Holger Hans Peter Freyther Jon Arintok Bruno Haible Marc Nieper-Wißkirchen +Paul Cercueil diff --git a/deps/lightning/TODO b/deps/lightning/TODO index 676af0293..8b1378917 100644 --- a/deps/lightning/TODO +++ b/deps/lightning/TODO @@ -1,28 +1 @@ - * Validate that divrem in jit_x86-cpu.c is not modifying - the non result arguments. This is not verified by clobber.tst, - as it only checks registers not involved in the operation - (because it does not know about values being set as input - for the the operation). - * Write a simple higher level language implementation generating - jit with lightning, that could be some lisp or C like language. - - * rerun ./configure --enable-devel-get-jit-size and regenerate - the related jit_$arch-sz.c for the ports where nodata is - meaningful: - hppa (done) - i586 (done) - ia64 - mips o32 (done) - mips n32 - mips n64 - powerpc 32 (done) - powerpc 64 (done) - ppc - s390x (done) - sparc (done) - x86_64 (done) - Missing ones are due to no longer (remote) access to such hosts - and may be broken with jit_set_data(..., JIT_DISABLE_DATA). - (ia64 hp-ux or linx), (irix mips for 32 or 64 abi), and - (darwin ppc). diff --git a/deps/lightning/include/lightning/jit_aarch64.h b/deps/lightning/include/lightning/jit_aarch64.h index 6e7d8be94..2af498b5e 100644 --- a/deps/lightning/include/lightning/jit_aarch64.h +++ b/deps/lightning/include/lightning/jit_aarch64.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -23,6 +23,10 @@ #define JIT_HASH_CONSTS 0 #define JIT_NUM_OPERANDS 3 +#if __APPLE__ +# define PACKED_STACK 1 +#endif + /* * Types */ diff --git a/deps/lightning/include/lightning/jit_alpha.h b/deps/lightning/include/lightning/jit_alpha.h index 9bae34372..35934319d 100644 --- a/deps/lightning/include/lightning/jit_alpha.h +++ b/deps/lightning/include/lightning/jit_alpha.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_arm.h b/deps/lightning/include/lightning/jit_arm.h index 81451f12a..8f7278dba 100644 --- a/deps/lightning/include/lightning/jit_arm.h +++ b/deps/lightning/include/lightning/jit_arm.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_hppa.h b/deps/lightning/include/lightning/jit_hppa.h index ddc3950fb..afdf21dab 100644 --- a/deps/lightning/include/lightning/jit_hppa.h +++ b/deps/lightning/include/lightning/jit_hppa.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_ia64.h b/deps/lightning/include/lightning/jit_ia64.h index 718f191f3..7b212b9ac 100644 --- a/deps/lightning/include/lightning/jit_ia64.h +++ b/deps/lightning/include/lightning/jit_ia64.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/include/lightning/jit_loongarch.h b/deps/lightning/include/lightning/jit_loongarch.h new file mode 100644 index 000000000..44982ecc8 --- /dev/null +++ b/deps/lightning/include/lightning/jit_loongarch.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2022 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#ifndef _jit_loongarch_h +#define _jit_loongarch_h + +/* + * Types + */ +#define JIT_FP _FP +typedef enum { +#define jit_r_num() 9 +#define jit_r(i) (_T0 - (i)) +#define JIT_R0 _T0 +#define JIT_R1 _T1 +#define JIT_R2 _T2 +#define JIT_R3 _T3 +#define JIT_R4 _T4 +#define JIT_R5 _T5 +#define JIT_R6 _T6 +#define JIT_R7 _T7 +#define JIT_R8 _T8 + _T8, _T7, _T6, _T5, _T4, _T3, _T2, _T1, _T0, +#define jit_v_num() 9 +#define jit_v(i) (_S0 - (i)) +#define JIT_V0 _S0 +#define JIT_V1 _S1 +#define JIT_V2 _S2 +#define JIT_V3 _S3 +#define JIT_V4 _S4 +#define JIT_V5 _S5 +#define JIT_V6 _S6 +#define JIT_V7 _S7 +#define JIT_V8 _S8 + _S8, _S7, _S6, _S5, _S4, _S3, _S2, _S1, _S0, + _A7, _A6, _A5, _A4, _A3, _A2, _A1, _A0, + _FP, + _R21, + _ZERO, + _RA, + _TP, + _SP, + _FT0, _FT1, _FT2, _FT3, _FT4, _FT5, _FT6, _FT7, + _FT8, _FT9, _FT10, _FT11, _FT12, _FT13, _FT14, _FT15, + _FA7, _FA6, _FA5, _FA4, _FA3, _FA2, _FA1, _FA0, +#define jit_f_num() 8 +#define jit_f(i) (_FS0 - (i)) +#define JIT_F0 _FS0 +#define JIT_F1 _FS1 +#define JIT_F2 _FS2 +#define JIT_F3 _FS3 +#define JIT_F4 _FS4 
+#define JIT_F5 _FS5 +#define JIT_F6 _FS6 +#define JIT_F7 _FS7 + _FS7, _FS6, _FS5, _FS4, _FS3, _FS2, _FS1, _FS0, +#define JIT_NOREG _NOREG + _NOREG, +} jit_reg_t; + +#endif /* _jit_loongarch_h */ diff --git a/deps/lightning/include/lightning/jit_mips.h b/deps/lightning/include/lightning/jit_mips.h index 45f3851f0..a2388c9c8 100644 --- a/deps/lightning/include/lightning/jit_mips.h +++ b/deps/lightning/include/lightning/jit_mips.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_ppc.h b/deps/lightning/include/lightning/jit_ppc.h index f1bdbcbb8..b78b4bb58 100644 --- a/deps/lightning/include/lightning/jit_ppc.h +++ b/deps/lightning/include/lightning/jit_ppc.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -22,6 +22,9 @@ #define JIT_HASH_CONSTS 1 #define JIT_NUM_OPERANDS 3 +#if defined(_AIX) && !defined(_CALL_AIX) && !defined(_CALL_LINUX) +# define _CALL_AIXDESC 1 +#endif /* * Types diff --git a/deps/lightning/include/lightning/jit_private.h b/deps/lightning/include/lightning/jit_private.h index b8ecbf48b..c2a4082d5 100644 --- a/deps/lightning/include/lightning/jit_private.h +++ b/deps/lightning/include/lightning/jit_private.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -55,6 +55,26 @@ # define HIDDEN /**/ #endif +#if PACKED_STACK || STRONG_TYPE_CHECKING +# define assert_arg_type(code, expect) \ + do assert((code) == (expect)); while (0) +# define assert_putarg_type(code, expect) \ + do \ + assert((((code) - jit_code_putargr_c) >> 2) == \ + ((expect) - jit_code_arg_c)); \ + while (0) +#else +# define assert_arg_type(code, expect) \ + do assert((int)(code) == (int)(expect) || \ + (code) == jit_code_arg); while (0) +# define assert_putarg_type(code, expect) \ + do \ + assert(((((code) - jit_code_putargr_c) >> 2) == \ + ((expect) - jit_code_arg_c)) || \ + ((code) == jit_code_arg)); \ + while (0) +#endif + #define rc(value) jit_class_##value #define rn(reg) (jit_regno(_rvs[jit_regno(reg)].spec)) @@ -150,6 +170,13 @@ typedef jit_uint64_t jit_regset_t; # define JIT_RET _A0 # define JIT_FRET _FA0 typedef jit_uint64_t jit_regset_t; +#elif defined(__loongarch__) +# define JIT_RA0 _A0 +# define JIT_FA0 _FA0 +# define JIT_SP _SP +# define JIT_RET _A0 +# define JIT_FRET _FA0 +typedef jit_uint64_t jit_regset_t; #endif #define jit_data(u,v,w) _jit_data(_jit,u,v,w) @@ -167,46 +194,62 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, (!jit_regset_tstbit(&_jitc->regarg, regno) && \ !jit_regset_tstbit(&_jitc->regsav, regno)) -#define jit_inc_synth(code) \ +#define jit_code_inc_synth(code) \ do { \ - (void)jit_new_node(jit_code_##code); \ + (void)jit_new_node(code); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_w(code, u) \ +#define jit_inc_synth(name) \ + jit_code_inc_synth(jit_code_##name) +#define jit_code_inc_synth_w(code, u) \ do { \ - (void)jit_new_node_w(jit_code_##code, u); \ + (void)jit_new_node_w(code, u); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_f(code, u) \ +#define jit_inc_synth_w(name, u) \ + jit_code_inc_synth_w(jit_code_##name, u) +#define jit_code_inc_synth_f(code, u) \ do { \ - (void)jit_new_node_f(jit_code_##code, u); \ + (void)jit_new_node_f(code, u); \ jit_synth_inc(); \ } while (0) 
-#define jit_inc_synth_d(code, u) \ +#define jit_inc_synth_f(name, u) \ + jit_code_inc_synth_f(jit_code_##name, u) +#define jit_code_inc_synth_d(code, u) \ do { \ - (void)jit_new_node_d(jit_code_##code, u); \ + (void)jit_new_node_d(code, u); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_ww(code, u, v) \ +#define jit_inc_synth_d(name, u) \ + jit_code_inc_synth_d(jit_code_##name, u) +#define jit_code_inc_synth_ww(code, u, v) \ do { \ - (void)jit_new_node_ww(jit_code_##code, u, v); \ + (void)jit_new_node_ww(code, u, v); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_wp(code, u, v) \ +#define jit_inc_synth_ww(name, u, v) \ + jit_code_inc_synth_ww(jit_code_##name, u, v) +#define jit_code_inc_synth_wp(code, u, v) \ do { \ - (void)jit_new_node_wp(jit_code_##code, u, v); \ + (void)jit_new_node_wp(code, u, v); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_fp(code, u, v) \ +#define jit_inc_synth_wp(name, u, v) \ + jit_code_inc_synth_wp(jit_code_##name, u, v) +#define jit_code_inc_synth_fp(code, u, v) \ do { \ - (void)jit_new_node_fp(jit_code_##code, u, v); \ + (void)jit_new_node_fp(code, u, v); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_dp(code, u, v) \ +#define jit_inc_synth_fp(name, u, v) \ + jit_code_inc_synth_fp(jit_code_##name, u, v) +#define jit_code_inc_synth_dp(code, u, v) \ do { \ - (void)jit_new_node_dp(jit_code_##code, u, v); \ + (void)jit_new_node_dp(code, u, v); \ jit_synth_inc(); \ } while (0) +#define jit_inc_synth_dp(name, u, v) \ + jit_code_inc_synth_dp(jit_code_##name, u, v) #define jit_dec_synth() jit_synth_dec() #define jit_link_prolog() \ @@ -241,8 +284,8 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_class_xpr 0x80000000 /* float / vector */ /* Used on sparc64 where %f0-%f31 can be encode for single float * but %f32 to %f62 only as double precision */ -#define jit_class_sng 0x10000000 /* Single precision float */ -#define jit_class_dbl 0x20000000 /* Only double precision float */ +#define 
jit_class_sng 0x00010000 /* Single precision float */ +#define jit_class_dbl 0x00020000 /* Only double precision float */ #define jit_regno_patch 0x00008000 /* this is a register * returned by a "user" call * to jit_get_reg() */ @@ -264,8 +307,9 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_cc_a0_flt 0x00000020 /* arg0 is immediate float */ #define jit_cc_a0_dbl 0x00000040 /* arg0 is immediate double */ #define jit_cc_a0_arg 0x00000080 /* arg1 is an argument int id */ -#define jit_cc_a1_reg 0x00000100 /* arg1 is a register */ -#define jit_cc_a1_chg 0x00000200 /* arg1 is modified */ +#define jit_cc_a0_cnd 0x00000100 /* arg0 is a conditionally set register */ +#define jit_cc_a1_reg 0x00000200 /* arg1 is a register */ +#define jit_cc_a1_chg 0x00000400 /* arg1 is modified */ #define jit_cc_a1_int 0x00001000 /* arg1 is immediate word */ #define jit_cc_a1_flt 0x00002000 /* arg1 is immediate float */ #define jit_cc_a1_dbl 0x00004000 /* arg1 is immediate double */ @@ -275,6 +319,7 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_cc_a2_int 0x00100000 /* arg2 is immediate word */ #define jit_cc_a2_flt 0x00200000 /* arg2 is immediate float */ #define jit_cc_a2_dbl 0x00400000 /* arg2 is immediate double */ +#define jit_cc_a2_rlh 0x00800000 /* arg2 is a register pair */ #if __ia64__ || (__sparc__ && __WORDSIZE == 64) extern void @@ -359,6 +404,13 @@ typedef struct jit_value jit_value_t; typedef struct jit_compiler jit_compiler_t; typedef struct jit_function jit_function_t; typedef struct jit_register jit_register_t; +#if __arm__ +# if DISASSEMBLER +typedef struct jit_data_info jit_data_info_t; +# endif +#elif __riscv +typedef struct jit_const jit_const_t; +#endif union jit_data { struct { @@ -407,6 +459,9 @@ struct jit_block { jit_node_t *label; jit_regset_t reglive; jit_regset_t regmask; + jit_bool_t again; /* Flag need to rebuild regset masks + * due to changes in live and unknown + * state. 
*/ }; struct jit_value { @@ -424,6 +479,19 @@ typedef struct { jit_node_t *node; } jit_patch_t; +#if __arm__ && DISASSEMBLER +struct jit_data_info { + jit_uword_t code; /* pointer in code buffer */ + jit_word_t length; /* length of constant vector */ +}; +#elif __riscv && __WORDSIZE == 64 +struct jit_const { + jit_word_t value; + jit_word_t address; + jit_const_t *next; +}; +#endif + struct jit_function { struct { jit_int32_t argi; @@ -478,7 +546,7 @@ struct jit_compiler { jit_int32_t breg; /* base register for prolog/epilog */ #endif #if __mips__ || __ia64__ || __alpha__ || \ - (__sparc__ && __WORDSIZE == 64) || __riscv + (__sparc__ && __WORDSIZE == 64) || __riscv || __loongarch__ jit_int32_t carry; #define jit_carry _jitc->carry #endif @@ -501,6 +569,7 @@ struct jit_compiler { jit_regset_t regsav; /* automatic spill only once */ jit_regset_t reglive; /* known live registers at some point */ jit_regset_t regmask; /* register mask to update reglive */ + jit_regset_t explive; /* explicitly marked as live */ struct { jit_uint8_t *end; } code; @@ -574,6 +643,27 @@ struct jit_compiler { jit_word_t length; } prolog; jit_bool_t jump; +#elif __riscv && __WORDSIZE == 64 + struct { + /* Hash table for constants to be resolved and patched */ + struct { + jit_const_t **table; /* very simple hash table */ + jit_word_t size; /* number of vectors in table */ + jit_word_t count; /* number of distinct entries */ + } hash; + struct { + jit_const_t **ptr; /* keep a single pointer */ + jit_const_t *list; /* free list */ + jit_word_t length; /* length of pool */ + } pool; + /* Linear list for constants that cannot be encoded easily */ + struct { + jit_word_t *instrs; /* list of direct movi instructions */ + jit_word_t *values; /* list of direct movi constants */ + jit_word_t offset; /* offset in instrs/values vector */ + jit_word_t length; /* length of instrs/values vector */ + } vector; + } consts; #endif }; @@ -589,6 +679,8 @@ struct jit_state { struct { jit_uint8_t *ptr; jit_word_t 
length; + /* PROTECTED bytes starting at PTR are mprotect'd. */ + jit_word_t protected; } code; struct { jit_uint8_t *ptr; @@ -691,6 +783,7 @@ _emit_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); extern void _emit_stxi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +extern void jit_init_print(void); extern void jit_init_debug(const char*); extern void jit_finish_debug(void); diff --git a/deps/lightning/include/lightning/jit_riscv.h b/deps/lightning/include/lightning/jit_riscv.h index 1b4f93d36..ad3f76fa7 100644 --- a/deps/lightning/include/lightning/jit_riscv.h +++ b/deps/lightning/include/lightning/jit_riscv.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Free Software Foundation, Inc. + * Copyright (C) 2019-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_s390.h b/deps/lightning/include/lightning/jit_s390.h index 6ab196b10..a28b0dd30 100644 --- a/deps/lightning/include/lightning/jit_s390.h +++ b/deps/lightning/include/lightning/jit_s390.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_sparc.h b/deps/lightning/include/lightning/jit_sparc.h index bee440bb0..e5988e114 100644 --- a/deps/lightning/include/lightning/jit_sparc.h +++ b/deps/lightning/include/lightning/jit_sparc.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_x86.h b/deps/lightning/include/lightning/jit_x86.h index a278d0624..91f91244e 100644 --- a/deps/lightning/include/lightning/jit_x86.h +++ b/deps/lightning/include/lightning/jit_x86.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. 
+ * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_aarch64-cpu.c b/deps/lightning/lib/jit_aarch64-cpu.c index 582946482..229ebb574 100644 --- a/deps/lightning/lib/jit_aarch64-cpu.c +++ b/deps/lightning/lib/jit_aarch64-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -211,6 +211,7 @@ typedef union { # undef ui } instr_t; # define stack_framesize 160 +# define s26_p(d) ((d) >= -33554432 && (d) <= 33554431) # define ii(i) *_jit->pc.ui++ = i # define ldr(r0,r1) ldr_l(r0,r1) # define ldxr(r0,r1,r2) ldxr_l(r0,r1,r2) @@ -290,6 +291,7 @@ typedef union { # define A64_CBNZ 0x35000000 # define A64_B_C 0x54000000 # define A64_CSINC 0x1a800400 +# define A64_CSSEL 0x1a800000 # define A64_REV 0xdac00c00 # define A64_UDIV 0x1ac00800 # define A64_SDIV 0x1ac00c00 @@ -317,6 +319,8 @@ typedef union { # define A64_LDRSB 0x38e06800 # define A64_STR 0xf8206800 # define A64_LDR 0xf8606800 +# define A64_LDAXR 0xc85ffc00 +# define A64_STLXR 0xc800fc00 # define A64_STRH 0x78206800 # define A64_LDRH 0x78606800 # define A64_LDRSH 0x78a06800 @@ -444,6 +448,8 @@ typedef union { # define LDR(Rt,Rn,Rm) oxxx(A64_LDR,Rt,Rn,Rm) # define LDRI(Rt,Rn,Imm12) oxxi(A64_LDRI,Rt,Rn,Imm12) # define LDUR(Rt,Rn,Imm9) oxx9(A64_LDUR,Rt,Rn,Imm9) +# define LDAXR(Rt,Rn) o_xx(A64_LDAXR,Rt,Rn) +# define STLXR(Rs,Rt,Rn) oxxx(A64_STLXR,Rs,Rn,Rt) # define STRB(Rt,Rn,Rm) oxxx(A64_STRB,Rt,Rn,Rm) # define STRBI(Rt,Rn,Imm12) oxxi(A64_STRBI,Rt,Rn,Imm12) # define STURB(Rt,Rn,Imm9) oxx9(A64_STURB,Rt,Rn,Imm9) @@ -461,6 +467,7 @@ typedef union { # define LDPI_PRE(Rt,Rt2,Rn,Simm7) oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7) # define STPI_POS(Rt,Rt2,Rn,Simm7) oxxx7(A64_STP_POS|XS,Rt,Rt2,Rn,Simm7) # define CSET(Rd,Cc) CSINC(Rd,XZR_REGNO,XZR_REGNO,Cc) +# define CSEL(Rd,Rn,Rm,Cc) oxxxc(A64_CSSEL|XS,Rd,Rn,Rm,Cc) # define 
B(Simm26) o26(A64_B,Simm26) # define BL(Simm26) o26(A64_BL,Simm26) # define BR(Rn) o_x_(A64_BR,Rn) @@ -572,6 +579,10 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define rshr_u(r0,r1,r2) LSR(r0,r1,r2) # define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define negr(r0,r1) NEG(r0,r1) # define comr(r0,r1) MVN(r0,r1) # define andr(r0,r1,r2) AND(r0,r1,r2) @@ -657,23 +668,22 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define stxr_l(r0,r1,r2) STR(r2,r1,r0) # define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0,r1) REV(r0,r1) -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ul(r0,r1) REV(r0,r1) # define extr_c(r0,r1) SXTB(r0,r1) # define extr_uc(r0,r1) UXTB(r0,r1) # define extr_s(r0,r1) SXTH(r0,r1) # define extr_us(r0,r1) UXTH(r0,r1) # define extr_i(r0,r1) SXTW(r0,r1) # define extr_ui(r0,r1) UXTW(r0,r1) +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + 
jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define movr(r0,r1) _movr(_jit,r0,r1) static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); # define movi(r0,i0) _movi(_jit,r0,i0) @@ -772,12 +782,12 @@ _bmxi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); # define bmci(i0,r0,i1) bmxi(BCC_EQ,i0,r0,i1) # define jmpr(r0) BR(r0) # define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); # define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define callr(r0) BLR(r0) # define calli(i0) _calli(_jit,i0) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(i0) _calli_p(_jit,i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(i0) _prolog(_jit,i0) @@ -903,7 +913,7 @@ static void _o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26) { instr_t i; - assert(Simm26 >= -33554432 && Simm26 <= 33554431); + assert(s26_p(Simm26)); assert(!(Op & ~0xfc000000)); i.w = Op; i.imm26.b = Simm26; @@ -1375,6 +1385,20 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPI(r2, 0); + CSEL(r0, r0, r1, CC_NE); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPI(r2, 0); + CSEL(r0, r0, r1, CC_EQ); +} + static void _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1441,21 +1465,19 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } -#if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - htonr_ul(r0, r1); + bswapr_ul(r0, r1); rshi_u(r0, r0, 48); } 
static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - htonr_ul(r0, r1); + bswapr_ul(r0, r1); rshi_u(r0, r0, 32); } -#endif static void _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) @@ -1814,6 +1836,33 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + /* retry: */ + retry = _jit->pc.w; + LDAXR(r0, r1); + eqr(r0, r0, r2); + jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */ + STLXR(r3, r0, r1); + jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ + /* done: */ + CSET(r0, CC_EQ); + done = _jit->pc.w; + patch_at(jump0, done); + patch_at(jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); +} + static void _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -2118,20 +2167,22 @@ _bmxi(jit_state_t *_jit, jit_int32_t cc, return (w); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; jit_int32_t reg; - w = (i0 - _jit->pc.w) >> 2; - if (w >= -33554432 && w <= 33554431) - B(w); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s26_p(d)) + B(d); else { reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i0); jmpr(rn(reg)); jit_unget_reg(reg); } + return (w); } static jit_word_t @@ -2146,20 +2197,22 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0) return (w); } -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; jit_int32_t reg; - w = (i0 - _jit->pc.w) >> 2; - if (w >= -33554432 && w <= 33554431) - BL(w); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s26_p(d)) + BL(d); else { reg = jit_get_reg(jit_class_gpr); 
movi(rn(reg), i0); callr(rn(reg)); jit_unget_reg(reg); } + return (w); } static jit_word_t @@ -2240,6 +2293,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) jit_unget_reg(reg); } +#if !__APPLE__ if (_jitc->function->self.call & jit_call_varargs) { /* Save gp registers in the save area, if any is a vararg */ for (reg = 8 - _jitc->function->vagp / -8; @@ -2257,6 +2311,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) + reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg)); } +#endif } static void @@ -2304,6 +2359,7 @@ _epilog(jit_state_t *_jit, jit_node_t *node) static void _vastart(jit_state_t *_jit, jit_int32_t r0) { +#if !__APPLE__ jit_int32_t reg; assert(_jitc->function->self.call & jit_call_varargs); @@ -2334,11 +2390,16 @@ _vastart(jit_state_t *_jit, jit_int32_t r0) stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg)); jit_unget_reg(reg); +#else + assert(_jitc->function->self.call & jit_call_varargs); + addi(r0, FP_REGNO, _jitc->function->self.size); +#endif } static void _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +#if !__APPLE__ jit_word_t ge_code; jit_word_t lt_code; jit_int32_t rg0, rg1; @@ -2387,6 +2448,11 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) patch_at(lt_code, _jit->pc.w); jit_unget_reg(rg0); +#else + assert(_jitc->function->self.call & jit_call_varargs); + ldr(r0, r1); + addi(r1, r1, sizeof(jit_word_t)); +#endif } static void @@ -2406,7 +2472,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) ffc = i.w & 0xffc00000; if (fc == A64_B || fc == A64_BL) { d = (label - instr) >> 2; - assert(d >= -33554432 && d <= 33554431); + assert(s26_p(d)); i.imm26.b = d; u.i[0] = i.w; } diff --git a/deps/lightning/lib/jit_aarch64-fpu.c b/deps/lightning/lib/jit_aarch64-fpu.c index 871ba7e20..52f905edd 100644 --- a/deps/lightning/lib/jit_aarch64-fpu.c +++ b/deps/lightning/lib/jit_aarch64-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free 
Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -862,6 +862,7 @@ dbopi(ltgt) static void _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +#if !__APPLE__ jit_word_t ge_code; jit_word_t lt_code; jit_int32_t rg0, rg1; @@ -910,5 +911,10 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) patch_at(lt_code, _jit->pc.w); jit_unget_reg(rg0); +#else + assert(_jitc->function->self.call & jit_call_varargs); + ldr_d(r0, r1); + addi(r1, r1, sizeof(jit_float64_t)); +#endif } #endif diff --git a/deps/lightning/lib/jit_aarch64-sz.c b/deps/lightning/lib/jit_aarch64-sz.c index 7e22e0e7b..43207af8a 100644 --- a/deps/lightning/lib/jit_aarch64-sz.c +++ b/deps/lightning/lib/jit_aarch64-sz.c @@ -1,20 +1,25 @@ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 120 +# if PACKED_STACK +#define JIT_INSTR_MAX 96 0, /* data */ 0, /* live */ - 4, /* align */ + 12, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 120, /* prolog */ + 96, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,11 +27,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 44, /* va_start */ - 64, /* va_arg */ - 72, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 12, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 20, /* addi */ @@ -95,29 +112,36 @@ 8, /* nei */ 4, /* movr */ 16, /* movi */ + 8, /* movnr */ + 8, /* movzr */ + 28, /* casr */ + 36, /* casi 
*/ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ + 8, /* bswapr_us */ + 8, /* bswapr_ui */ + 4, /* bswapr_ul */ 8, /* htonr_us */ 8, /* htonr_ui */ 4, /* htonr_ul */ 4, /* ldr_c */ - 12, /* ldi_c */ + 16, /* ldi_c */ 4, /* ldr_uc */ - 12, /* ldi_uc */ + 16, /* ldi_uc */ 4, /* ldr_s */ - 12, /* ldi_s */ + 16, /* ldi_s */ 4, /* ldr_us */ - 12, /* ldi_us */ + 16, /* ldi_us */ 4, /* ldr_i */ - 12, /* ldi_i */ + 16, /* ldi_i */ 4, /* ldr_ui */ - 12, /* ldi_ui */ + 16, /* ldi_ui */ 4, /* ldr_l */ - 12, /* ldi_l */ + 16, /* ldi_l */ 8, /* ldxr_c */ 20, /* ldxi_c */ 4, /* ldxr_uc */ @@ -133,13 +157,13 @@ 4, /* ldxr_l */ 20, /* ldxi_l */ 4, /* str_c */ - 12, /* sti_c */ + 16, /* sti_c */ 4, /* str_s */ - 12, /* sti_s */ + 16, /* sti_s */ 4, /* str_i */ - 12, /* sti_i */ + 16, /* sti_i */ 4, /* str_l */ - 12, /* sti_l */ + 16, /* sti_l */ 4, /* stxr_c */ 20, /* stxi_c */ 4, /* stxr_s */ @@ -189,17 +213,41 @@ 8, /* bxsubr_u */ 8, /* bxsubi_u */ 4, /* jmpr */ - 20, /* jmpi */ + 4, /* jmpi */ 4, /* callr */ - 20, /* calli */ + 16, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -259,11 +307,11 @@ 4, /* movr_f */ 8, /* movi_f */ 8, /* ldr_f */ - 16, /* ldi_f */ + 20, /* ldi_f */ 8, /* ldxr_f */ 24, /* ldxi_f */ 
8, /* str_f */ - 16, /* sti_f */ + 20, /* sti_f */ 8, /* stxr_f */ 24, /* stxi_f */ 8, /* bltr_f */ @@ -350,11 +398,11 @@ 4, /* movr_d */ 12, /* movi_d */ 8, /* ldr_d */ - 16, /* ldi_d */ + 20, /* ldi_d */ 8, /* ldxr_d */ 24, /* ldxi_d */ 8, /* str_d */ - 16, /* sti_d */ + 20, /* sti_d */ 8, /* stxr_d */ 24, /* stxi_d */ 8, /* bltr_d */ @@ -399,4 +447,453 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + +# else /* PACKED_STACK */ +#define JIT_INSTR_MAX 96 + 0, /* data */ + 0, /* live */ + 12, /* align */ + 0, /* save */ + 0, /* load */ + 4, /* skip */ + 0, /* #name */ + 0, /* #note */ + 0, /* label */ + 96, /* prolog */ + 0, /* ellipsis */ + 0, /* va_push */ + 0, /* allocai */ + 0, /* allocar */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ + 0, /* getarg_c */ + 0, /* getarg_uc */ + 0, /* getarg_s */ + 0, /* getarg_us */ + 0, /* getarg_i */ + 0, /* getarg_ui */ + 0, /* getarg_l */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 12, /* va_arg_d */ + 0, /* va_end */ + 4, /* addr */ + 20, /* addi */ + 4, /* addcr */ + 12, /* addci */ + 4, /* addxr */ + 8, /* addxi */ + 4, /* subr */ + 20, /* subi */ + 4, /* subcr */ + 12, /* subci */ + 4, /* subxr */ + 8, /* subxi */ + 24, /* rsbi */ + 4, /* mulr */ + 20, /* muli */ + 12, /* qmulr */ + 20, /* qmuli */ + 12, /* qmulr_u */ + 20, /* qmuli_u */ + 4, /* divr */ + 20, /* divi */ + 4, /* divr_u */ + 12, /* divi_u */ + 20, /* qdivr */ + 16, /* qdivi */ + 20, /* qdivr_u */ + 16, /* qdivi_u */ + 12, /* remr */ + 28, /* remi */ + 12, /* remr_u */ + 20, /* remi_u */ + 4, /* andr */ + 20, /* andi */ + 4, /* orr */ + 20, /* ori */ + 4, /* xorr */ + 20, /* xori */ + 4, /* lshr */ + 4, /* lshi */ + 4, /* rshr */ + 
4, /* rshi */ + 4, /* rshr_u */ + 4, /* rshi_u */ + 4, /* negr */ + 4, /* comr */ + 8, /* ltr */ + 8, /* lti */ + 8, /* ltr_u */ + 8, /* lti_u */ + 8, /* ler */ + 8, /* lei */ + 8, /* ler_u */ + 8, /* lei_u */ + 8, /* eqr */ + 8, /* eqi */ + 8, /* ger */ + 8, /* gei */ + 8, /* ger_u */ + 8, /* gei_u */ + 8, /* gtr */ + 8, /* gti */ + 8, /* gtr_u */ + 8, /* gti_u */ + 8, /* ner */ + 8, /* nei */ + 4, /* movr */ + 16, /* movi */ + 8, /* movnr */ + 8, /* movzr */ + 28, /* casr */ + 36, /* casi */ + 4, /* extr_c */ + 4, /* extr_uc */ + 4, /* extr_s */ + 4, /* extr_us */ + 4, /* extr_i */ + 4, /* extr_ui */ + 8, /* bswapr_us */ + 8, /* bswapr_ui */ + 4, /* bswapr_ul */ + 8, /* htonr_us */ + 8, /* htonr_ui */ + 4, /* htonr_ul */ + 4, /* ldr_c */ + 16, /* ldi_c */ + 4, /* ldr_uc */ + 16, /* ldi_uc */ + 4, /* ldr_s */ + 16, /* ldi_s */ + 4, /* ldr_us */ + 16, /* ldi_us */ + 4, /* ldr_i */ + 16, /* ldi_i */ + 4, /* ldr_ui */ + 16, /* ldi_ui */ + 4, /* ldr_l */ + 16, /* ldi_l */ + 8, /* ldxr_c */ + 20, /* ldxi_c */ + 4, /* ldxr_uc */ + 20, /* ldxi_uc */ + 4, /* ldxr_s */ + 16, /* ldxi_s */ + 4, /* ldxr_us */ + 16, /* ldxi_us */ + 4, /* ldxr_i */ + 20, /* ldxi_i */ + 4, /* ldxr_ui */ + 16, /* ldxi_ui */ + 4, /* ldxr_l */ + 20, /* ldxi_l */ + 4, /* str_c */ + 16, /* sti_c */ + 4, /* str_s */ + 16, /* sti_s */ + 4, /* str_i */ + 16, /* sti_i */ + 4, /* str_l */ + 16, /* sti_l */ + 4, /* stxr_c */ + 20, /* stxi_c */ + 4, /* stxr_s */ + 20, /* stxi_s */ + 4, /* stxr_i */ + 20, /* stxi_i */ + 4, /* stxr_l */ + 20, /* stxi_l */ + 8, /* bltr */ + 8, /* blti */ + 8, /* bltr_u */ + 8, /* blti_u */ + 8, /* bler */ + 8, /* blei */ + 8, /* bler_u */ + 8, /* blei_u */ + 8, /* beqr */ + 24, /* beqi */ + 8, /* bger */ + 8, /* bgei */ + 8, /* bger_u */ + 8, /* bgei_u */ + 8, /* bgtr */ + 8, /* bgti */ + 8, /* bgtr_u */ + 8, /* bgti_u */ + 8, /* bner */ + 24, /* bnei */ + 8, /* bmsr */ + 8, /* bmsi */ + 8, /* bmcr */ + 8, /* bmci */ + 8, /* boaddr */ + 8, /* boaddi */ + 8, /* boaddr_u */ + 8, 
/* boaddi_u */ + 8, /* bxaddr */ + 8, /* bxaddi */ + 8, /* bxaddr_u */ + 8, /* bxaddi_u */ + 8, /* bosubr */ + 8, /* bosubi */ + 8, /* bosubr_u */ + 8, /* bosubi_u */ + 8, /* bxsubr */ + 8, /* bxsubi */ + 8, /* bxsubr_u */ + 8, /* bxsubi_u */ + 4, /* jmpr */ + 4, /* jmpi */ + 4, /* callr */ + 16, /* calli */ + 0, /* prepare */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ + 0, /* finishr */ + 0, /* finishi */ + 0, /* ret */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ + 0, /* retval_c */ + 0, /* retval_uc */ + 0, /* retval_s */ + 0, /* retval_us */ + 0, /* retval_i */ + 0, /* retval_ui */ + 0, /* retval_l */ + 96, /* epilog */ + 0, /* arg_f */ + 0, /* getarg_f */ + 0, /* putargr_f */ + 0, /* putargi_f */ + 4, /* addr_f */ + 12, /* addi_f */ + 4, /* subr_f */ + 12, /* subi_f */ + 12, /* rsbi_f */ + 4, /* mulr_f */ + 12, /* muli_f */ + 4, /* divr_f */ + 12, /* divi_f */ + 4, /* negr_f */ + 4, /* absr_f */ + 4, /* sqrtr_f */ + 8, /* ltr_f */ + 16, /* lti_f */ + 8, /* ler_f */ + 16, /* lei_f */ + 8, /* eqr_f */ + 16, /* eqi_f */ + 8, /* ger_f */ + 16, /* gei_f */ + 8, /* gtr_f */ + 16, /* gti_f */ + 8, /* ner_f */ + 16, /* nei_f */ + 8, /* unltr_f */ + 16, /* unlti_f */ + 8, /* unler_f */ + 16, /* unlei_f */ + 16, /* uneqr_f */ + 24, /* uneqi_f */ + 8, /* unger_f */ + 16, /* ungei_f */ + 8, /* ungtr_f */ + 16, /* ungti_f */ + 16, /* ltgtr_f */ + 24, /* ltgti_f */ + 8, /* ordr_f */ + 16, /* ordi_f */ + 8, /* unordr_f */ + 16, /* unordi_f */ + 8, /* truncr_f_i */ + 4, /* truncr_f_l */ + 4, /* extr_f 
*/ + 4, /* extr_d_f */ + 4, /* movr_f */ + 8, /* movi_f */ + 8, /* ldr_f */ + 20, /* ldi_f */ + 8, /* ldxr_f */ + 24, /* ldxi_f */ + 8, /* str_f */ + 20, /* sti_f */ + 8, /* stxr_f */ + 24, /* stxi_f */ + 8, /* bltr_f */ + 16, /* blti_f */ + 8, /* bler_f */ + 16, /* blei_f */ + 8, /* beqr_f */ + 16, /* beqi_f */ + 8, /* bger_f */ + 16, /* bgei_f */ + 8, /* bgtr_f */ + 16, /* bgti_f */ + 8, /* bner_f */ + 16, /* bnei_f */ + 8, /* bunltr_f */ + 16, /* bunlti_f */ + 8, /* bunler_f */ + 16, /* bunlei_f */ + 16, /* buneqr_f */ + 24, /* buneqi_f */ + 8, /* bunger_f */ + 16, /* bungei_f */ + 8, /* bungtr_f */ + 16, /* bungti_f */ + 16, /* bltgtr_f */ + 24, /* bltgti_f */ + 8, /* bordr_f */ + 16, /* bordi_f */ + 8, /* bunordr_f */ + 16, /* bunordi_f */ + 0, /* pushargr_f */ + 0, /* pushargi_f */ + 0, /* retr_f */ + 0, /* reti_f */ + 0, /* retval_f */ + 0, /* arg_d */ + 0, /* getarg_d */ + 0, /* putargr_d */ + 0, /* putargi_d */ + 4, /* addr_d */ + 12, /* addi_d */ + 4, /* subr_d */ + 12, /* subi_d */ + 12, /* rsbi_d */ + 4, /* mulr_d */ + 12, /* muli_d */ + 4, /* divr_d */ + 12, /* divi_d */ + 4, /* negr_d */ + 4, /* absr_d */ + 4, /* sqrtr_d */ + 8, /* ltr_d */ + 16, /* lti_d */ + 8, /* ler_d */ + 16, /* lei_d */ + 8, /* eqr_d */ + 16, /* eqi_d */ + 8, /* ger_d */ + 16, /* gei_d */ + 8, /* gtr_d */ + 16, /* gti_d */ + 8, /* ner_d */ + 16, /* nei_d */ + 8, /* unltr_d */ + 16, /* unlti_d */ + 8, /* unler_d */ + 16, /* unlei_d */ + 16, /* uneqr_d */ + 24, /* uneqi_d */ + 8, /* unger_d */ + 16, /* ungei_d */ + 8, /* ungtr_d */ + 16, /* ungti_d */ + 16, /* ltgtr_d */ + 24, /* ltgti_d */ + 8, /* ordr_d */ + 16, /* ordi_d */ + 8, /* unordr_d */ + 16, /* unordi_d */ + 8, /* truncr_d_i */ + 4, /* truncr_d_l */ + 4, /* extr_d */ + 4, /* extr_f_d */ + 4, /* movr_d */ + 12, /* movi_d */ + 8, /* ldr_d */ + 20, /* ldi_d */ + 8, /* ldxr_d */ + 24, /* ldxi_d */ + 8, /* str_d */ + 20, /* sti_d */ + 8, /* stxr_d */ + 24, /* stxi_d */ + 8, /* bltr_d */ + 16, /* blti_d */ + 8, /* bler_d */ + 
16, /* blei_d */ + 8, /* beqr_d */ + 20, /* beqi_d */ + 8, /* bger_d */ + 16, /* bgei_d */ + 8, /* bgtr_d */ + 16, /* bgti_d */ + 8, /* bner_d */ + 16, /* bnei_d */ + 8, /* bunltr_d */ + 16, /* bunlti_d */ + 8, /* bunler_d */ + 16, /* bunlei_d */ + 16, /* buneqr_d */ + 24, /* buneqi_d */ + 8, /* bunger_d */ + 16, /* bungei_d */ + 8, /* bungtr_d */ + 16, /* bungti_d */ + 16, /* bltgtr_d */ + 24, /* bltgti_d */ + 8, /* bordr_d */ + 16, /* bordi_d */ + 8, /* bunordr_d */ + 16, /* bunordi_d */ + 0, /* pushargr_d */ + 0, /* pushargi_d */ + 0, /* retr_d */ + 0, /* reti_d */ + 0, /* retval_d */ + 0, /* movr_w_f */ + 0, /* movr_ww_d */ + 0, /* movr_w_d */ + 0, /* movr_f_w */ + 0, /* movi_f_w */ + 0, /* movr_d_ww */ + 0, /* movi_d_ww */ + 0, /* movr_d_w */ + 0, /* movi_d_w */ +# endif #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_aarch64.c b/deps/lightning/lib/jit_aarch64.c index 585618cdb..499a416ff 100644 --- a/deps/lightning/lib/jit_aarch64.c +++ b/deps/lightning/lib/jit_aarch64.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -20,6 +20,9 @@ #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8) #define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) +#if __APPLE__ +typedef jit_pointer_t jit_va_list_t; +#else typedef struct jit_qreg { jit_float64_t l; jit_float64_t h; @@ -52,6 +55,7 @@ typedef struct jit_va_list { jit_qreg_t q6; jit_qreg_t q7; } jit_va_list_t; +#endif /* * Prototypes @@ -72,7 +76,11 @@ extern void __clear_cache(void *, void *); */ jit_register_t _rvs[] = { { rc(gpr) | 0x08, "x8" }, +#if __APPLE__ + { 0x12, "x18" }, +#else { rc(gpr) | 0x12, "x18" }, +#endif { rc(gpr) | 0x11, "x17" }, { rc(gpr) | 0x10, "x16" }, { rc(gpr) | 0x09, "x9" }, @@ -258,20 +266,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -331,7 +337,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -351,6 +357,7 @@ _jit_ellipsis(jit_state_t *_jit) assert(!(_jitc->function->self.call & jit_call_varargs)); _jitc->function->self.call |= jit_call_varargs; +#if !__APPLE__ /* Allocate va_list like object in the stack, * with enough space to save all argument * registers, and use fixed offsets for them. 
*/ @@ -367,6 +374,7 @@ _jit_ellipsis(jit_state_t *_jit) _jitc->function->vafp = (8 - _jitc->function->self.argf) * -16; else _jitc->function->vafp = 0; +#endif } jit_dec_synth(); } @@ -380,7 +388,7 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; @@ -389,10 +397,21 @@ _jit_arg(jit_state_t *_jit) if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { +#if PACKED_STACK || STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif +#if PACKED_STACK + _jitc->function->self.size += + _jitc->function->self.size & ((1 << (code - jit_code_arg_c)) - 1); +#endif offset = _jitc->function->self.size; +#if PACKED_STACK + _jitc->function->self.size += 1 << (code - jit_code_arg_c); +#else _jitc->function->self.size += sizeof(jit_word_t); +#endif } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -408,8 +427,16 @@ _jit_arg_f(jit_state_t *_jit) if (jit_arg_f_reg_p(_jitc->function->self.argf)) offset = _jitc->function->self.argf++; else { +#if PACKED_STACK + _jitc->function->self.size += + _jitc->function->self.size & (sizeof(jit_float32_t) - 1); +#endif offset = _jitc->function->self.size; +#if PACKED_STACK + _jitc->function->self.size += sizeof(jit_float32_t); +#else _jitc->function->self.size += sizeof(jit_word_t); +#endif } node = jit_new_node_ww(jit_code_arg_f, offset, ++_jitc->function->self.argn); @@ -427,8 +454,12 @@ _jit_arg_d(jit_state_t *_jit) if (jit_arg_f_reg_p(_jitc->function->self.argf)) offset = _jitc->function->self.argf++; else { +#if PACKED_STACK + _jitc->function->self.size += + _jitc->function->self.size & (sizeof(jit_float64_t) - 1); +#endif offset = _jitc->function->self.size; - _jitc->function->self.size += sizeof(jit_word_t); + _jitc->function->self.size += 
sizeof(jit_float64_t); } node = jit_new_node_ww(jit_code_arg_d, offset, ++_jitc->function->self.argn); @@ -439,10 +470,15 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_c(u, JIT_RA0 - v->u.w); +#endif + } else jit_ldxi_c(u, JIT_FP, v->u.w); jit_dec_synth(); @@ -451,10 +487,15 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_uc(u, JIT_RA0 - v->u.w); +#endif + } else jit_ldxi_uc(u, JIT_FP, v->u.w); jit_dec_synth(); @@ -463,10 +504,15 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_s(u, JIT_RA0 - v->u.w); +#endif + } else jit_ldxi_s(u, JIT_FP, v->u.w); jit_dec_synth(); @@ -475,10 +521,15 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_us(u, JIT_RA0 - v->u.w); +#endif + } else jit_ldxi_us(u, JIT_FP, v->u.w); 
jit_dec_synth(); @@ -487,22 +538,33 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK || __WORDSIZE == 32 + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_i(u, JIT_RA0 - v->u.w); +#endif + } else jit_ldxi_i(u, JIT_FP, v->u.w); jit_dec_synth(); } +#if __WORDSIZE == 64 void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_ui(u, JIT_RA0 - v->u.w); +#endif + } else jit_ldxi_ui(u, JIT_FP, v->u.w); jit_dec_synth(); @@ -511,7 +573,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); @@ -519,31 +581,106 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_ldxi_l(u, JIT_FP, v->u.w); jit_dec_synth(); } +#endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); - if (jit_arg_reg_p(v->u.w)) - jit_movr(JIT_RA0 - v->u.w, u); - else + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); + if (jit_arg_reg_p(v->u.w)) { + jit_int32_t regno = JIT_RA0 - v->u.w; +#if PACKED_STACK + switch (code) { + case jit_code_putargr_c: jit_extr_c(regno, u); break; + case jit_code_putargr_uc: jit_extr_uc(regno, u); 
break; + case jit_code_putargr_s: jit_extr_s(regno, u); break; + case jit_code_putargr_us: jit_extr_us(regno, u); break; +# if __WORDSIZE == 32 + case jit_code_putargr_i: jit_movr(regno, u); break; +# else + case jit_code_putargr_i: jit_extr_i(regno, u); break; + case jit_code_putargr_ui: jit_extr_ui(regno, u); break; + case jit_code_putargr_l: jit_movr(regno, u); break; +# endif + default: abort(); break; + } +#else + jit_movr(regno, u); +#endif + } + else { +#if PACKED_STACK + switch (code) { + case jit_code_putargr_c: case jit_code_putargr_uc: + jit_stxi_c(v->u.w, JIT_FP, u); break; + case jit_code_putargr_s: case jit_code_putargr_us: + jit_stxi_s(v->u.w, JIT_FP, u); break; +# if __WORDSIZE == 32 + case jit_code_putargr_i: + jit_stxi(v->u.w, JIT_FP, u); break; +# else + case jit_code_putargr_i: case jit_code_putargr_ui: + jit_stxi_i(v->u.w, JIT_FP, u); break; + case jit_code_putargr_l: + jit_stxi(v->u.w, JIT_FP, u); break; +# endif + default: abort(); break; + } +#else jit_stxi(v->u.w, JIT_FP, u); +#endif + } jit_dec_synth(); } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); +#if PACKED_STACK + switch (code) { + case jit_code_putargi_c: u = (jit_int8_t)u; break; + case jit_code_putargi_uc: u = (jit_uint8_t)u; break; + case jit_code_putargi_s: u = (jit_int16_t)u; break; + case jit_code_putargi_us: u = (jit_uint16_t)u; break; +# if __WORDSIZE == 32 + case jit_code_putargi_i: break; +# else + case jit_code_putargi_i: u = (jit_int32_t)u; break; + case jit_code_putargi_ui: u = (jit_uint32_t)u; break; + case jit_code_putargi_l: break; +# endif + default: abort(); break; + } +#endif if (jit_arg_reg_p(v->u.w)) jit_movi(JIT_RA0 - v->u.w, u); else { regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); +#if 
PACKED_STACK + switch (code) { + case jit_code_putargi_c: case jit_code_putargi_uc: + jit_stxi_c(v->u.w, JIT_FP, regno); break; + case jit_code_putargi_s: case jit_code_putargi_us: + jit_stxi_s(v->u.w, JIT_FP, regno); break; +# if __WORDSIZE == 32 + case jit_code_putargi_i: + jit_stxi(v->u.w, JIT_FP, regno); break; +# else + case jit_code_putargi_i: case jit_code_putargi_ui: + jit_stxi_i(v->u.w, JIT_FP, regno); break; + case jit_code_putargi_l: + jit_stxi(v->u.w, JIT_FP, regno); break; +# endif + default: abort(); break; + } +#else jit_stxi(v->u.w, JIT_FP, regno); +#endif jit_unget_reg(regno); } jit_dec_synth(); @@ -632,39 +769,148 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movr(JIT_RA0 - _jitc->function->call.argi, u); + jit_int32_t regno = JIT_RA0 - _jitc->function->call.argi; +#if PACKED_STACK + switch (code) { + case jit_code_pushargr_c: jit_extr_c(regno, u); break; + case jit_code_pushargr_uc: jit_extr_uc(regno, u); break; + case jit_code_pushargr_s: jit_extr_s(regno, u); break; + case jit_code_pushargr_us: jit_extr_us(regno, u); break; +# if __WORDSIZE == 32 + case jit_code_pushargr_i: jit_movr(regno, u); break; +# else + case jit_code_pushargr_i: jit_extr_i(regno, u); break; + case jit_code_pushargr_ui: jit_extr_ui(regno, u); break; + case jit_code_pushargr_l: jit_movr(regno, u); break; +# endif + default: abort(); break; + } +#else + jit_movr(regno, u); +#endif +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(code == jit_code_pushargr); + jit_stxi(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_word_t); + } +#endif ++_jitc->function->call.argi; } else { +#if PACKED_STACK + 
_jitc->function->call.size += + _jitc->function->call.size & + ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1); + switch (code) { + case jit_code_pushargr_c: case jit_code_pushargr_uc: + jit_stxi_c(_jitc->function->call.size, JIT_SP, u); + break; + case jit_code_pushargr_s: case jit_code_pushargr_us: + jit_stxi_s(_jitc->function->call.size, JIT_SP, u); + break; +# if __WORDSIZE == 32 + case jit_code_pushargr_i: + jit_stxi(_jitc->function->call.size, JIT_SP, u); + break; +# else + case jit_code_pushargr_i: case jit_code_pushargr_ui: + jit_stxi_i(_jitc->function->call.size, JIT_SP, u); + break; + case jit_code_pushargr_l: + jit_stxi(_jitc->function->call.size, JIT_SP, u); + break; +# endif + default: + abort(); + break; + } + _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2); +#else jit_stxi(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); +#endif } jit_dec_synth(); } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); +#if PACKED_STACK + switch (code) { + case jit_code_pushargi_c: u = (jit_int8_t)u; break; + case jit_code_pushargi_uc: u = (jit_uint8_t)u; break; + case jit_code_pushargi_s: u = (jit_int16_t)u; break; + case jit_code_pushargi_us: u = (jit_uint16_t)u; break; +# if __WORDSIZE == 32 + case jit_code_pushargi_i: break; +# else + case jit_code_pushargi_i: u = (jit_int32_t)u; break; + case jit_code_pushargi_ui: u = (jit_uint32_t)u; break; + case jit_code_pushargi_l: break; +# endif + default: abort(); break; + } +#endif if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movi(JIT_RA0 - _jitc->function->call.argi, u); + regno = JIT_RA0 - _jitc->function->call.argi; + jit_movi(regno, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(code == jit_code_pushargi); + 
jit_stxi(_jitc->function->call.size, JIT_SP, regno); + _jitc->function->call.size += sizeof(jit_word_t); + } +#endif ++_jitc->function->call.argi; } else { regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & + ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1); + switch (code) { + case jit_code_pushargi_c: case jit_code_pushargi_uc: + jit_stxi_c(_jitc->function->call.size, JIT_SP, regno); + break; + case jit_code_pushargi_s: case jit_code_pushargi_us: + jit_stxi_s(_jitc->function->call.size, JIT_SP, regno); + break; +# if __WORDSIZE == 32 + case jit_code_pushargi_i: + jit_stxi(_jitc->function->call.size, JIT_SP, regno); + break; +# else + case jit_code_pushargi_i: case jit_code_pushargi_ui: + jit_stxi_i(_jitc->function->call.size, JIT_SP, regno); + break; + case jit_code_pushargi_l: + jit_stxi(_jitc->function->call.size, JIT_SP, regno); + break; +# endif + default: + abort(); + break; + } + _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2); +#else jit_stxi(_jitc->function->call.size, JIT_SP, regno); - jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); +#endif + jit_unget_reg(regno); } jit_dec_synth(); } @@ -677,11 +923,26 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) jit_link_prepare(); if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(sizeof(jit_float32_t) == sizeof(jit_word_t)); + jit_stxi_f(_jitc->function->call.size, JIT_SP, + JIT_FA0 - _jitc->function->call.argf); + _jitc->function->call.size += sizeof(jit_word_t); + } +#endif ++_jitc->function->call.argf; } else { +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_float32_t) - 1); + jit_stxi_f(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_float32_t); +#else 
jit_stxi_f(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); +#endif } jit_dec_synth(); } @@ -695,14 +956,29 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) jit_link_prepare(); if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(sizeof(jit_float32_t) == sizeof(jit_word_t)); + jit_stxi_f(_jitc->function->call.size, JIT_SP, + JIT_FA0 - _jitc->function->call.argf); + _jitc->function->call.size += sizeof(jit_word_t); + } +#endif ++_jitc->function->call.argf; } else { regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_float32_t) - 1); + jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); + _jitc->function->call.size += sizeof(jit_float32_t); +#else jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); - jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); +#endif + jit_unget_reg(regno); } jit_dec_synth(); } @@ -715,11 +991,23 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) jit_link_prepare(); if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(sizeof(jit_float64_t) == sizeof(jit_word_t)); + jit_stxi_d(_jitc->function->call.size, JIT_SP, + JIT_FA0 - _jitc->function->call.argf); + _jitc->function->call.size += sizeof(jit_float64_t); + } +#endif ++_jitc->function->call.argf; } else { +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_float64_t) - 1); +#endif jit_stxi_d(_jitc->function->call.size, JIT_SP, u); - _jitc->function->call.size += sizeof(jit_word_t); + _jitc->function->call.size += sizeof(jit_float64_t); } jit_dec_synth(); } @@ -733,14 +1021,26 @@ _jit_pushargi_d(jit_state_t *_jit, 
jit_float64_t u) jit_link_prepare(); if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(sizeof(jit_float64_t) == sizeof(jit_word_t)); + jit_stxi_d(_jitc->function->call.size, JIT_SP, + JIT_FA0 - _jitc->function->call.argf); + _jitc->function->call.size += sizeof(jit_float64_t); + } +#endif ++_jitc->function->call.argf; } else { regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_float64_t) - 1); +#endif jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); - _jitc->function->call.size += sizeof(jit_word_t); + _jitc->function->call.size += sizeof(jit_float64_t); } jit_dec_synth(); } @@ -770,6 +1070,10 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) jit_node_t *node; assert(_jitc->function); jit_inc_synth_w(finishr, r0); +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_word_t) - 1); +#endif if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; node = jit_callr(r0); @@ -787,6 +1091,10 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) jit_node_t *node; assert(_jitc->function); jit_inc_synth_w(finishi, (jit_word_t)i0); +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_word_t) - 1); +#endif if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; node = jit_calli(i0); @@ -835,10 +1143,15 @@ void _jit_retval_i(jit_state_t *_jit, jit_int32_t r0) { jit_inc_synth_w(retval_i, r0); +#if __WORDSIZE == 32 + jit_movr(r0, JIT_RET); +#else jit_extr_i(r0, JIT_RET); +#endif jit_dec_synth(); } +#if __WORDSIZE == 64 void _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0) { @@ -851,10 +1164,10 @@ void 
_jit_retval_l(jit_state_t *_jit, jit_int32_t r0) { jit_inc_synth_w(retval_l, r0); - if (r0 != JIT_RET) - jit_movr(r0, JIT_RET); + jit_movr(r0, JIT_RET); jit_dec_synth(); } +#endif void _jit_retval_f(jit_state_t *_jit, jit_int32_t r0) @@ -886,6 +1199,10 @@ _emit_code(jit_state_t *_jit) jit_node_t *node; jit_uint8_t *data; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_int32_t const_offset; jit_int32_t patch_offset; } undo; @@ -1005,11 +1322,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1115,13 +1434,26 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rr(mov,); + case_rrr(movn,); + case_rrr(movz,); case jit_code_movi: if (node->flag & jit_flag_node) { temp = node->v.n; @@ -1376,7 +1708,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s26_p(word)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); 
patch(word, node); } } @@ -1394,7 +1731,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) calli(temp->u.w); else { - word = calli_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s26_p(word)) + word = calli(_jit->pc.w); + else + word = calli_p(_jit->pc.w); patch(word, node); } } @@ -1405,6 +1747,10 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; restart_function: _jitc->again = 0; @@ -1422,6 +1768,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; goto restart_function; } @@ -1443,11 +1799,23 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case 
jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1455,16 +1823,34 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: case jit_code_retval_s: case jit_code_retval_us: case jit_code_retval_i: +#if __WORDSIZE == 64 case jit_code_retval_ui: case jit_code_retval_l: +#endif case jit_code_retval_f: case jit_code_retval_d: case jit_code_prepare: case jit_code_finishr: case jit_code_finishi: diff --git a/deps/lightning/lib/jit_alpha-cpu.c b/deps/lightning/lib/jit_alpha-cpu.c index 8bfef9caf..627859d8d 100644 --- a/deps/lightning/lib/jit_alpha-cpu.c +++ 
b/deps/lightning/lib/jit_alpha-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -311,6 +311,13 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) CMOVNE(r2, r1, r0) +# define movzr(r0,r1,r2) CMOVEQ(r2, r1, r0) +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEGQ(r1,r0) # define comr(r0,r1) NOT(r1,r0) # define addr(r0,r1,r2) ADDQ(r1,r2,r0) @@ -622,21 +629,15 @@ static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t); static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ul(r0,r1) _bswapr_ul(_jit,r0,r1) +static void 
_bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t); # define jmpr(r0) JMP(_R31_REGNO,r0,0) # define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*, jit_word_t); +static jit_word_t _jmpi(jit_state_t*, jit_word_t); # define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*, jit_word_t); #define callr(r0) _callr(_jit,r0) @@ -811,6 +812,34 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_word_t jump0, jump1, again, done; + jit_int32_t iscasi, r1_reg; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + again = _jit->pc.w; /* AGAIN */ + LDQ_L(r0, r1, 0); /* Load r0 locked */ + jump0 = bner(_jit->pc.w, r0, r2); /* bne FAIL r0 r2 */ + movr(r0, r3); /* Move to r0 to attempt to store */ + STQ_C(r0, r1, 0); /* r0 is an in/out argument */ + jump1 = _jit->pc.w; + BEQ(r0, 0); /* beqi AGAIN r0 0 */ + patch_at(jump1, again); + jump1 = _jit->pc.w; + BR(_R31_REGNO, 0); /* r0 set to 1 if store succeeded */ + patch_at(jump0, _jit->pc.w); /* FAIL: */ + movi(r0, 0); /* Already locked */ + patch_at(jump1, _jit->pc.w); + if (iscasi) + jit_unget_reg(r1_reg); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -2453,7 +2482,7 @@ _extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t t0; t0 = jit_get_reg(jit_class_gpr); @@ -2465,7 +2494,7 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t t0; jit_int32_t t1; @@ -2491,7 +2520,7 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } 
static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t t0; jit_int32_t t1; @@ -2514,7 +2543,7 @@ _htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(t0); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { jit_word_t w; @@ -2524,7 +2553,8 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) if (_s21_p(d)) BR(_R31_REGNO, d); else - (void)jmpi_p(i0); + w = jmpi_p(i0); + return (w); } static jit_word_t diff --git a/deps/lightning/lib/jit_alpha-fpu.c b/deps/lightning/lib/jit_alpha-fpu.c index ea5c7465a..5452a1ea7 100644 --- a/deps/lightning/lib/jit_alpha-fpu.c +++ b/deps/lightning/lib/jit_alpha-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_alpha-sz.c b/deps/lightning/lib/jit_alpha-sz.c index e1a572aab..826589e18 100644 --- a/deps/lightning/lib/jit_alpha-sz.c +++ b/deps/lightning/lib/jit_alpha-sz.c @@ -1,20 +1,23 @@ - #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 76 +#define JIT_INSTR_MAX 88 0, /* data */ 0, /* live */ - 4, /* align */ + 12, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 76, /* prolog */ + 88, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,11 +25,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 0, /* va_start */ - 0, /* va_arg */ - 0, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* 
putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 20, /* va_start */ + 24, /* va_arg */ + 44, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 32, /* addi */ @@ -47,18 +62,18 @@ 56, /* qmuli */ 12, /* qmulr_u */ 32, /* qmuli_u */ - 48, /* divr */ - 72, /* divi */ - 48, /* divr_u */ - 72, /* divi_u */ - 56, /* qdivr */ - 56, /* qdivi */ - 56, /* qdivr_u */ - 56, /* qdivi_u */ - 48, /* remr */ - 72, /* remi */ - 48, /* remr_u */ - 72, /* remi_u */ + 44, /* divr */ + 68, /* divi */ + 44, /* divr_u */ + 68, /* divi_u */ + 52, /* qdivr */ + 52, /* qdivi */ + 52, /* qdivr_u */ + 52, /* qdivi_u */ + 44, /* remr */ + 68, /* remi */ + 44, /* remr_u */ + 68, /* remi_u */ 4, /* andr */ 32, /* andi */ 4, /* orr */ @@ -95,12 +110,19 @@ 12, /* nei */ 4, /* movr */ 32, /* movi */ + 4, /* movnr */ + 4, /* movzr */ + 32, /* casr */ + 60, /* casi */ 8, /* extr_c */ 8, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 8, /* extr_i */ 8, /* extr_ui */ + 16, /* bswapr_us */ + 36, /* bswapr_ui */ + 36, /* bswapr_ul */ 16, /* htonr_us */ 36, /* htonr_ui */ 36, /* htonr_ul */ @@ -119,19 +141,19 @@ 4, /* ldr_l */ 32, /* ldi_l */ 16, /* ldxr_c */ - 12, /* ldxi_c */ + 44, /* ldxi_c */ 8, /* ldxr_uc */ - 4, /* ldxi_uc */ + 36, /* ldxi_uc */ 16, /* ldxr_s */ - 12, /* ldxi_s */ + 44, /* ldxi_s */ 8, /* ldxr_us */ - 4, /* ldxi_us */ + 36, /* ldxi_us */ 8, /* ldxr_i */ - 4, /* ldxi_i */ + 36, /* ldxi_i */ 16, /* ldxr_ui */ - 12, /* ldxi_ui */ + 44, /* ldxi_ui */ 8, /* ldxr_l */ - 4, /* ldxi_l */ + 36, /* ldxi_l */ 4, /* str_c */ 32, /* sti_c */ 4, /* str_s */ @@ -141,13 +163,13 @@ 4, /* str_l */ 32, /* sti_l */ 8, /* stxr_c */ - 4, /* stxi_c */ + 36, /* stxi_c */ 8, /* stxr_s */ - 4, /* stxi_s */ + 36, /* stxi_s */ 8, /* stxr_i */ - 4, /* stxi_i */ + 36, /* stxi_i */ 8, /* stxr_l */ - 4, /* stxi_l */ + 36, /* stxi_l */ 8, /* bltr */ 8, /* blti */ 8, /* bltr_u */ @@ -188,18 +210,42 @@ 32, /* bxsubi */ 16, /* bxsubr_u */ 16, /* bxsubi_u */ - 0, /* jmpr */ - 36, /* jmpi */ + 
4, /* jmpr */ + 4, /* jmpi */ 8, /* callr */ 36, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -207,93 +253,93 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 68, /* epilog */ + 76, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 8, /* addr_f */ - 32, /* addi_f */ + 40, /* addi_f */ 8, /* subr_f */ - 32, /* subi_f */ - 32, /* rsbi_f */ + 40, /* subi_f */ + 40, /* rsbi_f */ 8, /* mulr_f */ - 32, /* muli_f */ + 40, /* muli_f */ 8, /* divr_f */ - 32, /* divi_f */ + 40, /* divi_f */ 4, /* negr_f */ 4, /* absr_f */ 8, /* sqrtr_f */ 32, /* ltr_f */ - 56, /* lti_f */ + 64, /* lti_f */ 32, /* ler_f */ - 56, /* lei_f */ + 64, /* lei_f */ 32, /* eqr_f */ - 56, /* eqi_f */ + 64, /* eqi_f */ 32, /* ger_f */ - 56, /* gei_f */ + 64, /* gei_f */ 32, /* gtr_f */ - 56, /* gti_f */ + 64, /* gti_f */ 32, /* ner_f */ - 56, /* nei_f */ + 64, /* nei_f */ 32, /* unltr_f */ - 56, /* unlti_f */ + 64, /* unlti_f */ 32, /* unler_f */ - 56, /* unlei_f */ + 64, /* unlei_f */ 32, /* uneqr_f */ - 56, /* uneqi_f */ + 64, /* uneqi_f */ 32, /* unger_f */ - 56, /* ungei_f */ + 64, /* ungei_f */ 32, /* ungtr_f */ - 56, /* ungti_f */ + 64, /* ungti_f */ 32, /* ltgtr_f */ - 56, /* ltgti_f */ + 64, /* ltgti_f */ 20, /* ordr_f */ - 44, /* 
ordi_f */ + 52, /* ordi_f */ 20, /* unordr_f */ - 44, /* unordi_f */ + 52, /* unordi_f */ 16, /* truncr_f_i */ 16, /* truncr_f_l */ 12, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ - 24, /* movi_f */ + 32, /* movi_f */ 4, /* ldr_f */ 32, /* ldi_f */ 8, /* ldxr_f */ - 4, /* ldxi_f */ + 36, /* ldxi_f */ 4, /* str_f */ 32, /* sti_f */ 8, /* stxr_f */ - 4, /* stxi_f */ + 36, /* stxi_f */ 24, /* bltr_f */ - 48, /* blti_f */ + 56, /* blti_f */ 24, /* bler_f */ - 48, /* blei_f */ + 56, /* blei_f */ 24, /* beqr_f */ - 48, /* beqi_f */ + 56, /* beqi_f */ 24, /* bger_f */ - 48, /* bgei_f */ + 56, /* bgei_f */ 24, /* bgtr_f */ - 48, /* bgti_f */ + 56, /* bgti_f */ 28, /* bner_f */ - 52, /* bnei_f */ + 60, /* bnei_f */ 28, /* bunltr_f */ - 52, /* bunlti_f */ + 60, /* bunlti_f */ 28, /* bunler_f */ - 52, /* bunlei_f */ + 60, /* bunlei_f */ 28, /* buneqr_f */ - 52, /* buneqi_f */ + 60, /* buneqi_f */ 28, /* bunger_f */ - 52, /* bungei_f */ + 60, /* bungei_f */ 28, /* bungtr_f */ - 52, /* bungti_f */ + 60, /* bungti_f */ 28, /* bltgtr_f */ - 52, /* bltgti_f */ + 60, /* bltgti_f */ 12, /* bordr_f */ - 36, /* bordi_f */ + 44, /* bordi_f */ 12, /* bunordr_f */ - 36, /* bunordi_f */ + 44, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -304,87 +350,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 8, /* addr_d */ - 28, /* addi_d */ + 40, /* addi_d */ 8, /* subr_d */ - 28, /* subi_d */ - 28, /* rsbi_d */ + 40, /* subi_d */ + 40, /* rsbi_d */ 8, /* mulr_d */ - 28, /* muli_d */ + 40, /* muli_d */ 8, /* divr_d */ - 28, /* divi_d */ + 40, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 8, /* sqrtr_d */ 32, /* ltr_d */ - 52, /* lti_d */ + 64, /* lti_d */ 32, /* ler_d */ - 52, /* lei_d */ + 64, /* lei_d */ 32, /* eqr_d */ - 52, /* eqi_d */ + 64, /* eqi_d */ 32, /* ger_d */ - 52, /* gei_d */ + 64, /* gei_d */ 32, /* gtr_d */ - 52, /* gti_d */ + 64, /* gti_d */ 32, /* ner_d */ - 52, /* nei_d */ + 64, /* nei_d */ 32, /* unltr_d */ - 52, /* unlti_d */ + 64, /* unlti_d */ 32, 
/* unler_d */ - 52, /* unlei_d */ + 64, /* unlei_d */ 32, /* uneqr_d */ - 52, /* uneqi_d */ + 64, /* uneqi_d */ 32, /* unger_d */ - 52, /* ungei_d */ + 64, /* ungei_d */ 32, /* ungtr_d */ - 52, /* ungti_d */ + 64, /* ungti_d */ 32, /* ltgtr_d */ - 52, /* ltgti_d */ + 64, /* ltgti_d */ 20, /* ordr_d */ - 40, /* ordi_d */ + 52, /* ordi_d */ 20, /* unordr_d */ - 40, /* unordi_d */ + 52, /* unordi_d */ 16, /* truncr_d_i */ 16, /* truncr_d_l */ 12, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 20, /* movi_d */ + 32, /* movi_d */ 4, /* ldr_d */ 32, /* ldi_d */ 8, /* ldxr_d */ - 4, /* ldxi_d */ + 36, /* ldxi_d */ 4, /* str_d */ 32, /* sti_d */ 8, /* stxr_d */ - 4, /* stxi_d */ + 36, /* stxi_d */ 24, /* bltr_d */ - 44, /* blti_d */ + 56, /* blti_d */ 24, /* bler_d */ - 44, /* blei_d */ + 56, /* blei_d */ 24, /* beqr_d */ - 44, /* beqi_d */ + 56, /* beqi_d */ 24, /* bger_d */ - 44, /* bgei_d */ + 56, /* bgei_d */ 24, /* bgtr_d */ - 44, /* bgti_d */ + 56, /* bgti_d */ 28, /* bner_d */ - 48, /* bnei_d */ + 60, /* bnei_d */ 28, /* bunltr_d */ - 48, /* bunlti_d */ + 60, /* bunlti_d */ 28, /* bunler_d */ - 48, /* bunlei_d */ + 60, /* bunlei_d */ 28, /* buneqr_d */ - 48, /* buneqi_d */ + 60, /* buneqi_d */ 28, /* bunger_d */ - 48, /* bungei_d */ + 60, /* bungei_d */ 28, /* bungtr_d */ - 48, /* bungti_d */ + 60, /* bungti_d */ 28, /* bltgtr_d */ - 48, /* bltgti_d */ + 60, /* bltgti_d */ 12, /* bordr_d */ - 32, /* bordi_d */ + 44, /* bordi_d */ 12, /* bunordr_d */ - 32, /* bunordi_d */ + 44, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ diff --git a/deps/lightning/lib/jit_alpha.c b/deps/lightning/lib/jit_alpha.c index cb8f38210..f6b710e81 100644 --- a/deps/lightning/lib/jit_alpha.c +++ b/deps/lightning/lib/jit_alpha.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -246,20 +246,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -319,7 +317,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -361,18 +359,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function != NULL); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; _jitc->function->self.size += 8; } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -417,7 +419,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _A0 - v->u.w); @@ -429,7 +431,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void 
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _A0 - v->u.w); @@ -441,7 +443,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _A0 - v->u.w); @@ -453,7 +455,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _A0 - v->u.w); @@ -465,7 +467,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_i(u, _A0 - v->u.w); @@ -477,7 +479,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _A0 - v->u.w); @@ -489,7 +491,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _A0 - v->u.w); @@ -499,10 +501,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, 
jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_A0 - v->u.w, u); else @@ -511,11 +513,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_A0 - v->u.w, u); else { @@ -610,10 +612,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function != NULL); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(_A0 - _jitc->function->call.argi, u); @@ -627,11 +629,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_int64_t u) +_jit_pushargi(jit_state_t *_jit, jit_int64_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function != NULL); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(_A0 - _jitc->function->call.argi, u); @@ -863,6 +865,10 @@ _emit_code(jit_state_t *_jit) jit_node_t *node; jit_uint8_t *data; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_int32_t const_offset; jit_int32_t patch_offset; } undo; @@ -965,11 +971,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & 
(node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1073,12 +1081,25 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1336,7 +1357,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (_s21_p(word)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -1365,6 +1391,10 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; restart_function: _jitc->again = 0; @@ -1382,6 +1412,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; goto restart_function; } @@ -1403,11 +1443,18 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1415,10 +1462,22 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case 
jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: diff --git a/deps/lightning/lib/jit_arm-cpu.c b/deps/lightning/lib/jit_arm-cpu.c index b6ee2605b..12f9a2f7d 100644 --- a/deps/lightning/lib/jit_arm-cpu.c +++ b/deps/lightning/lib/jit_arm-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -36,6 +36,7 @@ # define jit_armv5_p() (jit_cpu.version >= 5) # define jit_armv5e_p() (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend)) # define jit_armv6_p() (jit_cpu.version >= 6) +# define jit_armv7_p() (jit_cpu.version >= 7) # define jit_armv7r_p() 0 # define stack_framesize 48 extern int __aeabi_idivmod(int, int); @@ -179,7 +180,23 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define ARM_XTR8 0x00000400 /* ?xt? rotate 8 bits */ # define ARM_XTR16 0x00000800 /* ?xt? rotate 16 bits */ # define ARM_XTR24 0x00000c00 /* ?xt? 
rotate 24 bits */ +# define ARM_LDREX 0x01900090 +# define THUMB2_LDREX 0xe8500000 +# define ARM_STREX 0x01800090 +# define THUMB2_STREX 0xe8400000 /* << ARMv6* */ +/* >> ARMv7 */ +# define ARM_DMB 0xf57ff050 +# define THUMB2_DMB 0xf3bf8f50 +# define DMB_SY 0xf +# define DMB_ST 0xe +# define DMB_ISH 0xb +# define DMB_ISHST 0xa +# define DMB_NSH 0x7 +# define DMB_NSHT 0x6 +# define DMB_OSH 0x3 +# define DMB_OSHST 0x2 +/* << ARMv7 */ # define ARM_SHIFT 0x01a00000 # define ARM_R 0x00000010 /* register shift */ # define ARM_LSL 0x00000000 @@ -399,6 +416,12 @@ static void _tcit(jit_state_t*,unsigned int,int); static void _tpp(jit_state_t*,int,int); # define torl(o,rn,im) _torl(_jit,o,rn,im) static void _torl(jit_state_t*,int,int,int) maybe_unused; +# define DMB(im) dmb(im) +# define T2_DMB(im) tdmb(im) +# define dmb(im) _dmb(_jit, im) +static void _dmb(jit_state_t *_jit, int im); +# define tdmb(im) _tdmb(_jit, im) +static void _tdmb(jit_state_t *_jit, int im); # define CC_MOV(cc,rd,rm) corrr(cc,ARM_MOV,0,rd,rm) # define MOV(rd,rm) CC_MOV(ARM_CC_AL,rd,rm) # define T1_MOV(rd,rm) is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7)) @@ -612,7 +635,7 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused; # define CMNI(rn,im) CC_CMNI(ARM_CC_AL,rn,im) # define T2_CMNI(rn,im) torri(THUMB2_CMNI,rn,_R15_REGNO,im) # define CC_TST(cc,rn,rm) corrr(cc,ARM_TST,rn,r0,rm) -# define TST(rn,rm) CC_TST(ARM_CC_AL,rn,rm) +# define TST(rn,rm) corrr(ARM_CC_AL,ARM_TST,rn,0,rm) # define T1_TST(rn,rm) is(THUMB_TST|(_u3(rm)<<3)|_u3(rn)) # define T2_TST(rn,rm) torrr(THUMB2_TST,rn,_R15_REGNO,rm) # define CC_TSTI(cc,rn,im) corri(cc,ARM_TST|ARM_I,rn,0,im) @@ -718,6 +741,9 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused; # define CC_LDRDIN(cc,rt,rn,im) corri8(cc,ARM_LDRDI,rn,rt,im) # define LDRDIN(rt,rn,im) CC_LDRDIN(ARM_CC_AL,rt,rn,im) # define T2_LDRDIN(rt,rt2,rn,im) torrri8(THUMB2_LDRDI,rn,rt,rt2,im) +# define CC_LDREX(cc,rt,rn) corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf) +# define 
LDREX(rt,rn) CC_LDREX(ARM_CC_AL,rt,rn) +# define T2_LDREX(rt,rn,im) torrri8(THUMB2_LDREX,rn,rt,0xf,im) # define CC_STRB(cc,rt,rn,rm) corrr(cc,ARM_STRB|ARM_P,rn,rt,rm) # define STRB(rt,rn,rm) CC_STRB(ARM_CC_AL,rt,rn,rm) # define T1_STRB(rt,rn,rm) is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) @@ -771,6 +797,9 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused; # define CC_STRDIN(cc,rt,rn,im) corri8(cc,ARM_STRDI,rn,rt,im) # define STRDIN(rt,rn,im) CC_STRDIN(ARM_CC_AL,rt,rn,im) # define T2_STRDIN(rt,rt2,rn,im) torrri8(THUMB2_STRDI,rn,rt,rt2,im) +# define CC_STREX(cc,rd,rt,rn) corrrr(cc,ARM_STREX,rn,rd,0xf,rt) +# define STREX(rd,rt,rn) CC_STREX(ARM_CC_AL,rd,rt,rn) +# define T2_STREX(rd,rt,rn,im) torrri8(THUMB2_STREX,rn,rt,rd,im) # define CC_LDMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im) # define LDMIA(rn,im) CC_LDMIA(ARM_CC_AL,rn,im) # define CC_LDM(cc,rn,im) CC_LDMIA(cc,rn,im) @@ -843,6 +872,15 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define comr(r0,r1) _comr(_jit,r0,r1) static void _comr(jit_state_t*,jit_int32_t,jit_int32_t); # define negr(r0,r1) _negr(_jit,r0,r1) @@ -1091,15 +1129,10 @@ static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define stxi_i(r0,r1,i0) _stxi_i(_jit,r0,r1,i0) 
static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0,r1) _extr_uc(_jit,r0,r1) @@ -1509,6 +1542,22 @@ _torl(jit_state_t *_jit, int o, int rn, int im) iss(thumb.s[0], thumb.s[1]); } +static void +_dmb(jit_state_t *_jit, int im) +{ + assert(!(im & 0xfffffff0)); + ii(ARM_DMB|im); +} + +static void +_tdmb(jit_state_t *_jit, int im) +{ + jit_thumb_t thumb; + assert(!(im & 0xfffffff0)); + thumb.i = THUMB2_DMB | im; + iss(thumb.s[0], thumb.s[1]); +} + static void _nop(jit_state_t *_jit, jit_int32_t i0) { @@ -1582,6 +1631,86 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movznr(jit_state_t *_jit, int ct, jit_int32_t r0, + jit_int32_t r1, jit_int32_t r2) +{ + if (jit_thumb_p()) { + if (r2 < 7) + T1_CMPI(r2, 0); + else + T2_CMPI(r2, 0); + IT(ct); + T1_MOV(r0, r1); + } else { + CMPI(r2, 0); + CC_MOV(ct, r0, r1); + } +} + +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + _movznr(_jit, ARM_CC_NE, r0, r1, r2); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + _movznr(_jit, ARM_CC_EQ, r0, r1, r2); +} + +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, 
iscasi; + jit_word_t retry, done, jump0, jump1; + if (!jit_armv7_p()) + fallback_casx(r0, r1, r2, r3, i0); + else { + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + if (jit_thumb_p()) { + T2_DMB(DMB_ISH); + /* retry: */ + retry = _jit->pc.w; + T2_LDREX(r0, r1, 0); + eqr(r0, r0, r2); + jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */ + T2_STREX(r0, r3, r1, 0); + jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ + /* r0 = 0 if memory updated, 1 otherwise */ + xori(r0, r0, 1); + /* done: */ + done = _jit->pc.w; + T2_DMB(DMB_ISH); + } + else { + DMB(DMB_ISH); + /* retry: */ + retry = _jit->pc.w; + LDREX(r0, r1); + eqr(r0, r0, r2); + jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */ + STREX(r0, r3, r1); + jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ + /* r0 = 0 if memory updated, 1 otherwise */ + xori(r0, r0, 1); + /* done: */ + done = _jit->pc.w; + DMB(DMB_ISH); + } + patch_at(arm_patch_jump, jump0, done); + patch_at(arm_patch_jump, jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); + } +} + static void _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -3576,11 +3705,9 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } -# if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t t0; if (jit_thumb_p()) { if ((r0|r1) < 8) T1_REV(r0, r1); @@ -3594,20 +3721,14 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) rshi_u(r0, r0, 16); } else { - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); + generic_bswapr_us(_jit, r0, r1); } } } /* inline glibc htonl (without register clobber) */ static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t 
*_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (jit_thumb_p()) { @@ -3629,7 +3750,6 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } } } -#endif static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) diff --git a/deps/lightning/lib/jit_arm-swf.c b/deps/lightning/lib/jit_arm-swf.c index bf86ca1cc..c88f9e3cb 100644 --- a/deps/lightning/lib/jit_arm-swf.c +++ b/deps/lightning/lib/jit_arm-swf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_arm-sz.c b/deps/lightning/lib/jit_arm-sz.c index 9f0d01282..2c85904ec 100644 --- a/deps/lightning/lib/jit_arm-sz.c +++ b/deps/lightning/lib/jit_arm-sz.c @@ -1,12 +1,13 @@ #if __WORDSIZE == 32 #if defined(__ARM_PCS_VFP) -#define JIT_INSTR_MAX 48 +#define JIT_INSTR_MAX 50 0, /* data */ 0, /* live */ - 2, /* align */ + 14, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 2, /* #name */ 0, /* #note */ 0, /* label */ @@ -15,7 +16,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -23,8 +27,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 16, /* va_arg_d */ @@ -48,18 +64,18 @@ 12, /* qmuli */ 4, /* qmulr_u */ 8, /* qmuli_u */ - 40, /* divr */ - 48, /* divi */ - 40, /* divr_u */ - 44, /* divi_u */ - 34, /* qdivr */ - 38, /* qdivi */ - 34, /* qdivr_u */ - 38, /* qdivi_u */ - 40, /* remr */ - 48, /* remi 
*/ - 40, /* remr_u */ - 44, /* remi_u */ + 32, /* divr */ + 36, /* divi */ + 24, /* divr_u */ + 28, /* divi_u */ + 18, /* qdivr */ + 22, /* qdivi */ + 18, /* qdivr_u */ + 22, /* qdivi_u */ + 24, /* remr */ + 32, /* remi */ + 24, /* remr_u */ + 28, /* remi_u */ 4, /* andr */ 12, /* andi */ 4, /* orr */ @@ -96,12 +112,19 @@ 14, /* nei */ 4, /* movr */ 8, /* movi */ + 8, /* movnr */ + 8, /* movzr */ + 42, /* casr */ + 50, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 8, /* bswapr_us */ + 4, /* bswapr_ui */ + 0, /* bswapr_ul */ 8, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -194,13 +217,37 @@ 4, /* callr */ 20, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -349,7 +396,7 @@ 8, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 16, /* movi_d */ + 32, /* movi_d */ 4, /* ldr_d */ 12, /* ldi_d */ 8, /* ldxr_d */ @@ -363,7 +410,7 @@ 12, /* bler_d */ 28, /* blei_d */ 12, /* beqr_d */ - 28, /* beqi_d */ + 36, /* beqi_d */ 12, /* bger_d */ 28, /* bgei_d */ 12, /* bgtr_d */ @@ -408,9 +455,10 @@ #define JIT_INSTR_MAX 160 0, /* data */ 0, /* live */ - 2, /* align */ + 18, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 2, /* #name */ 0, /* #note */ 0, /* label */ @@ -419,7 +467,10 
@@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -427,8 +478,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 28, /* va_arg_d */ @@ -452,18 +515,18 @@ 12, /* qmuli */ 4, /* qmulr_u */ 8, /* qmuli_u */ - 40, /* divr */ - 48, /* divi */ - 40, /* divr_u */ - 44, /* divi_u */ - 34, /* qdivr */ - 38, /* qdivi */ - 34, /* qdivr_u */ - 38, /* qdivi_u */ - 40, /* remr */ - 48, /* remi */ - 40, /* remr_u */ - 44, /* remi_u */ + 32, /* divr */ + 36, /* divi */ + 24, /* divr_u */ + 28, /* divi_u */ + 18, /* qdivr */ + 22, /* qdivi */ + 18, /* qdivr_u */ + 22, /* qdivi_u */ + 24, /* remr */ + 32, /* remi */ + 24, /* remr_u */ + 28, /* remi_u */ 4, /* andr */ 12, /* andi */ 4, /* orr */ @@ -500,12 +563,19 @@ 14, /* nei */ 4, /* movr */ 8, /* movi */ + 8, /* movnr */ + 8, /* movzr */ + 42, /* casr */ + 46, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ 20, /* htonr_us */ 16, /* htonr_ui */ 0, /* htonr_ul */ @@ -598,13 +668,37 @@ 4, /* callr */ 20, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* 
finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -617,52 +711,52 @@ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ - 40, /* addr_f */ - 40, /* addi_f */ - 40, /* subr_f */ - 40, /* subi_f */ - 40, /* rsbi_f */ - 40, /* mulr_f */ - 40, /* muli_f */ - 40, /* divr_f */ - 40, /* divi_f */ + 24, /* addr_f */ + 24, /* addi_f */ + 24, /* subr_f */ + 24, /* subi_f */ + 24, /* rsbi_f */ + 24, /* mulr_f */ + 24, /* muli_f */ + 24, /* divr_f */ + 24, /* divi_f */ 12, /* negr_f */ 12, /* absr_f */ - 36, /* sqrtr_f */ - 40, /* ltr_f */ - 44, /* lti_f */ - 40, /* ler_f */ - 44, /* lei_f */ - 40, /* eqr_f */ - 44, /* eqi_f */ - 40, /* ger_f */ - 44, /* gei_f */ - 40, /* gtr_f */ - 44, /* gti_f */ - 44, /* ner_f */ - 48, /* nei_f */ - 72, /* unltr_f */ - 80, /* unlti_f */ - 72, /* unler_f */ - 80, /* unlei_f */ - 72, /* uneqr_f */ - 80, /* uneqi_f */ - 72, /* unger_f */ - 80, /* ungei_f */ - 72, /* ungtr_f */ - 80, /* ungti_f */ - 76, /* ltgtr_f */ - 84, /* ltgti_f */ - 44, /* ordr_f */ - 48, /* ordi_f */ - 72, /* unordr_f */ - 80, /* unordi_f */ - 36, /* truncr_f_i */ + 20, /* sqrtr_f */ + 24, /* ltr_f */ + 30, /* lti_f */ + 24, /* ler_f */ + 32, /* lei_f */ + 24, /* eqr_f */ + 30, /* eqi_f */ + 24, /* ger_f */ + 30, /* gei_f */ + 24, /* gtr_f */ + 30, /* gti_f */ + 28, /* ner_f */ + 32, /* nei_f */ + 56, /* unltr_f */ + 64, /* unlti_f */ + 56, /* unler_f */ + 64, /* unlei_f */ + 56, /* uneqr_f */ + 64, /* uneqi_f */ + 56, /* unger_f */ + 64, /* ungei_f */ + 56, /* ungtr_f */ + 64, /* ungti_f */ + 60, /* ltgtr_f */ + 68, /* ltgti_f */ + 28, /* ordr_f */ + 32, /* ordi_f */ + 56, /* unordr_f */ + 64, /* unordi_f */ + 20, /* truncr_f_i */ 
0, /* truncr_f_l */ - 36, /* extr_f */ - 38, /* extr_d_f */ + 28, /* extr_f */ + 22, /* extr_d_f */ 8, /* movr_f */ - 12, /* movi_f */ + 16, /* movi_f */ 8, /* ldr_f */ 16, /* ldi_f */ 8, /* ldxr_f */ @@ -671,34 +765,34 @@ 16, /* sti_f */ 8, /* stxr_f */ 16, /* stxi_f */ - 44, /* bltr_f */ - 48, /* blti_f */ - 44, /* bler_f */ - 48, /* blei_f */ - 44, /* beqr_f */ - 52, /* beqi_f */ - 44, /* bger_f */ - 48, /* bgei_f */ - 44, /* bgtr_f */ - 48, /* bgti_f */ - 44, /* bner_f */ - 48, /* bnei_f */ - 44, /* bunltr_f */ - 48, /* bunlti_f */ - 44, /* bunler_f */ - 48, /* bunlei_f */ - 76, /* buneqr_f */ - 84, /* buneqi_f */ - 44, /* bunger_f */ - 48, /* bungei_f */ - 44, /* bungtr_f */ - 48, /* bungti_f */ - 76, /* bltgtr_f */ - 84, /* bltgti_f */ - 44, /* bordr_f */ - 48, /* bordi_f */ - 44, /* bunordr_f */ - 48, /* bunordi_f */ + 28, /* bltr_f */ + 32, /* blti_f */ + 28, /* bler_f */ + 32, /* blei_f */ + 28, /* beqr_f */ + 48, /* beqi_f */ + 28, /* bger_f */ + 32, /* bgei_f */ + 28, /* bgtr_f */ + 32, /* bgti_f */ + 28, /* bner_f */ + 32, /* bnei_f */ + 28, /* bunltr_f */ + 32, /* bunlti_f */ + 28, /* bunler_f */ + 32, /* bunlei_f */ + 60, /* buneqr_f */ + 68, /* buneqi_f */ + 28, /* bunger_f */ + 32, /* bungei_f */ + 28, /* bungtr_f */ + 32, /* bungti_f */ + 60, /* bltgtr_f */ + 68, /* bltgti_f */ + 28, /* bordr_f */ + 32, /* bordi_f */ + 28, /* bunordr_f */ + 32, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -708,52 +802,52 @@ 0, /* getarg_d */ 0, /* putargr_d */ 0, /* putargi_d */ - 50, /* addr_d */ - 52, /* addi_d */ - 50, /* subr_d */ - 52, /* subi_d */ - 52, /* rsbi_d */ - 50, /* mulr_d */ - 52, /* muli_d */ - 50, /* divr_d */ - 52, /* divi_d */ + 34, /* addr_d */ + 36, /* addi_d */ + 34, /* subr_d */ + 36, /* subi_d */ + 36, /* rsbi_d */ + 34, /* mulr_d */ + 36, /* muli_d */ + 34, /* divr_d */ + 36, /* divi_d */ 20, /* negr_d */ 20, /* absr_d */ - 42, /* sqrtr_d */ - 44, /* ltr_d */ - 48, /* lti_d */ - 44, /* ler_d */ - 48, /* lei_d 
*/ - 44, /* eqr_d */ - 48, /* eqi_d */ - 44, /* ger_d */ - 48, /* gei_d */ - 44, /* gtr_d */ - 48, /* gti_d */ - 48, /* ner_d */ - 52, /* nei_d */ - 82, /* unltr_d */ - 88, /* unlti_d */ - 82, /* unler_d */ - 88, /* unlei_d */ - 82, /* uneqr_d */ - 88, /* uneqi_d */ - 82, /* unger_d */ - 88, /* ungei_d */ - 82, /* ungtr_d */ - 88, /* ungti_d */ - 86, /* ltgtr_d */ - 92, /* ltgti_d */ - 48, /* ordr_d */ - 52, /* ordi_d */ - 82, /* unordr_d */ - 88, /* unordi_d */ - 36, /* truncr_d_i */ + 26, /* sqrtr_d */ + 28, /* ltr_d */ + 34, /* lti_d */ + 28, /* ler_d */ + 36, /* lei_d */ + 28, /* eqr_d */ + 34, /* eqi_d */ + 28, /* ger_d */ + 34, /* gei_d */ + 28, /* gtr_d */ + 34, /* gti_d */ + 32, /* ner_d */ + 36, /* nei_d */ + 66, /* unltr_d */ + 72, /* unlti_d */ + 66, /* unler_d */ + 72, /* unlei_d */ + 66, /* uneqr_d */ + 72, /* uneqi_d */ + 66, /* unger_d */ + 72, /* ungei_d */ + 66, /* ungtr_d */ + 72, /* ungti_d */ + 70, /* ltgtr_d */ + 76, /* ltgti_d */ + 32, /* ordr_d */ + 36, /* ordi_d */ + 66, /* unordr_d */ + 72, /* unordi_d */ + 20, /* truncr_d_i */ 0, /* truncr_d_l */ 36, /* extr_d */ - 38, /* extr_f_d */ + 22, /* extr_f_d */ 16, /* movr_d */ - 20, /* movi_d */ + 32, /* movi_d */ 16, /* ldr_d */ 24, /* ldi_d */ 20, /* ldxr_d */ @@ -762,34 +856,34 @@ 24, /* sti_d */ 20, /* stxr_d */ 28, /* stxi_d */ - 48, /* bltr_d */ - 52, /* blti_d */ - 48, /* bler_d */ - 52, /* blei_d */ - 48, /* beqr_d */ - 60, /* beqi_d */ - 48, /* bger_d */ - 52, /* bgei_d */ - 48, /* bgtr_d */ - 52, /* bgti_d */ - 48, /* bner_d */ - 52, /* bnei_d */ - 48, /* bunltr_d */ - 52, /* bunlti_d */ - 48, /* bunler_d */ - 52, /* bunlei_d */ - 84, /* buneqr_d */ - 92, /* buneqi_d */ - 48, /* bunger_d */ - 52, /* bungei_d */ - 48, /* bungtr_d */ - 52, /* bungti_d */ - 84, /* bltgtr_d */ - 92, /* bltgti_d */ - 48, /* bordr_d */ - 52, /* bordi_d */ - 48, /* bunordr_d */ - 52, /* bunordi_d */ + 32, /* bltr_d */ + 36, /* blti_d */ + 32, /* bler_d */ + 36, /* blei_d */ + 32, /* beqr_d */ + 52, /* beqi_d 
*/ + 32, /* bger_d */ + 36, /* bgei_d */ + 32, /* bgtr_d */ + 36, /* bgti_d */ + 32, /* bner_d */ + 36, /* bnei_d */ + 32, /* bunltr_d */ + 36, /* bunlti_d */ + 32, /* bunler_d */ + 36, /* bunlei_d */ + 68, /* buneqr_d */ + 76, /* buneqi_d */ + 32, /* bunger_d */ + 36, /* bungei_d */ + 32, /* bungtr_d */ + 36, /* bungti_d */ + 68, /* bltgtr_d */ + 76, /* bltgti_d */ + 32, /* bordr_d */ + 36, /* bordi_d */ + 32, /* bunordr_d */ + 36, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ diff --git a/deps/lightning/lib/jit_arm-vfp.c b/deps/lightning/lib/jit_arm-vfp.c index 743a3ef53..4b146d255 100644 --- a/deps/lightning/lib/jit_arm-vfp.c +++ b/deps/lightning/lib/jit_arm-vfp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_arm.c b/deps/lightning/lib/jit_arm.c index c2019ddb7..322d85132 100644 --- a/deps/lightning/lib/jit_arm.c +++ b/deps/lightning/lib/jit_arm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -59,8 +59,8 @@ typedef jit_pointer_t jit_va_list; /* * Prototypes */ -#define jit_make_arg(node) _jit_make_arg(_jit,node) -static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*); +#define jit_make_arg(node,code) _jit_make_arg(_jit,node,code) +static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t); #define jit_make_arg_f(node) _jit_make_arg_f(_jit,node) static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*); #define jit_make_arg_d(node) _jit_make_arg_d(_jit,node) @@ -90,6 +90,7 @@ extern void __clear_cache(void *, void *); # include "jit_arm-cpu.c" # include "jit_arm-swf.c" # include "jit_arm-vfp.c" +# include "jit_fallback.c" #undef PROTO /* @@ -326,20 +327,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -421,7 +420,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code != jit_code_arg) { + if (!(u->code >= jit_code_arg_c && u->code <= jit_code_arg)) { if (u->code == jit_code_arg_f) { if (jit_cpu.abi) return (jit_arg_f_reg_p(u->u.w)); @@ -436,7 +435,7 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) } static jit_node_t * -_jit_make_arg(jit_state_t *_jit, jit_node_t *node) +_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code) { jit_int32_t offset; if (jit_arg_reg_p(_jitc->function->self.argi)) @@ -446,7 +445,7 @@ _jit_make_arg(jit_state_t *_jit, jit_node_t *node) _jitc->function->self.size += sizeof(jit_word_t); } if (node == (jit_node_t *)0) - node = 
jit_new_node(jit_code_arg); + node = jit_new_node(code); else link_node(node); node->u.w = offset; @@ -558,16 +557,21 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { assert(_jitc->function); - return (jit_make_arg((jit_node_t*)0)); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif + return (jit_make_arg((jit_node_t*)0, code)); } jit_node_t * _jit_arg_f(jit_state_t *_jit) { assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); return (jit_make_arg_f((jit_node_t*)0)); } @@ -575,13 +579,14 @@ jit_node_t * _jit_arg_d(jit_state_t *_jit) { assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); return (jit_make_arg_d((jit_node_t*)0)); } void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_swf_p()) jit_ldxi_c(u, JIT_FP, arg_offset(v->u.w)); @@ -595,7 +600,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_swf_p()) jit_ldxi_uc(u, JIT_FP, arg_offset(v->u.w)); @@ -609,7 +614,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_swf_p()) jit_ldxi_s(u, JIT_FP, arg_offset(v->u.w)); @@ -623,7 +628,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + 
assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_swf_p()) jit_ldxi_us(u, JIT_FP, arg_offset(v->u.w)); @@ -637,7 +642,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_swf_p()) jit_ldxi_i(u, JIT_FP, arg_offset(v->u.w)); @@ -649,10 +654,10 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_swf_p()) jit_stxi(arg_offset(v->u.w), JIT_FP, u); else if (jit_arg_reg_p(v->u.w)) @@ -663,11 +668,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_swf_p()) { regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); @@ -842,10 +847,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(JIT_RA0 - _jitc->function->call.argi, u); @@ -859,11 +864,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t 
u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(JIT_RA0 - _jitc->function->call.argi, u); @@ -1147,6 +1152,10 @@ _emit_code(jit_state_t *_jit) jit_node_t *node; jit_uint8_t *data; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_uword_t thumb; jit_int32_t const_offset; jit_int32_t patch_offset; @@ -1380,11 +1389,16 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + if (jit_thumb_p()) + nop((node->u.w + 1) & ~1); + else + nop((node->u.w + 3) & ~3); break; case jit_code_note: case jit_code_name: if (must_align_p(node->next)) @@ -1479,11 +1493,23 @@ _emit_code(jit_state_t *_jit) case_wrr(stx, _i); case_rr(hton, _us); case_rr(hton, _ui); + case_rr(bswap, _us); + case_rr(bswap, _ui); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rr(mov,); + case_rrr(movn,); + case_rrr(movz,); case jit_code_movi: if (node->flag & jit_flag_node) { temp = node->v.n; @@ -1745,7 +1771,9 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w, 1); + word = jmpi_p(_jit->pc.w, + _s24P(_jit->code.length - + (_jit->pc.uc - _jit->code.ptr))); patch(word, node); } } @@ 
-1775,6 +1803,10 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.data = _jitc->consts.data; undo.thumb = _jitc->thumb; undo.const_offset = _jitc->consts.offset; @@ -1795,6 +1827,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif invalidate_consts(); _jitc->consts.data = undo.data; _jitc->thumb = undo.thumb; @@ -1862,21 +1904,34 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: case jit_code_getarg_s: case jit_code_getarg_us: case jit_code_getarg_i: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case 
jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1966,6 +2021,7 @@ _emit_code(jit_state_t *_jit) # include "jit_arm-cpu.c" # include "jit_arm-swf.c" # include "jit_arm-vfp.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_fallback.c b/deps/lightning/lib/jit_fallback.c new file mode 100644 index 000000000..17ecd60d9 --- /dev/null +++ b/deps/lightning/lib/jit_fallback.c @@ -0,0 +1,145 @@ +#if PROTO +#define fallback_save(r0) _fallback_save(_jit, r0) +static void _fallback_save(jit_state_t*, jit_int32_t); +#define fallback_load(r0) _fallback_load(_jit, r0) +static void _fallback_load(jit_state_t*, jit_int32_t); +#define fallback_save_regs(r0) _fallback_save_regs(_jit, r0) +static void _fallback_save_regs(jit_state_t*, jit_int32_t); +#define fallback_load_regs(r0) _fallback_load_regs(_jit, r0) +static void _fallback_load_regs(jit_state_t*, jit_int32_t); +#define fallback_calli(i0, i1) _fallback_calli(_jit, i0, i1) +static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t); +#define fallback_casx(r0,r1,r2,r3,im) _fallback_casx(_jit,r0,r1,r2,r3,im) +static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t, + jit_int32_t, jit_int32_t, jit_word_t); +#endif + +#if CODE +static void +_fallback_save(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t 
offset, regno, spec; + for (offset = 0; offset < JIT_R_NUM; offset++) { + spec = _rvs[offset].spec; + regno = jit_regno(spec); + if (regno == r0) { + if (!(spec & jit_class_sav)) + stxi(_jitc->function->regoff[JIT_R(offset)], rn(JIT_FP), regno); + break; + } + } +} + +static void +_fallback_load(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t offset, regno, spec; + for (offset = 0; offset < JIT_R_NUM; offset++) { + spec = _rvs[offset].spec; + regno = jit_regno(spec); + if (regno == r0) { + if (!(spec & jit_class_sav)) + ldxi(regno, rn(JIT_FP), _jitc->function->regoff[JIT_R(offset)]); + break; + } + } +} + +static void +_fallback_save_regs(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t regno, spec; + for (regno = 0; regno < _jitc->reglen; regno++) { + spec = _rvs[regno].spec; + if ((jit_regset_tstbit(&_jitc->regarg, regno) || + jit_regset_tstbit(&_jitc->reglive, regno)) && + !(spec & jit_class_sav)) { + if (!_jitc->function->regoff[regno]) { + _jitc->function->regoff[regno] = + jit_allocai(spec & jit_class_gpr ? 
+ sizeof(jit_word_t) : sizeof(jit_float64_t)); + _jitc->again = 1; + } + if ((spec & jit_class_gpr) && rn(regno) == r0) + continue; + jit_regset_setbit(&_jitc->regsav, regno); + if (spec & jit_class_gpr) + emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno); + else + emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno); + } + } +} + +static void +_fallback_load_regs(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t regno, spec; + for (regno = 0; regno < _jitc->reglen; regno++) { + spec = _rvs[regno].spec; + if ((jit_regset_tstbit(&_jitc->regarg, regno) || + jit_regset_tstbit(&_jitc->reglive, regno)) && + !(spec & jit_class_sav)) { + if ((spec & jit_class_gpr) && rn(regno) == r0) + continue; + jit_regset_setbit(&_jitc->regsav, regno); + if (spec & jit_class_gpr) + emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]); + else + emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]); + } + } +} + +static void +_fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1) +{ +# if defined(__arm__) + movi(rn(_R0), i1); +# elif defined(__hppa__) + movi(_R26_REGNO, i1); +# endif + calli(i0); +} + +static void +_fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t jump, done; + /* XXX only attempts to fallback cas for lightning jit code */ + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + if ((iscasi = r1 == _NOREG)) { + r1_reg = jit_get_reg(jit_class_gpr|jit_class_sav); + r1 = rn(r1_reg); + movi(r1, i0); + } + fallback_save_regs(r0); + fallback_calli((jit_word_t)pthread_mutex_lock, (jit_word_t)&mutex); + fallback_load(r1); + ldr(r0, r1); + fallback_load(r2); + eqr(r0, r0, r2); + fallback_save(r0); + jump = bnei(_jit->pc.w, r0, 1); + fallback_load(r3); +# if __WORDSIZE == 32 + str_i(r1, r3); +# else + str_l(r1, r3); +# endif + /* done: */ + done = _jit->pc.w; + fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex); + 
fallback_load(r0); +# if defined(__arm__) + patch_at(arm_patch_jump, jump, done); +# else + patch_at(jump, done); +# endif + fallback_load_regs(r0); + if (iscasi) + jit_unget_reg(r1_reg); +} +#endif diff --git a/deps/lightning/lib/jit_hppa-cpu.c b/deps/lightning/lib/jit_hppa-cpu.c index db5a36a19..11e49ef0b 100644 --- a/deps/lightning/lib/jit_hppa-cpu.c +++ b/deps/lightning/lib/jit_hppa-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -648,18 +648,23 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); #define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) #define comr(r0,r1) UADDCM(_R0_REGNO,r1,r0) #define negr(r0,r1) SUB(_R0_REGNO,r1,r0) #define extr_c(r0,r1) EXTRWR(r1,31,8,r0) #define extr_uc(r0,r1) EXTRWR_U(r1,31,8,r0) #define extr_s(r0,r1) EXTRWR(r1,31,16,r0) #define extr_us(r0,r1) EXTRWR_U(r1,31,16,r0) -#if __BYTE_ORDER == __BIG_ENDIAN -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) movr(r0,r1) -#else -# error need htonr implementation -#endif +#define bswapr_us(r0,r1) generic_bswapr_us(_jit,r0,r1) +#define bswapr_ui(r0,r1) generic_bswapr_ui(_jit,r0,r1) #define addr(r0,r1,r2) ADD(r1,r2,r0) #define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void 
_addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -907,7 +912,7 @@ static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); #define jmpr(r0) _jmpr(_jit,r0) static void _jmpr(jit_state_t*,jit_int32_t); #define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); #define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); #define callr(r0) _callr(_jit,r0) @@ -1633,6 +1638,72 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi(_jit->pc.w, r2, 0); + COPY(r1, r0); + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei(_jit->pc.w, r2, 0); + COPY(r1, r0); + patch_at(w, _jit->pc.w); +} + +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ +#if defined(__linux__) && defined(SYS_atomic_cmpxchg_32) && __WORDSIZE == 32 + /* Not defined, and unlikely to ever be defined, but could be a way to do it */ + movi(_R26_REGNO, SYS_atomic_cmpxchg_32); + if (r1 == _NOREG) + movi(_R25_REGNO, i0); + else + movr(_R25_REGNO, r1); + movr(_R24_REGNO, r2); + movr(_R23_REGNO, r3); + /* Should only fail for an invalid or unaligned address. + * Do not handle this condition. */ + calli(syscall); + movr(r0, _R28_REGNO); +#else + /* + * The only atomic operations are LDCW and LDCD, that load a value, + * and store zero at the address atomically. The (semaphore) address + * must be 16 byte aligned. + */ + fallback_casx(r0, r1, r2, r3, i0); + /* + * It is important to be aware of the delayed nature of cache flush and + * purge operations, and to use SYNC instructions to force completion + * where necessary. The following example illustrates this. 
+ * Consider two processes sharing a memory location x which is protected + * by a semaphore s. + * + * process A on Processor 1 | process B on Processor 2 | note + * -------------------------+---------------------------+------------ + * LDCW s | | A acquires semaphore + * PDC x | | A executes purge + * SYNC | | Force completion of purge + * STW s | | A releases semaphore + * | LDCW s | B acquires semaphore + * | STW x + * + * In the absence of the SYNC instruction, it would be possible for + * process B's store to x to complete before the purge of x is completed + * (since the purge may have been delayed). The purge of x could then + * destroy the new value. + */ +#endif +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -2561,17 +2632,19 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) BV_N(_R0_REGNO, r0); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; - w = ((i0 - _jit->pc.w) >> 2) - 2; - if (w >= -32768 && w <= 32767) - B_N(w, _R0_REGNO); + jit_word_t d, w; + w = _jit->pc.w; + d = ((i0 - w) >> 2) - 2; + if (d >= -32768 && d <= 32767) + B_N(d, _R0_REGNO); else { - movi(_R1_REGNO, w); + movi(_R1_REGNO, d); BV_N(_R0_REGNO, _R1_REGNO); } + return (w); } static jit_word_t diff --git a/deps/lightning/lib/jit_hppa-fpu.c b/deps/lightning/lib/jit_hppa-fpu.c index 5fa68561c..6b2838d11 100644 --- a/deps/lightning/lib/jit_hppa-fpu.c +++ b/deps/lightning/lib/jit_hppa-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_hppa-sz.c b/deps/lightning/lib/jit_hppa-sz.c index 3c04f6372..4257b6d8b 100644 --- a/deps/lightning/lib/jit_hppa-sz.c +++ b/deps/lightning/lib/jit_hppa-sz.c @@ -1,20 +1,24 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 64 +#define JIT_INSTR_MAX 196 0, /* data */ 0, /* live */ - 0, /* align */ + 28, /* align */ 0, /* save */ 0, /* load */ + 0, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 64, /* prolog */ + 156, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,11 +26,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 0, /* va_start */ - 0, /* va_arg */ - 0, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 20, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 12, /* addi */ @@ -40,13 +56,13 @@ 12, /* subci */ 4, /* subxr */ 8, /* subxi */ - 16, /* rsbi */ - 28, /* mulr */ - 36, /* muli */ + 12, /* rsbi */ + 48, /* mulr */ + 56, /* muli */ 40, /* qmulr */ 44, /* qmuli */ - 32, /* qmulr_u */ - 40, /* qmuli_u */ + 52, /* qmulr_u */ + 60, /* qmuli_u */ 36, /* divr */ 40, /* divi */ 36, /* divr_u */ @@ -95,15 +111,22 @@ 8, /* nei */ 4, /* movr */ 8, /* movi */ + 12, /* movnr */ + 12, /* movzr */ + 88, /* casr */ + 96, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 28, /* bswapr_us */ + 68, /* bswapr_ui */ + 0, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ - 0, /* htonr_l */ + 0, /* htonr_ul */ 8, /* ldr_c */ 12, /* ldi_c */ 4, /* 
ldr_uc */ @@ -119,15 +142,15 @@ 0, /* ldr_l */ 0, /* ldi_l */ 8, /* ldxr_c */ - 8, /* ldxi_c */ + 12, /* ldxi_c */ 4, /* ldxr_uc */ - 4, /* ldxi_uc */ + 8, /* ldxi_uc */ 8, /* ldxr_s */ - 8, /* ldxi_s */ + 12, /* ldxi_s */ 4, /* ldxr_us */ - 4, /* ldxi_us */ + 8, /* ldxi_us */ 4, /* ldxr_i */ - 4, /* ldxi_i */ + 8, /* ldxi_i */ 0, /* ldxr_ui */ 0, /* ldxi_ui */ 0, /* ldxr_l */ @@ -141,11 +164,11 @@ 0, /* str_l */ 0, /* sti_l */ 8, /* stxr_c */ - 4, /* stxi_c */ + 12, /* stxi_c */ 8, /* stxr_s */ - 4, /* stxi_s */ + 12, /* stxi_s */ 8, /* stxr_i */ - 4, /* stxi_i */ + 12, /* stxi_i */ 0, /* stxr_l */ 0, /* stxi_l */ 8, /* bltr */ @@ -188,18 +211,42 @@ 16, /* bxsubi */ 16, /* bxsubr_u */ 20, /* bxsubi_u */ - 0, /* jmpr */ + 4, /* jmpr */ 12, /* jmpi */ 40, /* callr */ 44, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -207,93 +254,93 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 64, /* epilog */ + 196, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 4, /* addr_f */ - 16, /* addi_f */ + 24, /* addi_f */ 4, /* subr_f */ - 16, /* subi_f */ - 16, /* rsbi_f */ + 24, /* subi_f */ + 24, /* rsbi_f */ 4, /* mulr_f */ - 16, /* muli_f */ + 24, /* muli_f */ 4, /* divr_f */ - 16, /* divi_f */ + 24, /* divi_f */ 4, 
/* negr_f */ 4, /* absr_f */ 4, /* sqrtr_f */ 16, /* ltr_f */ - 28, /* lti_f */ + 36, /* lti_f */ 16, /* ler_f */ - 28, /* lei_f */ + 36, /* lei_f */ 16, /* eqr_f */ - 28, /* eqi_f */ + 36, /* eqi_f */ 16, /* ger_f */ - 28, /* gei_f */ + 36, /* gei_f */ 16, /* gtr_f */ - 28, /* gti_f */ + 36, /* gti_f */ 16, /* ner_f */ - 28, /* nei_f */ + 36, /* nei_f */ 16, /* unltr_f */ - 28, /* unlti_f */ + 36, /* unlti_f */ 16, /* unler_f */ - 28, /* unlei_f */ + 36, /* unlei_f */ 16, /* uneqr_f */ - 28, /* uneqi_f */ + 36, /* uneqi_f */ 16, /* unger_f */ - 28, /* ungei_f */ + 36, /* ungei_f */ 16, /* ungtr_f */ - 28, /* ungti_f */ + 36, /* ungti_f */ 16, /* ltgtr_f */ - 28, /* ltgti_f */ + 36, /* ltgti_f */ 16, /* ordr_f */ - 28, /* ordi_f */ + 36, /* ordi_f */ 16, /* unordr_f */ - 28, /* unordi_f */ - 12, /* truncr_f_i */ + 36, /* unordi_f */ + 16, /* truncr_f_i */ 0, /* truncr_f_l */ - 12, /* extr_f */ + 20, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ - 12, /* movi_f */ + 20, /* movi_f */ 4, /* ldr_f */ 12, /* ldi_f */ - 4, /* ldxr_f */ - 4, /* ldxi_f */ + 8, /* ldxr_f */ + 12, /* ldxi_f */ 4, /* str_f */ 12, /* sti_f */ 8, /* stxr_f */ - 4, /* stxi_f */ + 12, /* stxi_f */ 16, /* bltr_f */ - 28, /* blti_f */ + 36, /* blti_f */ 16, /* bler_f */ - 28, /* blei_f */ + 36, /* blei_f */ 16, /* beqr_f */ - 28, /* beqi_f */ + 36, /* beqi_f */ 16, /* bger_f */ - 28, /* bgei_f */ + 36, /* bgei_f */ 16, /* bgtr_f */ - 28, /* bgti_f */ + 36, /* bgti_f */ 16, /* bner_f */ - 28, /* bnei_f */ + 36, /* bnei_f */ 16, /* bunltr_f */ - 28, /* bunlti_f */ + 36, /* bunlti_f */ 16, /* bunler_f */ - 28, /* bunlei_f */ + 36, /* bunlei_f */ 16, /* buneqr_f */ - 28, /* buneqi_f */ + 36, /* buneqi_f */ 16, /* bunger_f */ - 28, /* bungei_f */ + 36, /* bungei_f */ 16, /* bungtr_f */ - 28, /* bungti_f */ + 36, /* bungti_f */ 16, /* bltgtr_f */ - 28, /* bltgti_f */ + 36, /* bltgti_f */ 16, /* bordr_f */ - 28, /* bordi_f */ + 36, /* bordi_f */ 16, /* bunordr_f */ - 28, /* bunordi_f */ + 36, /* 
bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -304,87 +351,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 4, /* addr_d */ - 24, /* addi_d */ + 32, /* addi_d */ 4, /* subr_d */ - 24, /* subi_d */ - 24, /* rsbi_d */ + 32, /* subi_d */ + 32, /* rsbi_d */ 4, /* mulr_d */ - 24, /* muli_d */ + 32, /* muli_d */ 4, /* divr_d */ - 24, /* divi_d */ + 32, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 4, /* sqrtr_d */ 16, /* ltr_d */ - 36, /* lti_d */ + 44, /* lti_d */ 16, /* ler_d */ - 36, /* lei_d */ + 44, /* lei_d */ 16, /* eqr_d */ - 36, /* eqi_d */ + 44, /* eqi_d */ 16, /* ger_d */ - 36, /* gei_d */ + 44, /* gei_d */ 16, /* gtr_d */ - 36, /* gti_d */ + 44, /* gti_d */ 16, /* ner_d */ - 36, /* nei_d */ + 44, /* nei_d */ 16, /* unltr_d */ - 36, /* unlti_d */ + 44, /* unlti_d */ 16, /* unler_d */ - 36, /* unlei_d */ + 44, /* unlei_d */ 16, /* uneqr_d */ - 36, /* uneqi_d */ + 44, /* uneqi_d */ 16, /* unger_d */ - 36, /* ungei_d */ + 44, /* ungei_d */ 16, /* ungtr_d */ - 36, /* ungti_d */ + 44, /* ungti_d */ 16, /* ltgtr_d */ - 36, /* ltgti_d */ + 44, /* ltgti_d */ 16, /* ordr_d */ - 36, /* ordi_d */ + 44, /* ordi_d */ 16, /* unordr_d */ - 36, /* unordi_d */ - 12, /* truncr_d_i */ + 44, /* unordi_d */ + 16, /* truncr_d_i */ 0, /* truncr_d_l */ - 12, /* extr_d */ + 20, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 20, /* movi_d */ + 28, /* movi_d */ 4, /* ldr_d */ 12, /* ldi_d */ - 4, /* ldxr_d */ - 4, /* ldxi_d */ + 8, /* ldxr_d */ + 12, /* ldxi_d */ 4, /* str_d */ 12, /* sti_d */ 8, /* stxr_d */ - 4, /* stxi_d */ + 12, /* stxi_d */ 16, /* bltr_d */ - 36, /* blti_d */ + 44, /* blti_d */ 16, /* bler_d */ - 36, /* blei_d */ + 44, /* blei_d */ 16, /* beqr_d */ - 36, /* beqi_d */ + 44, /* beqi_d */ 16, /* bger_d */ - 36, /* bgei_d */ + 44, /* bgei_d */ 16, /* bgtr_d */ - 36, /* bgti_d */ + 44, /* bgti_d */ 16, /* bner_d */ - 36, /* bnei_d */ + 44, /* bnei_d */ 16, /* bunltr_d */ - 36, /* bunlti_d */ + 44, /* bunlti_d */ 16, /* bunler_d */ - 36, /* bunlei_d 
*/ + 44, /* bunlei_d */ 16, /* buneqr_d */ - 36, /* buneqi_d */ + 44, /* buneqi_d */ 16, /* bunger_d */ - 36, /* bungei_d */ + 44, /* bungei_d */ 16, /* bungtr_d */ - 36, /* bungti_d */ + 44, /* bungti_d */ 16, /* bltgtr_d */ - 36, /* bltgti_d */ + 44, /* bltgti_d */ 16, /* bordr_d */ - 36, /* bordi_d */ + 44, /* bordi_d */ 16, /* bunordr_d */ - 36, /* bunordi_d */ + 44, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ diff --git a/deps/lightning/lib/jit_hppa.c b/deps/lightning/lib/jit_hppa.c index 944bf34df..32fd9d583 100644 --- a/deps/lightning/lib/jit_hppa.c +++ b/deps/lightning/lib/jit_hppa.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -19,12 +19,17 @@ #include #include +#if defined(__linux__) +# include +# include +#endif #define jit_arg_reg_p(i) (i >= 0 && i < 4) #define PROTO 1 # include "jit_hppa-cpu.c" # include "jit_hppa-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -240,18 +245,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); + jit_code_inc_synth_w(code, u); jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -305,7 +310,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - assert(u->code == jit_code_arg || + assert((u->code >= jit_code_arg_c && u->code <= jit_code_arg) || u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_reg_p(u->u.w)); } @@ -338,17 +343,21 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) 
+_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif _jitc->function->self.size -= sizeof(jit_word_t); if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else offset = _jitc->function->self.size; - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -401,7 +410,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (v->u.w >= 0) jit_extr_c(u, _R26 - v->u.w); @@ -413,7 +422,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (v->u.w >= 0) jit_extr_uc(u, _R26 - v->u.w); @@ -425,7 +434,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (v->u.w >= 0) jit_extr_s(u, _R26 - v->u.w); @@ -437,7 +446,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (v->u.w >= 0) jit_extr_us(u, _R26 - v->u.w); @@ -449,7 +458,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == 
jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (v->u.w >= 0) jit_movr(u, _R26 - v->u.w); @@ -459,10 +468,10 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (v->u.w >= 0) jit_movr(_R26 - v->u.w, u); else @@ -471,11 +480,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (v->u.w >= 0) jit_movi(_R26 - v->u.w, u); else { @@ -570,10 +579,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); _jitc->function->call.size -= sizeof(jit_word_t); if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -586,11 +595,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); _jitc->function->call.size -= sizeof(jit_word_t); if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -854,6 +863,10 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; +#if 
DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_int32_t patch_offset; } undo; @@ -956,11 +969,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1013,6 +1028,16 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(movn,); + case_rrr(movz,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1039,6 +1064,8 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _us); case_rr(hton, _us); case_rr(hton, _ui); + case_rr(bswap, _us); + case_rr(bswap, _ui); case_rrr(lt,); case_rrw(lt,); case_rrr(lt, _u); @@ -1310,7 +1337,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (word >= -32768 && word <= 32767) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -1339,6 +1371,10 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; restart_function: _jitc->again = 0; @@ -1356,6 +1392,18 
@@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. + * Note that for hppa use '-' instead of '+' as hppa + * stack grows up */ + undo.func.self.aoff = _jitc->function->frame - + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; goto restart_function; } @@ -1374,24 +1422,37 @@ _emit_code(jit_state_t *_jit) case jit_code_va_arg_d: vaarg_d(rn(node->u.w), rn(node->v.w)); break; - case jit_code_live: - case jit_code_arg: case jit_code_ellipsis: + case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: case jit_code_getarg_s: case jit_code_getarg_us: case jit_code_getarg_i: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: case jit_code_putargr_f: case jit_code_putargi_f: 
case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1436,6 +1497,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_hppa-cpu.c" # include "jit_hppa-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_ia64-cpu.c b/deps/lightning/lib/jit_ia64-cpu.c index dec14650f..30986e94d 100644 --- a/deps/lightning/lib/jit_ia64-cpu.c +++ b/deps/lightning/lib/jit_ia64-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -1175,6 +1175,8 @@ static void _X5(jit_state_t*,jit_word_t, #define ZXT2(r1,r3) I29(0x11,r3,r1) #define ZXT4(r1,r3) I29(0x12,r3,r1) +# define nop(i0) _nop(_jit,i0) +static void _nop(jit_state_t*, jit_int32_t); #define addr(r0,r1,r2) ADD(r0,r1,r2) #define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -1307,17 +1309,20 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); #define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); -#if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0,r1) MUX1(r0,r1,MUX_REV) -#else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -#endif +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ul(r0,r1) MUX1(r0,r1,MUX_REV) #define extr_c(r0,r1) SXT1(r0,r1) #define extr_uc(r0,r1) ZXT1(r0,r1) #define extr_s(r0,r1) SXT2(r0,r1) @@ -1495,7 +1500,7 @@ static void 
_stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); #define jmpr(r0) _jmpr(_jit,r0) static void _jmpr(jit_state_t*,jit_int32_t); #define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); #define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); #define callr(r0) _callr(_jit,r0) @@ -1512,7 +1517,7 @@ static void _epilog(jit_state_t*,jit_node_t*); static void _vastart(jit_state_t*, jit_int32_t); # define vaarg(r0, r1) _vaarg(_jit, r0, r1) static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); -#define patch_at(node,instr,label) _patch_at(_jit,node,instr,label) +#define patch_at(code,instr,label) _patch_at(_jit,code,instr,label) static void _patch_at(jit_state_t*,jit_code_t,jit_word_t,jit_word_t); #endif @@ -3026,7 +3031,7 @@ _M29(jit_state_t *_jit, jit_word_t _p, jit_word_t ar, jit_word_t r2) { assert(!(_p & ~0x3fL)); - assert(!(ar & ~0x7L)); + assert(!(ar & ~0x7fL)); assert(!(r2 & ~0x7fL)); TSTREG1(r2); TSTPRED(_p); @@ -3450,6 +3455,16 @@ _X5(jit_state_t *_jit, jit_word_t _p, inst((i1<<36)|(1L<<27)|(y<<26)|(i20<<6)|_p, INST_X); } +static void +_nop(jit_state_t *_jit, jit_int32_t i0) +{ + for (; i0 > 0; i0 -= 8) { + NOP_M(0); + sync(); + } + assert(i0 == 0); +} + static void _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -3483,6 +3498,53 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMP_EQ(PR_6, PR_7, r2, GR_0); + MOV_p(r0, r1, PR_7); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMP_EQ(PR_6, PR_7, r2, GR_0); + MOV_p(r0, r1, PR_6); +} + +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = 
rn(r1_reg); + movi(r1, i0); + } + sync(); + MOV_M_ar_rn(AR_CCV, r2); + CMPXCHG8_ACQ(r0, r1, r3); + eqr(r0, r0, r2); + if (iscasi) + jit_unget_reg(r1_reg); +} + + +static void +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + bswapr_ul(r0, r1); + rshi_u(r0, r0, 48); +} + +static void +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + bswapr_ul(r0, r1); + rshi_u(r0, r0, 32); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -3949,48 +4011,6 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } -#if __BYTE_ORDER == __LITTLE_ENDIAN -static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - -static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - jit_int32_t t1; - jit_int32_t t2; - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 24); - rshi(rn(t1), r1, 16); - rshi(rn(t2), r1, 8); - andi(rn(t0), rn(t0), 0xff); - andi(rn(t1), rn(t1), 0xff); - andi(rn(t2), rn(t2), 0xff); - andi(r0, r1, 0xff); - lshi(r0, r0, 24); - lshi(rn(t1), rn(t1), 8); - orr(r0, r0, rn(t0)); - lshi(rn(t2), rn(t2), 16); - orr(r0, r0, rn(t1)); - orr(r0, r0, rn(t2)); - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} -#endif - static void _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -5125,16 +5145,18 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) BR(BR_6); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t d; + jit_word_t d, w; sync(); - d = ((jit_word_t)i0 - _jit->pc.w) >> 4; + w = _jit->pc.w; + d = ((jit_word_t)i0 - w) >> 4; if (d >= -16777216 && d <= 16777215) BRI(d); else BRL(d); + return (w); } 
static jit_word_t @@ -5380,14 +5402,16 @@ _patch_at(jit_state_t *_jit, jit_code_t code, i1 = (ic >> 61) & 0x1L; i41 = (ic >> 22) & 0x1ffffffffffL; i20 = ic & 0xfffffL; - assert((tm & ~1) == TM_M_L_X_ && + if (!((tm & ~1) == TM_M_L_X_ && (s2 & 0xfL<<37) == (0xcL<<37) && - s0 == nop_m); + s0 == nop_m)) + goto short_jump; s1 = i41; s2 &= (0xcL<<37)|(0x7L<<33)|(1L<<12); s2 |= (i1<<36)|(i20<<13); break; default: + short_jump: /* Only B1 in slot 0 expected due to need to either * a stop to update predicates, or a sync before * unconditional short branch */ diff --git a/deps/lightning/lib/jit_ia64-fpu.c b/deps/lightning/lib/jit_ia64-fpu.c index 19cc381a3..344977ea2 100644 --- a/deps/lightning/lib/jit_ia64-fpu.c +++ b/deps/lightning/lib/jit_ia64-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_ia64-sz.c b/deps/lightning/lib/jit_ia64-sz.c index 59826d99d..56b352f66 100644 --- a/deps/lightning/lib/jit_ia64-sz.c +++ b/deps/lightning/lib/jit_ia64-sz.c @@ -1,11 +1,11 @@ - #if __WORDSIZE == 64 #define JIT_INSTR_MAX 224 0, /* data */ 0, /* live */ - 0, /* align */ + 48, /* align */ 0, /* save */ 0, /* load */ + 16, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -14,7 +14,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,8 +25,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 16, /* va_start */ 32, /* va_arg 
*/ 32, /* va_arg_d */ @@ -95,14 +110,21 @@ 32, /* nei */ 16, /* movr */ 16, /* movi */ + 16, /* movnr */ + 16, /* movzr */ + 48, /* casr */ + 64, /* casi */ 16, /* extr_c */ 16, /* extr_uc */ 16, /* extr_s */ 16, /* extr_us */ 16, /* extr_i */ 16, /* extr_ui */ - 64, /* htonr_us */ - 160, /* htonr_ui */ + 32, /* bswapr_us */ + 32, /* bswapr_ui */ + 16, /* bswapr_ul */ + 32, /* htonr_us */ + 32, /* htonr_ui */ 16, /* htonr_ul */ 16, /* ldr_c */ 32, /* ldi_c */ @@ -193,13 +215,37 @@ 32, /* callr */ 48, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ diff --git a/deps/lightning/lib/jit_ia64.c b/deps/lightning/lib/jit_ia64.c index 2b23887b1..415e30765 100644 --- a/deps/lightning/lib/jit_ia64.c +++ b/deps/lightning/lib/jit_ia64.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -344,18 +344,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); + jit_code_inc_synth_w(code, u); jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -409,7 +409,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - assert(u->code == jit_code_arg || + assert((u->code >= jit_code_arg_c && u->code <= jit_code_arg) || u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_reg_p(u->u.w)); } @@ -441,18 +441,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -507,7 +511,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _R32 + v->u.w); @@ -519,7 +523,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == 
jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _R32 + v->u.w); @@ -531,7 +535,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _R32 + v->u.w); @@ -543,7 +547,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _R32 + v->u.w); @@ -555,7 +559,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_i(u, _R32 + v->u.w); @@ -567,7 +571,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _R32 + v->u.w); @@ -579,7 +583,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _R32 + v->u.w); @@ -589,10 +593,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, 
jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_R32 + v->u.w, u); else @@ -601,11 +605,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_R32 + v->u.w, u); else { @@ -712,10 +716,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(_OUT0 + _jitc->function->call.argi, u); @@ -729,11 +733,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(_OUT0 + _jitc->function->call.argi, u); @@ -972,6 +976,10 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_int32_t patch_offset; jit_word_t prolog_offset; } undo; @@ -1093,9 +1101,14 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - /* nothing done */ + assert(!(node->u.w & 
(node->u.w - 1))); + sync(); + if (node->u.w > 8) + nop(node->u.w - 8); + break; + case jit_code_skip: + sync(); + nop((node->u.w + 7) & ~7); break; case jit_code_note: case jit_code_name: sync(); @@ -1152,6 +1165,16 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1173,6 +1196,9 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); @@ -1466,7 +1492,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (word >= -16777216 && word <= 16777215) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -1495,6 +1526,10 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; undo.prolog_offset = _jitc->prolog.offset; restart_function: @@ -1530,6 +1565,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; _jitc->prolog.offset = undo.prolog_offset; _jitc->ioff = 0; @@ -1555,14 +1600,21 @@ _emit_code(jit_state_t *_jit) case jit_code_va_arg_d: vaarg_d(rn(node->u.w), rn(node->v.w)); break; - case jit_code_live: - case jit_code_arg: case jit_code_ellipsis: + case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1570,10 +1622,22 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: case jit_code_putargr_f: case jit_code_putargi_f: case 
jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: diff --git a/deps/lightning/lib/jit_loongarch-cpu.c b/deps/lightning/lib/jit_loongarch-cpu.c new file mode 100644 index 000000000..f9e71c953 --- /dev/null +++ b/deps/lightning/lib/jit_loongarch-cpu.c @@ -0,0 +1,2720 @@ +/* + * Copyright (C) 2022 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#if PROTO +# define ii(i) *_jit->pc.ui++ = (i) +# define can_sign_extend_si12_p(s12) ((s12) <= 2047 && (s12) >= -2048) +# define can_zero_extend_u12_p(u12) ((u12) <= 4095 && (u12) >= 0) +# define can_sign_extend_si16_p(s16) ((s16) <= 32767 && (s16) >= -32768) +# define can_sign_extend_si21_p(s21) ((s21) <= 1048575 && (s21) >= -1048576) +# define can_sign_extend_si26_p(s26) \ + ((s26) <= 33554431 && (s26) >= -33554432) +# define can_sign_extend_si32_p(s32) \ + ((s32) <= 2147483647LL && (s32) >= -2147483648LL) +# define _ZERO_REGNO 0 +# define _RA_REGNO 1 +# define _SP_REGNO 3 +# define _FP_REGNO 22 +# define stack_framesize 160 +# define ldr(u, v) ldr_l(u, v) +# define ldi(u, v) ldi_l(u, v) +# define ldxi(u, v, w) ldxi_l(u, v, w) +# define sti(u, v) sti_l(u, v) +# define stxi(u, v, w) stxi_l(u, v, w) +# define orrr(op, rk, rj, rd) _orrr(_jit, op, rk, rj, rd) +# define ou5rr(op, i5, rj, rd) _orrr(_jit, op, i5, rj, rd) +# define orru5(op, rk, rj, i5) _orrr(_jit, op, rk, rj, i5) +static void _orrr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define ou2rrr(op, i2, rk, rj, rd) _ou2rrr(_jit, op, i2, rk, rj, rd) +static void _ou2rrr(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define ou3rrr(op, u3, rk, rj, rd) _ou3rrr(_jit, op, u3, rk, rj, rd) +static void _ou3rrr(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define ou6rr(op, u6, rj, rd) _ou6rr(_jit, op, u6, rj, rd) +static void _ou6rr(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# define ou5u1u5rr(op,m5,u1,l5,rj,rd) _ou5u1u5rr(_jit,op,m5,u1,l5,rj,rd) +static void _ou5u1u5rr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define ou6u6rr(op, m6, l6, rj, rd) _ou6u6rr(_jit, op, m6, l6, rj, rd) +static void _ou6u6rr(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define o5r23(op, 
i5, rj, i2, rd) _o5r23(_jit, op, i5, rj, i2, rd) +static void _o5r23(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define o523r(op, i5, i2, rj, i3) _o523r(_jit, op, i5, i2, rj, i3) +static void _o523r(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define os12rr(op, i12, rj, rd) _os12rr(_jit, op, i12, rj, rd) +# define os12ru5(op, i12, rj, u5) _os12rr(_jit, op, i12, rj, u5) +static void _os12rr(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define ou12rr(op, u12, rj, rd) _ou12rr(_jit, op, u12, rj, rd) +static void _ou12rr(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define ou14u5r(op, u14, u5, rd) _ou14u5r(_jit, op, u14, u5, rd) +static void _osu14u5r(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define os14rr(op, s14, rj, rd) _os14rr(_jit, op, s14, rj, rd) +static void _os14rr(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define ou8rr(op, u8, rd, rj) _ou8rr(_jit, op, u8, rd, rj) +# define ou8u5r(op, u8, u5, rj) _ou8rr(_jit, op, u8, u5, rj) +static void _ou8rr(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# define ou15(op, u15) _ou15(_jit, op, u15) +static void _ou15(jit_state_t*, jit_int32_t,jit_int32_t); +# define orrrr(op, ra, rk, rj, rd) _orrrr(_jit, op, ra, rk, rj, rd) +static void _orrrr(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define ou5rru2u3(op,u5,rj,rk,u2,u3) _ou5rru2u3(_jit, op, u5, rj, rk, u2, u3) +static void _ou5rru2u3(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# define os16rr(op, s16, rj, rd) _os16rr(_jit, op, s16, rj, rd) +static void _os16rr(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define os20r(op, s20, rd) _os20r(_jit, op, s20, rd) +static void _os20r(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define orj21(op, rj, j21) _orj21(_jit, op, rj, j21) 
+static void _orj21(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ou2u3j21(op, u2, u3, j21) _ou2u3j21(_jit, op, u2, u3, j21) +static void _o2cj21(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define oj16rr(op, j16, rj, rd) _oj16rr(_jit, op, j16, rj, rd) +static void _oj16rr(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define oj26(op, j26) _oj26(_jit, op, j26) +static void _oj26(jit_state_t*, jit_int32_t,jit_int32_t); +# define CLO_W(rd, rj) ou5rr(0x000, 0x04, rj, rd) +# define CLZ_W(rd, rj) ou5rr(0x000, 0x05, rj, rd) +# define CTO_W(rd, rj) ou5rr(0x000, 0x06, rj, rd) +# define CTZ_W(rd, rj) ou5rr(0x000, 0x07, rj, rd) +# define CLO_D(rd, rj) ou5rr(0x000, 0x08, rj, rd) +# define CLZ_D(rd, rj) ou5rr(0x000, 0x09, rj, rd) +# define CTO_D(rd, rj) ou5rr(0x000, 0x0a, rj, rd) +# define CTZ_D(rd, rj) ou5rr(0x000, 0x0b, rj, rd) +# define REVB_2H(rd, rj) ou5rr(0x000, 0x0c, rj, rd) +# define REVB_4H(rd, rj) ou5rr(0x000, 0x0d, rj, rd) +# define REVB_2W(rd, rj) ou5rr(0x000, 0x0e, rj, rd) +# define REVB_D(rd, rj) ou5rr(0x000, 0x0f, rj, rd) +# define REVH_2W(rd, rj) ou5rr(0x000, 0x10, rj, rd) +# define REVH_D(rd, rj) ou5rr(0x000, 0x11, rj, rd) +# define BITREV_4B(rd, rj) ou5rr(0x000, 0x12, rj, rd) +# define BITREV_8B(rd, rj) ou5rr(0x000, 0x13, rj, rd) +# define BITREV_W(rd, rj) ou5rr(0x000, 0x14, rj, rd) +# define BITREV_D(rd, rj) ou5rr(0x000, 0x15, rj, rd) +# define EXT_W_H(rd, rj) ou5rr(0x000, 0x16, rj, rd) +# define EXT_W_B(rd, rj) ou5rr(0x000, 0x17, rj, rd) +# define RDTIMEL_W(rd, rj) ou5rr(0x000, 0x18, rj, rd) +# define RDTIMEH_W(rd, rj) ou5rr(0x000, 0x19, rj, rd) +# define RDTIME_D(rd, rj) ou5rr(0x000, 0x20, rj, rd) +# define CPUCFG(rd, rj) ou5rr(0x000, 0x21, rj, rd) +# define ASRTLE_D( rj, rk) orru5(0x002, rk, rj, 0x00) +# define ASRTGT_D( rj, rk) orru5(0x003, rk, rj, 0x00) +# define ALSL_W(rd, rj, rk, sa2) ou2rrr(0x002, sa2, rk, rj, rd) +# define ALSL_WU(rd, rj, rk, sa2) ou2rrr(0x003, sa2, rk, rj, rd) +# define 
BYTEPICK_W(rd, rj, rk, sa2) ou2rrr(0x004, sa2, rk, rj, rd) +# define BYTEPICK_D(rd, rj, rk, sa3) ou3rrr(0x003, sa3, rk, rj, rd) +# define ADD_W(rd, rj, rk) orrr(0x020, rk, rj, rd) +# define ADD_D(rd, rj, rk) orrr(0x021, rk, rj, rd) +# define SUB_W(rd, rj, rk) orrr(0x022, rk, rj, rd) +# define SUB_D(rd, rj, rk) orrr(0x023, rk, rj, rd) +# define SLT(rd, rj, rk) orrr(0x024, rk, rj, rd) +# define SLTU(rd, rj, rk) orrr(0x025, rk, rj, rd) +# define MASKEQZ(rd, rj, rk) orrr(0x026, rk, rj, rd) +# define MASKNEZ(rd, rj, rk) orrr(0x027, rk, rj, rd) +# define NOR(rd, rj, rk) orrr(0x028, rk, rj, rd) +# define AND(rd, rj, rk) orrr(0x029, rk, rj, rd) +# define OR(rd, rj, rk) orrr(0x02a, rk, rj, rd) +# define XOR(rd, rj, rk) orrr(0x02b, rk, rj, rd) +# define ORN(rd, rj, rk) orrr(0x02c, rk, rj, rd) +# define ANDN(rd, rj, rk) orrr(0x02d, rk, rj, rd) +# define SLL_W(rd, rj, rk) orrr(0x02e, rk, rj, rd) +# define SRL_W(rd, rj, rk) orrr(0x02f, rk, rj, rd) +# define SRA_W(rd, rj, rk) orrr(0x030, rk, rj, rd) +# define SLL_D(rd, rj, rk) orrr(0x031, rk, rj, rd) +# define SRL_D(rd, rj, rk) orrr(0x032, rk, rj, rd) +# define SRA_D(rd, rj, rk) orrr(0x033, rk, rj, rd) +# define ROTR_W(rd, rj, rk) orrr(0x036, rk, rj, rd) +# define ROTR_D(rd, rj, rk) orrr(0x037, rk, rj, rd) +# define MUL_W(rd, rj, rk) orrr(0x038, rk, rj, rd) +# define MULH_W(rd, rj, rk) orrr(0x039, rk, rj, rd) +# define MULH_WU(rd, rj, rk) orrr(0x03a, rk, rj, rd) +# define MUL_D(rd, rj, rk) orrr(0x03b, rk, rj, rd) +# define MULH_D(rd, rj, rk) orrr(0x03c, rk, rj, rd) +# define MULH_DU(rd, rj, rk) orrr(0x03d, rk, rj, rd) +# define MULW_D_W(rd, rj, rk) orrr(0x03e, rk, rj, rd) +# define MULW_D_WU(rd, rj, rk) orrr(0x03f, rk, rj, rd) +# define DIV_W(rd, rj, rk) orrr(0x040, rk, rj, rd) +# define MOD_W(rd, rj, rk) orrr(0x041, rk, rj, rd) +# define DIV_WU(rd, rj, rk) orrr(0x042, rk, rj, rd) +# define MOD_WU(rd, rj, rk) orrr(0x043, rk, rj, rd) +# define DIV_D(rd, rj, rk) orrr(0x044, rk, rj, rd) +# define MOD_D(rd, rj, rk) orrr(0x045, rk, 
rj, rd) +# define DIV_DU(rd, rj, rk) orrr(0x046, rk, rj, rd) +# define MOD_DU(rd, rj, rk) orrr(0x047, rk, rj, rd) +# define CRC_W_B_W(rd, rj, rk) orrr(0x048, rk, rj, rd) +# define CRC_W_H_W(rd, rj, rk) orrr(0x049, rk, rj, rd) +# define CRC_W_W_W(rd, rj, rk) orrr(0x04a, rk, rj, rd) +# define CRC_W_D_W(rd, rj, rk) orrr(0x04b, rk, rj, rd) +# define CRCC_W_B_W(rd, rj, rk) orrr(0x04c, rk, rj, rd) +# define CRCC_W_H_W(rd, rj, rk) orrr(0x04d, rk, rj, rd) +# define CCRC_W_W_W(rd, rj, rk) orrr(0x04e, rk, rj, rd) +# define CCRC_W_D_W(rd, rj, rk) orrr(0x04f, rk, rj, rd) +# define BREAK(code) ou15(0x054, code) +# define DBCL(code) ou15(0x055, code) +# define SYSCALL(code) ou15(0x056, code) +# define ALSL_D(rd, rj, rk, sa2) ou2rrr(0x016, sa2, rk, rj, rd) +# define SLLI_W(rd, rj, ui5) ou5rr(0x081, ui5, rj, rd) +# define SLLI_D(rd, rj, ui6) ou6rr(0x041, ui6, rj, rd) +# define SRLI_W(rd, rj, ui5) ou5rr(0x089, ui5, rj, rd) +# define SRLI_D(rd, rj, ui6) ou6rr(0x045, ui6, rj, rd) +# define SRAI_W(rd, rj, ui5) ou5rr(0x091, ui5, rj, rd) +# define SRAI_D(rd, rj, ui6) ou6rr(0x049, ui6, rj, rd) +# define ROTRI_W(rd, rj, ui5) ou5rr(0x099, ui5, rj, rd) +# define ROTRI_D(rd, rj, ui6) ou6rr(0x04d, ui6, rj, rd) +# define BSTRINS_W(rd, rj, m5, l5) ou5u1u5rr(0x003, m5, 0x0, l5, rj, rd) +# define BSTRPICK_W(rd, rj, m5, l5) ou5u1u5rr(0x003, m5, 0x1, l5, rj, rd) +# define BSTRINS_D(rd, rj, m6, l6) ou6u6rr(0x002, m6, l6, rj, rd) +# define BSTRPICK_D(rd, rj, m6, l6) ou6u6rr(0x003, m6, l6, rj, rd) +# define SLTI(rd, rj, i12) os12rr(0x008, i12, rj, rd) +# define SLTUI(rd, rj, i12) os12rr(0x009, i12, rj, rd) +# define ADDI_W(rd, rj, si12) os12rr(0x00a, si12, rj, rd) +# define ADDI_D(rd, rj, si12) os12rr(0x00b, si12, rj, rd) +# define LU52I_D(rd, rj, i12) os12rr(0x00c, i12, rj, rd) +# define ANDI(rd, rj, i12) ou12rr(0x00d, i12, rj, rd) +# define ORI(rd, rj, i12) ou12rr(0x00e, i12, rj, rd) +# define XORI(rd, rj, i12) ou12rr(0x00f, i12, rj, rd) +# define CSRRD(rd, csr) ou14u5r(0x004, csr, 0x00, rd) +# 
define CSRWR(rd, csr) ou14u5r(0x004, csr, 0x01, rd) +# define CSRXCHG(rd, rj, csr) ou14u5r(0x004, csr, rj, rd) +# define CACOP(i5, rj, si12) os12ru5(0x018, si12, rj, i5) +# define LDDIR(rd, rj, level) ou8rr(0x190, level, rj, rd) +# define LDPTE( rj, level) ou8u5r(0x191, level, rj, 0x00) +# define IOCSRRD_B(rd, rj) ou5rr(0xc90, 0x00, rj, rd) +# define IOCSRRD_H(rd, rj) ou5rr(0xc90, 0x01, rj, rd) +# define IOCSRRD_W(rd, rj) ou5rr(0xc90, 0x02, rj, rd) +# define IOCSRRD_D(rd, rj) ou5rr(0xc90, 0x03, rj, rd) +# define IOCSRWR_B(rd, rj) ou5rr(0xc90, 0x04, rj, rd) +# define IOCSRWR_H(rd, rj) ou5rr(0xc90, 0x05, rj, rd) +# define IOCSRWR_W(rd, rj) ou5rr(0xc90, 0x06, rj, rd) +# define IOCSRWR_D(rd, rj) ou5rr(0xc90, 0x07, rj, rd) +# define TLBCLR() ii( 0x6482000) +# define TLBFLUSH() ii( 0x6482400) +# define TLBSRCH() ii( 0x6482800) +# define TLBRD() ii( 0x6482c00) +# define TLBWR() ii( 0x6483000) +# define TLBFILL() ii( 0x6483400) +# define ERTN() ii( 0x6483800) +# define IDLE(level) ou15(0xc91, level) +# define INVTLB(op, rj, rk) orru5(0xc93, rk, rj, op) +# define ADDU16I_D(rd, rj, si16) os16rr(0x004, si16, rj, rd) +# define LU12I_W(rd, si20) os20r(0x00a, si20, rd) +# define LU32I_D(rd, si20) os20r(0x00b, si20, rd) +# define PCADDI(rd, si20) os20r(0x00c, si20, rd) +# define PCALAU12I(rd, si20) os20r(0x00d, si20, rd) +# define PCADDU12I(rd, si20) os20r(0x00e, si20, rd) +# define PCADDU18I(rd, si20) os20r(0x00f, si20, rd) +# define LL_W(rd, rj, si14) os14rr(0x020, si14, rj, rd) +# define SC_W(rd, rj, si14) os14rr(0x021, si14, rj, rd) +# define LL_D(rd, rj, si14) os14rr(0x022, si14, rj, rd) +# define SC_D(rd, rj, si14) os14rr(0x023, si14, rj, rd) +# define LDPTR_W(rd, rj, si14) os14rr(0x024, si14, rj, rd) +# define SDPTR_W(rd, rj, si14) os14rr(0x025, si14, rj, rd) +# define LDPTR_D(rd, rj, si14) os14rr(0x026, si14, rj, rd) +# define SDPTR_D(rd, rj, si14) os14rr(0x027, si14, rj, rd) +# define LD_B(rd, rj, si12) os12rr(0x0a0, si12, rj, rd) +# define LD_H(rd, rj, si12) 
os12rr(0x0a1, si12, rj, rd) +# define LD_W(rd, rj, si12) os12rr(0x0a2, si12, rj, rd) +# define LD_D(rd, rj, si12) os12rr(0x0a3, si12, rj, rd) +# define ST_B(rd, rj, si12) os12rr(0x0a4, si12, rj, rd) +# define ST_H(rd, rj, si12) os12rr(0x0a5, si12, rj, rd) +# define ST_W(rd, rj, si12) os12rr(0x0a6, si12, rj, rd) +# define ST_D(rd, rj, si12) os12rr(0x0a7, si12, rj, rd) +# define LD_BU(rd, rj, si12) os12rr(0x0a8, si12, rj, rd) +# define LD_HU(rd, rj, si12) os12rr(0x0a9, si12, rj, rd) +# define LD_WU(rd, rj, si12) os12rr(0x0aa, si12, rj, rd) +# define PRELD(hint, rj, si12) os12ru5(0x0ab, si12, rj , hint) +# define LDX_B(rd, rj, rk) orrr(0x7000, rk, rj, rd) +# define LDX_H(rd, rj, rk) orrr(0x7008, rk, rj, rd) +# define LDX_W(rd, rj, rk) orrr(0x7010, rk, rj, rd) +# define LDX_D(rd, rj, rk) orrr(0x7018, rk, rj, rd) +# define STX_B(rd, rj, rk) orrr(0x7020, rk, rj, rd) +# define STX_H(rd, rj, rk) orrr(0x7028, rk, rj, rd) +# define STX_W(rd, rj, rk) orrr(0x7030, rk, rj, rd) +# define STX_D(rd, rj, rk) orrr(0x7038, rk, rj, rd) +# define LDX_BU(rd, rj, rk) orrr(0x7040, rk, rj, rd) +# define LDX_HU(rd, rj, rk) orrr(0x7048, rk, rj, rd) +# define LDX_WU(rd, rj, rk) orrr(0x7050, rk, rj, rd) +# define PRELDX(hint, rj, rk) orru5(0x7058, rk, rj, hint) +# define AMSWAP_W(rd, rj, rk) orrr(0x70c0, rk, rj, rd) +# define AMSWAP_D(rd, rj, rk) orrr(0x70c1, rk, rj, rd) +# define AMADD_W(rd, rj, rk) orrr(0x70c2, rk, rj, rd) +# define AMADD_D(rd, rj, rk) orrr(0x70c3, rk, rj, rd) +# define AMAND_W(rd, rj, rk) orrr(0x70c4, rk, rj, rd) +# define AMAND_D(rd, rj, rk) orrr(0x70c5, rk, rj, rd) +# define AMOR_W(rd, rj, rk) orrr(0x70c6, rk, rj, rd) +# define AMOR_D(rd, rj, rk) orrr(0x70c7, rk, rj, rd) +# define AMXOR_W(rd, rj, rk) orrr(0x70c8, rk, rj, rd) +# define AMXOR_D(rd, rj, rk) orrr(0x70c9, rk, rj, rd) +# define AMMAX_W(rd, rj, rk) orrr(0x70ca, rk, rj, rd) +# define AMMAX_D(rd, rj, rk) orrr(0x70cb, rk, rj, rd) +# define AMMIN_W(rd, rj, rk) orrr(0x70cc, rk, rj, rd) +# define AMMIN_D(rd, rj, rk) 
orrr(0x70cd, rk, rj, rd) +# define AMMAX_WU(rd, rj, rk) orrr(0x70ce, rk, rj, rd) +# define AMMAX_DU(rd, rj, rk) orrr(0x70cf, rk, rj, rd) +# define AMMIN_WU(rd, rj, rk) orrr(0x70d0, rk, rj, rd) +# define AMMIN_DU(rd, rj, rk) orrr(0x70d1, rk, rj, rd) +# define AMSWAP_DB_W(rd, rj, rk) orrr(0x70d2, rk, rj, rd) +# define AMSWAP_DB_D(rd, rj, rk) orrr(0x70d3, rk, rj, rd) +# define AMADD_DB_W(rd, rj, rk) orrr(0x70d4, rk, rj, rd) +# define AMADD_DB_D(rd, rj, rk) orrr(0x70d5, rk, rj, rd) +# define AMAND_DB_W(rd, rj, rk) orrr(0x70d6, rk, rj, rd) +# define AMAND_DB_D(rd, rj, rk) orrr(0x70d7, rk, rj, rd) +# define AMOR_DB_W(rd, rj, rk) orrr(0x70d8, rk, rj, rd) +# define AMOR_DB_D(rd, rj, rk) orrr(0x70d9, rk, rj, rd) +# define AMXOR_DB_W(rd, rj, rk) orrr(0x70da, rk, rj, rd) +# define AMXOR_DB_D(rd, rj, rk) orrr(0x70db, rk, rj, rd) +# define AMMAX_DB_W(rd, rj, rk) orrr(0x70dc, rk, rj, rd) +# define AMMAX_DB_D(rd, rj, rk) orrr(0x70dd, rk, rj, rd) +# define AMMIN_DB_W(rd, rj, rk) orrr(0x70de, rk, rj, rd) +# define AMMIN_DB_D(rd, rj, rk) orrr(0x70df, rk, rj, rd) +# define AMMAX_DB_WU(rd, rj, rk) orrr(0x70e0, rk, rj, rd) +# define AMMAX_DB_DU(rd, rj, rk) orrr(0x70e1, rk, rj, rd) +# define AMMIN_DB_WU(rd, rj, rk) orrr(0x70e2, rk, rj, rd) +# define AMMIN_DB_DU(rd, rj, rk) orrr(0x70e3, rk, rj, rd) +# define DBAR(hint) ou15(0x70e4, hint) +# define IBAR(hint) ou15(0x70e5, hint) +# define LDGT_B(rd, rj, rk) orrr(0x70f0, rk, rj, rd) +# define LDGT_H(rd, rj, rk) orrr(0x70f1, rk, rj, rd) +# define LDGT_W(rd, rj, rk) orrr(0x70f2, rk, rj, rd) +# define LDGT_D(rd, rj, rk) orrr(0x70f3, rk, rj, rd) +# define LDLE_B(rd, rj, rk) orrr(0x70f4, rk, rj, rd) +# define LDLE_H(rd, rj, rk) orrr(0x70f5, rk, rj, rd) +# define LDLE_W(rd, rj, rk) orrr(0x70f6, rk, rj, rd) +# define LDLE_D(rd, rj, rk) orrr(0x70f7, rk, rj, rd) +# define STGT_B(rd, rj, rk) orrr(0x70f8, rk, rj, rd) +# define STGT_H(rd, rj, rk) orrr(0x70f9, rk, rj, rd) +# define STGT_W(rd, rj, rk) orrr(0x70fa, rk, rj, rd) +# define STGT_D(rd, rj, rk) 
orrr(0x70fb, rk, rj, rd) +# define STLE_B(rd, rj, rk) orrr(0x70fc, rk, rj, rd) +# define STLE_H(rd, rj, rk) orrr(0x70rd, rk, rj, rd) +# define STLE_W(rd, rj, rk) orrr(0x70fe, rk, rj, rd) +# define STLE_D(rd, rj, rk) orrr(0x70ff, rk, rj, rd) +# define BEQZ( rj, offs) orj21(0x010, rj, offs) +# define BNEZ( rj, offs) orj21(0x011, rj, offs) +# define BCEQZ( cj, offs) ou2u3j21(0x012, 0x0, cj, offs) +# define BCNEZ( cj, offs) ou2u3j21(0x012, 0x1, cj, offs) +# define JIRL(rd, rj, offs) oj16rr(0x013, offs, rj, rd) +# define B(offs) oj26(0x014, offs) +# define BL(offs) oj26(0x015, offs) +# define BEQ(rj, rd, offs) oj16rr(0x016, offs, rj, rd) +# define BNE(rj, rd, offs) oj16rr(0x017, offs, rj, rd) +# define BLT(rj, rd, offs) oj16rr(0x018, offs, rj, rd) +# define BGE(rj, rd, offs) oj16rr(0x019, offs, rj, rd) +# define BLTU(rj, rd, offs) oj16rr(0x01a, offs, rj, rd) +# define BGEU(rj, rd, offs) oj16rr(0x01b, offs, rj, rd) +# define NOP() ANDI(_ZERO_REGNO, _ZERO_REGNO, 0) +# define nop(i0) _nop(_jit, i0) +# define comr(r0, r1) NOR(r0, r1, r1) +# define negr(r0, r1) subr(r0, _ZERO_REGNO, r1) +static void _nop(jit_state_t*,jit_int32_t); +# define movr(r0, r1) _movr(_jit, r0, r1) +static void _movr(jit_state_t*, jit_int32_t, jit_int32_t); +# define movi(r0, i0) _movi(_jit, r0, i0) +static void _movi(jit_state_t*, jit_int32_t, jit_word_t); +# define movi_p(r0, i0) _movi_p(_jit, r0, i0) +static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); +# define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2) +static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2) +static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) +# 
define addr(r0, r1, r2) ADD_D(r0, r1, r2) +# define addi(r0, r1, i0) _addi(_jit, r0, r1, i0) +static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define addcr(r0, r1, r2) _addcr(_jit, r0, r1, r2) +static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define addci(r0, r1, i0) _addci(_jit, r0, r1, i0) +static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2) +static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define addxi(r0, r1, i0) _addxi(_jit, r0, r1, i0) +static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define subr(r0, r1, r2) SUB_D(r0, r1, r2) +# define subi(r0, r1, i0) _subi(_jit, r0, r1, i0) +static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define subcr(r0, r1, r2) _subcr(_jit, r0, r1, r2) +static void _subcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define subci(r0, r1, i0) _subci(_jit, r0, r1, i0) +static void _subci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define subxr(r0, r1, r2) _subxr(_jit, r0, r1, r2) +static void _subxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) +static void _subxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define mulr(r0, r1, r2) MUL_D(r0, r1, r2) +# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) +static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define qmulr(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 1) +# define qmulr_u(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 0) +# define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign) +static void _iqmulr(jit_state_t*, jit_int32_t, jit_int32_t, + jit_int32_t, jit_int32_t, jit_bool_t); +# define qmuli(r0, r1, r2, i0) iqmuli(r0, r1, r2, i0, 1) +# define 
qmuli_u(r0, r1, r2, i0) iqmuli(r0, r1, r2, i0, 0) +# define iqmuli(r0, r1, r2, i0, sign) _iqmuli(_jit, r0, r1, r2, i0, sign) +static void _iqmuli(jit_state_t*, jit_int32_t, jit_int32_t, + jit_int32_t, jit_word_t, jit_bool_t); +# define divr(r0, r1, r2) DIV_D(r0, r1, r2) +# define divi(r0, r1, i0) _divi(_jit, r0, r1, i0) +static void _divi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define divr_u(r0, r1, r2) DIV_DU(r0, r1, r2) +# define divi_u(r0, r1, i0) _divi_u(_jit, r0, r1, i0) +static void _divi_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define qdivr(r0, r1, r2, r3) iqdivr(r0, r1, r2, r3, 1) +# define qdivr_u(r0, r1, r2, r3) iqdivr(r0, r1, r2, r3, 0) +# define iqdivr(r0, r1, r2, r3, sign) _iqdivr(_jit, r0, r1, r2, r3, sign) +static void _iqdivr(jit_state_t*, jit_int32_t, jit_int32_t, + jit_int32_t, jit_int32_t, jit_bool_t); +# define qdivi(r0, r1, r2, i0) iqdivi(r0, r1, r2, i0, 1) +# define qdivi_u(r0, r1, r2, i0) iqdivi(r0, r1, r2, i0, 0) +# define iqdivi(r0, r1, r2, i0, sign) _iqdivi(_jit, r0, r1, r2, i0, sign) +static void _iqdivi(jit_state_t*, jit_int32_t, jit_int32_t, + jit_int32_t, jit_word_t, jit_bool_t); +# define remr(r0, r1, r2) MOD_D(r0, r1, r2) +# define remi(r0, r1, i0) _remi(_jit, r0, r1, i0) +static void _remi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define remr_u(r0, r1, r2) MOD_DU(r0, r1, r2) +# define remi_u(r0, r1, i0) _remi_u(_jit, r0, r1, i0) +static void _remi_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define lshr(r0, r1, r2) SLL_D(r0, r1, r2) +# define lshi(r0, r1, i0) SLLI_D(r0, r1, i0) +# define rshr(r0, r1, r2) SRA_D(r0, r1, r2) +# define rshi(r0, r1, i0) SRAI_D(r0, r1, i0) +# define rshr_u(r0, r1, r2) SRL_D(r0, r1, r2) +# define rshi_u(r0, r1, i0) SRLI_D(r0, r1, i0) +# define andr(r0, r1, r2) AND(r0, r1, r2) +# define andi(r0, r1, i0) _andi(_jit, r0, r1, i0) +static void _andi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define orr(r0, r1, r2) OR(r0, r1, r2) +# define 
ori(r0, r1, i0) _ori(_jit, r0, r1, i0) +static void _ori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define xorr(r0, r1, r2) XOR(r0, r1, r2) +# define xori(r0, r1, i0) _xori(_jit, r0, r1, i0) +static void _xori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldr_c(r0, r1) LD_B(r0, r1, 0) +# define ldi_c(r0, i0) _ldi_c(_jit, r0, i0) +static void _ldi_c(jit_state_t*, jit_int32_t, jit_word_t); +# define ldr_uc( r0, r1) LD_BU(r0, r1, 0) +# define ldi_uc(r0, i0) _ldi_uc(_jit, r0, i0) +static void _ldi_uc(jit_state_t*, jit_int32_t, jit_word_t); +# define ldr_s(r0, r1) LD_H(r0, r1, 0) +# define ldi_s(r0, i0) _ldi_s(_jit, r0, i0) +static void _ldi_s(jit_state_t*, jit_int32_t, jit_word_t); +# define ldr_us(r0, r1) LD_HU(r0, r1, 0) +# define ldi_us(r0, i0) _ldi_us(_jit, r0, i0) +static void _ldi_us(jit_state_t*, jit_int32_t, jit_word_t); +# define ldr_i(r0, r1) LD_W(r0, r1, 0) +# define ldi_i(r0, i0) _ldi_i(_jit, r0, i0) +static void _ldi_i(jit_state_t*, jit_int32_t, jit_word_t); +# define ldr_ui(r0, r1) LD_WU(r0, r1, 0) +# define ldi_ui(r0, i0) _ldi_ui(_jit, r0, i0) +static void _ldi_ui(jit_state_t*, jit_int32_t, jit_word_t); +# define ldr_l(r0, r1) LD_D(r0, r1, 0) +# define ldi_l(r0, i0) _ldi_l(_jit, r0, i0) +static void _ldi_l(jit_state_t*, jit_int32_t, jit_word_t); +# define ldxr_c(r0, r1, r2) LDX_B(r0, r1, r2) +# define ldxi_c(r0, r1, i0) _ldxi_c(_jit, r0, r1, i0) +static void _ldxi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxr_uc(r0, r1, r2) LDX_BU(r0, r1, r2) +# define ldxi_uc(r0, r1, i0) _ldxi_uc(_jit,r0, r1, i0) +static void _ldxi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxr_s(r0, r1, r2) LDX_H(r0, r1, r2) +# define ldxi_s(r0, r1, i0) _ldxi_s(_jit, r0, r1, i0) +static void _ldxi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxr_us(r0, r1, r2) LDX_HU(r0, r1, r2) +# define ldxi_us(r0, r1, i0) _ldxi_us(_jit, r0, r1, i0) +static void _ldxi_us(jit_state_t*, jit_int32_t, 
jit_int32_t, jit_word_t); +# define ldxr_i(r0, r1, r2) LDX_W(r0, r1, r2) +# define ldxi_i(r0, r1, i0) _ldxi_i(_jit, r0, r1, i0) +static void _ldxi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxr_ui(r0, r1, r2) LDX_WU(r0, r1, r2) +# define ldxi_ui(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0) +static void _ldxi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxr_l(r0, r1, r2) LDX_D(r0, r1, r2) +# define ldxi_l(r0, r1, i0) _ldxi_l(_jit, r0, r1, i0) +static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define str_c(r0, r1) ST_B(r1, r0, 0) +# define sti_c(i0, r0) _sti_c(_jit, i0, r0) +static void _sti_c(jit_state_t*, jit_word_t, jit_int32_t); +# define str_s(r0, r1) ST_H(r1, r0, 0) +# define sti_s(i0, r0) _sti_s(_jit, i0, r0) +static void _sti_s(jit_state_t*, jit_word_t, jit_int32_t); +# define str_i(r0, r1) ST_W(r1, r0, 0) +# define sti_i(i0, r0) _sti_i(_jit, i0, r0) +static void _sti_i(jit_state_t*, jit_word_t, jit_int32_t); +# define str_l(r0, r1) ST_D(r1, r0, 0) +# define sti_l(i0, r0) _sti_l(_jit, i0, r0) +static void _sti_l(jit_state_t*, jit_word_t, jit_int32_t); +# define stxr_c(r0, r1, r2) STX_B(r2, r1, r0) +# define stxi_c(i0, r0, r1) _stxi_c(_jit,i0, r0, r1) +static void _stxi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxr_s(r0, r1, r2) STX_H(r2, r1, r0) +# define stxi_s(i0, r0, r1) _stxi_s(_jit, i0, r0, r1) +static void _stxi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxr_i(r0, r1, r2) STX_W(r2, r1, r0) +# define stxi_i(i0, r0, r1) _stxi_i(_jit, i0, r0, r1) +static void _stxi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxr_l(r0, r1, r2) STX_D(r2, r1, r0) +# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1) +static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*, jit_int32_t, jit_int32_t); +# define bswapr_ui(r0,r1) 
_bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*, jit_int32_t, jit_int32_t); +# define bswapr_ul(r0, r1) REVB_D(r0, r1) +# define extr_c(r0, r1) EXT_W_B(r0, r1) +# define extr_uc(r0, r1) BSTRPICK_D(r0, r1, 7, 0) +# define extr_s(r0, r1) EXT_W_H(r0, r1) +# define extr_us(r0, r1) BSTRPICK_D(r0, r1, 15, 0) +# define extr_i(r0, r1) SLLI_W(r0, r1, 0) +# define extr_ui(r0, r1) BSTRPICK_D(r0, r1, 31, 0) +# define ltr(r0, r1, r2) SLT(r0, r1, r2) +# define lti(r0, r1, i0) _lti(_jit, r0, r1, i0) +static void _lti(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ltr_u(r0, r1, r2) SLTU(r0, r1, r2) +# define lti_u(r0, r1, i0) _lti_u(_jit, r0, r1, i0) +static void _lti_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ler(r0, r1, r2) _ler(_jit, r0, r1, r2) +static void _ler(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define lei(r0, r1, i0) _lei(_jit, r0, r1, i0) +static void _lei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ler_u(r0, r1, r2) _ler_u(_jit, r0, r1, r2) +static void _ler_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define lei_u(r0, r1, i0) _lei_u(_jit, r0, r1, i0) +static void _lei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define eqr(r0, r1, r2) _eqr(_jit, r0, r1, r2) +static void _eqr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define eqi(r0, r1, i0) _eqi(_jit, r0, r1, i0) +static void _eqi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ger(r0, r1, r2) _ger(_jit, r0, r1, r2) +static void _ger(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define gei(r0, r1, i0) _gei(_jit, r0, r1, i0) +static void _gei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ger_u(r0, r1, r2) _ger_u(_jit, r0, r1, r2) +static void _ger_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define gei_u(r0, r1, i0) _gei_u(_jit, r0, r1, i0) +static void _gei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define gtr(r0, r1, r2) 
SLT(r0, r2, r1) +# define gti(r0, r1, i0) _gti(_jit, r0, r1, i0) +static void _gti(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define gtr_u(r0, r1, r2) SLTU(r0, r2, r1) +# define gti_u(r0, r1, i0) _gti_u(_jit, r0, r1, i0) +static void _gti_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ner(r0, r1, r2) _ner(_jit, r0, r1, r2) +static void _ner(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define nei(r0, r1, i0) _nei(_jit, r0, r1, i0) +static void _nei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1) +static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define blti(i0, r0, i1) _blti(_jit, i0, r0, i1) +static jit_word_t _blti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bltr_u(i0, r0, r1) _bltr_u(_jit, i0, r0, r1) +static jit_word_t _bltr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define blti_u(i0, r0, i1) _blti_u(_jit, i0, r0, i1) +static jit_word_t _blti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bler(i0, r0, r1) _bler(_jit, i0, r0, r1) +static jit_word_t _bler(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define blei(i0, r0, i1) _blei(_jit, i0, r0, i1) +static jit_word_t _blei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bler_u(i0, r0, r1) _bler_u(_jit, i0, r0, r1) +static jit_word_t _bler_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define blei_u(i0, r0, i1) _blei_u(_jit, i0, r0, i1) +static jit_word_t _blei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define beqr(i0, r0, r1) _beqr(_jit, i0, r0, r1) +static jit_word_t _beqr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define beqi(i0, r0, i1) _beqi(_jit, i0, r0, i1) +static jit_word_t _beqi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bger(i0, r0, r1) _bger(_jit, i0, r0, r1) +static jit_word_t _bger(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define 
bgei(i0, r0, i1) _bgei(_jit, i0, r0, i1) +static jit_word_t _bgei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bger_u(i0, r0, r1) _bger_u(_jit, i0, r0, r1) +static jit_word_t _bger_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bgei_u(i0, r0, i1) _bgei_u(_jit, i0, r0, i1) +static jit_word_t _bgei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bgtr(i0, r0, r1) _bgtr(_jit, i0, r0, r1) +static jit_word_t _bgtr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bgti(i0, r0, i1) _bgti(_jit, i0, r0, i1) +static jit_word_t _bgti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bgtr_u(i0, r0, r1) _bgtr_u(_jit, i0, r0, r1) +static jit_word_t _bgtr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bgti_u(i0, r0, i1) _bgti_u(_jit, i0, r0, i1) +static jit_word_t _bgti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bner(i0, r0, r1) _bner(_jit, i0, r0, r1) +static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bnei(i0, r0, i1) _bnei(_jit, i0, r0, i1) +static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define jmpr(r0) JIRL(_ZERO_REGNO, r0, 0) +# define jmpi(i0) _jmpi(_jit, i0) +static jit_word_t _jmpi(jit_state_t*, jit_word_t); +# define jmpi_p(i0) _jmpi_p(_jit, i0) +static jit_word_t _jmpi_p(jit_state_t*, jit_word_t); +# define boaddr(i0, r0, r1) _boaddr(_jit, i0, r0, r1) +static jit_word_t _boaddr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define boaddi(i0, r0, i1) _boaddi(_jit, i0, r0, i1) +static jit_word_t _boaddi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define boaddr_u(i0, r0, r1) _boaddr_u(_jit, i0, r0, r1) +static jit_word_t _boaddr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define boaddi_u(i0, r0, i1) _boaddi_u(_jit, i0, r0, i1) +static jit_word_t _boaddi_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bxaddr(i0, r0, r1) _bxaddr(_jit, i0, r0, r1) 
+static jit_word_t _bxaddr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bxaddi(i0, r0, i1) _bxaddi(_jit, i0, r0, i1) +static jit_word_t _bxaddi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bxaddr_u(i0, r0, r1) _bxaddr_u(_jit, i0, r0, r1) +static jit_word_t _bxaddr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bxaddi_u(i0, r0, i1) _bxaddi_u(_jit, i0, r0, i1) +static jit_word_t _bxaddi_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bosubr(i0, r0, r1) _bosubr(_jit, i0, r0, r1) +static jit_word_t _bosubr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bosubi(i0, r0, i1) _bosubi(_jit, i0, r0, i1) +static jit_word_t _bosubi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bosubr_u(i0, r0, r1) _bosubr_u(_jit, i0, r0, r1) +static jit_word_t _bosubr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bosubi_u(i0, r0, i1) _bosubi_u(_jit, i0, r0, i1) +static jit_word_t _bosubi_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bxsubr(i0, r0, r1) _bxsubr(_jit, i0, r0, r1) +static jit_word_t _bxsubr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bxsubi(i0, r0, i1) _bxsubi(_jit, i0, r0, i1) +static jit_word_t _bxsubi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bxsubr_u(i0, r0, r1) _bxsubr_u(_jit, i0, r0, r1) +static jit_word_t _bxsubr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bxsubi_u(i0, r0, i1) _bxsubi_u(_jit, i0, r0, i1) +static jit_word_t _bxsubi_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define bmsr(br, r0, r1) _bmsr(_jit, br, r0, r1) +static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define bmsi(br, r0, i0) _bmsi(_jit, br, r0, i0) +static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); +# define bmcr(br, r0, r1) _bmcr(_jit, br, r0, r1) +static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define bmci(br, r0, i0) 
_bmci(_jit, br, r0, i0)
+static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+# define callr(r0) JIRL(_RA_REGNO, r0, 0) /* JIRL through r0 with offset 0 and _RA_REGNO as rd */
+# define calli(i0) _calli(_jit, i0)
+static jit_word_t _calli(jit_state_t*, jit_word_t);
+# define calli_p(i0) _calli_p(_jit, i0)
+static jit_word_t _calli_p(jit_state_t*, jit_word_t);
+# define prolog(i0) _prolog(_jit, i0)
+static void _prolog(jit_state_t*, jit_node_t*);
+# define epilog(i0) _epilog(_jit, i0)
+static void _epilog(jit_state_t*, jit_node_t*);
+# define vastart(r0) _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+# define vaarg(r0, r1) _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+#define patch_at(jump, label) _patch_at(_jit, jump, label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+/* Everything below is compiled only when CODE is set: the instruction-word
+ * encoders come first. Each encoder asserts that every field fits its slot,
+ * ORs the shifted fields together and hands the word to ii(). */
+#if CODE
+static void /* _orrr: op at bit 15, rk at bit 10, rj at bit 5, rd at bit 0 */
+_orrr(jit_state_t *_jit,
+      jit_int32_t op, jit_int32_t rk, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0xffff));
+    assert(!(rk & ~0x1f));
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 15) | (rk << 10) | (rj << 5) | rd);
+}
+/* _ou2rrr: like _orrr with an extra 2-bit unsigned field u2 at bit 15;
+ * op moves up to bit 17. */
+static void
+_ou2rrr(jit_state_t *_jit, jit_int32_t op,
+        jit_int32_t u2, jit_int32_t rk, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x7ff));
+    assert(!(u2 & ~3));
+    assert(!(rk & ~0x1f));
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 17) | (u2 << 15) | (rk << 10) | (rj << 5) | rd);
+}
+/* _ou3rrr: like _orrr with a 3-bit unsigned field u3 at bit 15;
+ * op moves up to bit 18. */
+static void
+_ou3rrr(jit_state_t *_jit, jit_int32_t op,
+        jit_int32_t u3, jit_int32_t rk, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x3ff));
+    assert(!(u3 & ~7));
+    assert(!(rk & ~0x1f));
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 18) | (u3 << 15) | (rk << 10) | (rj << 5) | rd);
+}
+/* _ou6rr: op at bit 16, 6-bit unsigned immediate u6 at bit 10, then rj and rd. */
+static void
+_ou6rr(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t u6, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x3ff));
+    assert(!(u6 & ~0x3f));
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 16) | (u6 << 10) | (rj << 5) | rd);
+}
+
+/* _ou5u1u5rr: op at bit 21, m5 at bit 16, 1-bit selector u1 at bit 15,
+ * l5 at bit 10, rj at bit 5, rd at bit 0. */
+static void
+_ou5u1u5rr(jit_state_t *_jit, jit_int32_t op, jit_int32_t m5,
+           jit_int32_t u1, jit_int32_t l5, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x7ff));
+    assert(!(m5 & ~0x1f));
+    assert(!(u1 & ~1));
+    assert(!(l5 & ~0x1f));
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 21) | (m5 << 16) | (u1 << 15) | (l5 << 10) | (rj << 5) | rd);
+}
+
+/* _ou6u6rr: op at bit 22, 6-bit m6 at bit 16, 6-bit l6 at bit 10, rj, rd. */
+static void
+_ou6u6rr(jit_state_t *_jit, jit_int32_t op,
+         jit_int32_t m6, jit_int32_t l6, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x3ff));
+    assert(!(m6 & ~0x3f));
+    assert(!(l6 & ~0x3f));
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 22) | (m6 << 16) | (l6 << 10) | (rj << 5) | rd);
+}
+
+/* _o5r23: op at bit 15, i5 in bits 14:10, rj in bits 9:5, i2 in bits 4:3,
+ * i3 in bits 2:0.  The asserts match those widths exactly (5/5/2/3 bits);
+ * they previously accepted 6-bit i5/rj and 5-bit i3, which would have let an
+ * out-of-range operand overwrite the neighbouring field. */
+static void
+_o5r23(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t i5, jit_int32_t rj, jit_int32_t i2, jit_int32_t i3)
+{
+    assert(!(op & ~0xffff));
+    assert(!(i5 & ~0x1f));
+    assert(!(rj & ~0x1f));
+    assert(!(i2 & ~0x3));
+    assert(!(i3 & ~0x7));
+    ii((op << 15) | (i5 << 10) | (rj << 5) | (i2 << 3) | i3);
+}
+
+/* _o523r: op at bit 15, i5 in bits 14:10, i2 in bits 9:8, i3 in bits 7:5,
+ * rd in bits 4:0.  i5 and rd are 5-bit fields; the asserts previously
+ * allowed 6 bits for both. */
+static void
+_o523r(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t i5, jit_int32_t i2, jit_int32_t i3, jit_int32_t rd)
+{
+    assert(!(op & ~0xffff));
+    assert(!(i5 & ~0x1f));
+    assert(!(i2 & ~0x3));
+    assert(!(i3 & ~0x7));
+    assert(!(rd & ~0x1f));
+    ii((op << 15) | (i5 << 10) | (i2 << 8) | (i3 << 5) | rd);
+}
+
+/* _os12rr: op at bit 22 and a signed 12-bit immediate at bit 10.  The value
+ * is range-checked as signed first and only then masked to 12 bits. */
+static void
+_os12rr(jit_state_t *_jit,
+        jit_int32_t op, jit_int32_t s12, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x3ff));
+    assert(s12 <= 2047 && s12 >= -2048); s12 &= 0xfff;
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 22) | (s12 << 10) | (rj << 5) | rd);
+}
+
+/* _ou12rr: same layout as _os12rr but the immediate must already be an
+ * unsigned 12-bit value. */
+static void
+_ou12rr(jit_state_t *_jit,
+        jit_int32_t op, jit_int32_t u12, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x3ff));
+    assert(!(u12 & ~0xfff));
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 22) | (u12 << 10) | (rj << 5) | rd);
+}
+
+static void
+_ou14u5r(jit_state_t *_jit,
+         jit_int32_t op, jit_int32_t u14, jit_int32_t u5, jit_int32_t rd)
+{
+
assert(!(op & ~0xff));
+    assert(!(u14 & ~0x3fff));
+    assert(!(u5 & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 24) | (u14 << 10) | (u5 << 5) | rd);
+}
+/* _os14rr: op at bit 24 and a signed 14-bit immediate at bit 10; the
+ * immediate is range-checked as signed, then masked to 14 bits. */
+static void
+_os14rr(jit_state_t *_jit,
+        jit_int32_t op, jit_int32_t s14, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0xff));
+    assert(s14 <= 8191 && s14 >= -8192); s14 &= 0x3fff;
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 24) | (s14 << 10) | (rj << 5) | rd);
+}
+/* _ou8rr: op at bit 18, unsigned 8-bit immediate at bit 10, then rj and rd. */
+static void
+_ou8rr(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t u8, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x3fff));
+    assert(!(u8 & ~0xff));
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 18) | (u8 << 10) | (rj << 5) | rd);
+}
+/* _ou15: op at bit 15 followed by a single unsigned 15-bit operand. */
+static void
+_ou15(jit_state_t *_jit, jit_int32_t op, jit_int32_t u15)
+{
+    assert(!(op & ~0x1ffff));
+    assert(!(u15 & ~0x7fff));
+    ii((op << 15) | u15);
+}
+/* _orrrr: four 5-bit register fields (ra, rk, rj, rd at bits 15, 10, 5, 0)
+ * under a 12-bit op at bit 20. */
+static void
+_orrrr(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t ra, jit_int32_t rk, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0xfff));
+    assert(!(ra & ~0x1f));
+    assert(!(rk & ~0x1f));
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 20) | (ra << 15) | (rk << 10) | (rj << 5) | rd);
+}
+/* _ou5rru2u3: op at bit 20, u5 at bit 15, rk at bit 10, rj at bit 5, and the
+ * low five bits split into u2 (bits 4:3) and u3 (bits 2:0). */
+static void
+_ou5rru2u3(jit_state_t *_jit,jit_int32_t op, jit_int32_t u5,
+           jit_int32_t rk, jit_int32_t rj, jit_int32_t u2, jit_int32_t u3)
+{
+    assert(!(op & ~0xfff));
+    assert(!(u5 & ~0x1f));
+    assert(!(rk & ~0x1f));
+    assert(!(rj & ~0x1f));
+    assert(!(u2 & ~3));
+    assert(!(u3 & ~7));
+    ii((op << 20) | (u5 << 15) | (rk << 10) | (rj << 5) | (u2 << 3) | u3);
+}
+/* _os16rr: 6-bit op at bit 26 and a signed 16-bit immediate at bit 10; the
+ * immediate is range-checked as signed, then masked to 16 bits. */
+static void
+_os16rr(jit_state_t *_jit,
+        jit_int32_t op, jit_int32_t s16, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x3f));
+    assert(s16 <= 32767 && s16 >= -32768); s16 &= 0xffff;
+    assert(!(rj & ~0x1f));
+    assert(!(rd & ~0x1f));
+    ii((op << 26) | (s16 << 10) | (rj << 5) | rd);
+}
+/* _os20r: 7-bit op at bit 25, signed 20-bit immediate at bit 5, rd at bit 0. */
+static void
+_os20r(jit_state_t *_jit, jit_int32_t op, jit_int32_t s20, jit_int32_t rd)
+{
+    assert(!(op & ~0x7f));
+    assert(s20 <= 524287 && s20
>= -524288); s20 &= 0xfffff;
+    assert(!(rd & ~0x1f));
+    ii((op << 25) | (s20 << 5) | rd);
+}
+
+/* _orj21: 6-bit op at bit 26 and a signed 21-bit offset split in two: the
+ * low 16 bits go to bits 25:10 and the high 5 bits to bits 4:0, with rj in
+ * between at bit 5.  op is shifted by 26, so only 6 bits are accepted. */
+static void
+_orj21(jit_state_t *_jit, jit_int32_t op, jit_int32_t rj ,jit_int32_t j21)
+{
+    assert(!(op & ~0x3f));
+    assert(j21 <= 1048575 && j21 >= -1048576); j21 &= 0x1fffff;
+    assert(!(rj & ~0x1f));
+    ii((op << 26) | ((j21 & 0xffff) << 10) | (rj << 5) | (j21 >> 16));
+}
+
+/* _ou2u3j21: same split 21-bit offset as _orj21, but bits 9:5 carry a 2-bit
+ * selector (u2, bits 9:8) and a 3-bit operand (u3, bits 7:5) instead of rj. */
+static void
+_ou2u3j21(jit_state_t *_jit,
+          jit_int32_t op, jit_int32_t u2, jit_int32_t u3, jit_int32_t j21)
+{
+    assert(!(op & ~0x3f));
+    assert(j21 <= 1048575 && j21 >= -1048576); j21 &= 0x1fffff;
+    assert(!(u2 & ~3));
+    assert(!(u3 & ~7));
+    ii((op << 26) | ((j21 & 0xffff) << 10) | (u2 << 8) | (u3 << 5) | (j21 >> 16));
+}
+
+/* _oj16rr: 6-bit op at bit 26, signed 16-bit offset at bit 10, rj, rd. */
+static void
+_oj16rr(jit_state_t *_jit,
+        jit_int32_t op, jit_int32_t j16, jit_int32_t rj, jit_int32_t rd)
+{
+    assert(!(op & ~0x3f));
+    assert(j16 <= 32767 && j16 >= -32768); j16 &= 0xffff;
+    assert(!(rd & ~0x1f));
+    assert(!(rj & ~0x1f));
+    ii((op << 26) | (j16 << 10) | (rj << 5) | rd);
+}
+
+/* _oj26: 6-bit op at bit 26 and a signed 26-bit offset split in two: the low
+ * 16 bits go to bits 25:10 and the high 10 bits to bits 9:0.
+ * The low part must be masked with 0xffff before the shift by 10; the former
+ * 0x1ffffff mask pushed offset bits 16..24 on top of the opcode field, which
+ * corrupted every negative offset and every offset of 0x10000 or more. */
+static void
+_oj26(jit_state_t *_jit, jit_int32_t op, jit_int32_t j26)
+{
+    assert(!(op & ~0x3f));
+    assert(j26 <= 33554431 && j26 >= -33554432); j26 &= 0x3ffffff;
+    ii((op << 26) | ((j26 & 0xffff) << 10) | (j26 >> 16));
+}
+
+/* _nop: emit one NOP() per 4 units of i0.  The final assert requires i0 to
+ * be a non-negative multiple of 4 (the countdown must land exactly on 0). */
+static void
+_nop(jit_state_t *_jit, jit_int32_t i0)
+{
+    for (; i0 > 0; i0 -= 4)
+        NOP();
+    assert(i0 == 0);
+}
+
+/* _movnr: built from MASKEQZ(tmp, r1, r2), MASKNEZ(r0, r0, r2) and an OR of
+ * the two results into r0, using one scratch register.
+ * NOTE(review): presumably "r0 = r1 when r2 is non-zero, else r0 unchanged";
+ * that depends on the MASKEQZ/MASKNEZ semantics, which are not shown here. */
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr);
+    MASKEQZ(rn(reg), r1, r2);
+    MASKNEZ(r0, r0, r2);
+    OR(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* _movzr: mirror image of _movnr with the two mask instructions swapped.
+ * NOTE(review): presumably "r0 = r1 when r2 is zero, else r0 unchanged". */
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr);
+    MASKNEZ(rn(reg), r1, r2);
+    MASKEQZ(r0, r0, r2);
+    OR(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+    jit_int32_t t0, r1_reg, iscasi;
+
jit_word_t retry, done, jump0, jump1;
+    if ((iscasi = (r1 == _NOREG))) { /* immediate-address form: load i0 into a scratch register */
+        r1_reg = jit_get_reg(jit_class_gpr);
+        r1 = rn(r1_reg);
+        movi(r1, i0);
+    }
+    t0 = jit_get_reg(jit_class_gpr);
+    movi(r0, 0); /* Assume will fail */
+    DBAR(0); /* Barrier */
+    retry = _jit->pc.w; /* target of the backward branch taken when SC_D fails */
+    LL_D(rn(t0), r1, 0); /* Load current value */
+    jump0 = _jit->pc.w;
+    BNE(rn(t0), r2, 0); /* If not equal, already done and r0 is zero */
+    movr(r0, r3); /* Make r0 an inout argument */
+    SC_D(r0, r1, 0); /* r0 is 0 if failed, 1 if succeed */
+    jump1 = _jit->pc.w;
+    BEQ(r0, _ZERO_REGNO, 0); /* offset 0 is a placeholder; patched below to jump to retry */
+    /* FIXME Not certain what 0x700 hint means. Copied from C generated code */
+    DBAR(0x700);
+    done = _jit->pc.w;
+    patch_at(jump0, done); /* mismatch exit: skip the store and the trailing barrier */
+    patch_at(jump1, retry); /* store failed: reload and try again */
+    jit_unget_reg(t0);
+    if (iscasi)
+        jit_unget_reg(r1_reg);
+}
+/* _movr: copy r1 into r0 with OR r0, r1, zero; emits nothing when both
+ * operands are the same register. */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1)
+        OR(r0, r1, _ZERO_REGNO);
+}
+/* _movi: load the constant i0 into r0, trying the shortest form first:
+ *   1. zero                       -> OR with the zero register;
+ *   2. fits signed 12 bits        -> ADDI_D from zero;
+ *   3. low 16 bits clear and the
+ *      rest fits signed 16 bits   -> ADDU16I_D from zero;
+ *   4. within a signed 32-bit
+ *      distance of the current pc -> PCADDU12I (+ ADDI_D for the low 12 bits);
+ *   5. otherwise                  -> LU12I_W/ORI/LU32I_D/LU52I_D, skipping the
+ *      pieces that the tests below find unnecessary. */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    if (i0 == 0)
+        OR(r0, _ZERO_REGNO, _ZERO_REGNO);
+    else if (can_sign_extend_si12_p(i0))
+        ADDI_D(r0, _ZERO_REGNO, i0);
+    else if (!(i0 & 0xffff) && can_sign_extend_si16_p(i0 >> 16))
+        ADDU16I_D(r0, _ZERO_REGNO, i0 >> 16);
+    else {
+        jit_word_t w = i0 - _jit->pc.w;
+        /* If loading some constant reachable address */
+        if (can_sign_extend_si32_p(w)) {
+            jit_int32_t lo = (jit_int32_t)w << 20 >> 20; /* low 12 bits of w, sign-extended */
+            jit_int32_t hi = w - lo; /* remainder, a multiple of 4096 */
+            PCADDU12I(r0, hi >> 12);
+            if (lo)
+                ADDI_D(r0, r0, lo);
+        }
+        else {
+            jit_int32_t _00_11, _12_31, _32_51, _52_63; /* i0 split into 12/20/20/12-bit slices */
+            _00_11 = i0 & 0xfff;
+            _12_31 = (i0 >> 12) & 0xfffff;
+            _32_51 = (i0 >> 32) & 0xfffff;
+            _52_63 = (i0 >> 52) & 0xfff;
+            if (_12_31) {
+                LU12I_W(r0, _12_31 << 12 >> 12); /* shift pair sign-extends the 20-bit slice */
+                if (_00_11)
+                    ORI(r0, r0, _00_11);
+            }
+            else
+                ORI(r0, _ZERO_REGNO, _00_11);
+            if (_32_51 || (_12_31 & 0x80000)) /* also needed when the slice below has its top bit set */
+                LU32I_D(r0, _32_51 << 12 >> 12);
+            if (_52_63 || (_12_31 & 0x80000) || (_32_51 & 0x80000))
+                LU52I_D(r0, r0, _52_63 << 20 >> 20);
+        }
+    }
+}
+
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_word_t w; + jit_int32_t _00_11, _12_31, _32_51, _52_63; + _00_11 = i0 & 0xfff; + _12_31 = (i0 >> 12) & 0xfffff; + _32_51 = (i0 >> 32) & 0xfffff; + _52_63 = (i0 >> 52) & 0xfff; + w = _jit->pc.w; + LU12I_W(r0, _12_31 << 12 >> 12); + ORI(r0, r0, _00_11); + LU32I_D(r0, _32_51 << 12 >> 12); + LU52I_D(r0, r0, _52_63 << 20 >> 20); + return (w); +} + +static void +_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (i0 == 0) + movr(r0, r1); + else if (can_sign_extend_si12_p(i0)) + ADDI_D(r0, r1, i0); + else if (!(i0 & 0xffff) && can_sign_extend_si16_p(i0 >> 16)) + ADDU16I_D(r0, r1, i0 >> 16); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + addr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t t0; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + t0 = jit_get_reg(jit_class_gpr); + addr(rn(t0), r1, r2); + SLTU(rn(jit_carry), rn(t0), r1); + movr(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + addr(r0, r1, r2); + SLTU(rn(jit_carry), r0, r1); + } +} + +static void +_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + t0 = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + if (can_sign_extend_si12_p(i0)) + ADDI_D(rn(t0), r1, i0); + else if (!(i0 & 0xffff) && can_sign_extend_si16_p(i0 >> 16)) + ADDU16I_D(rn(t0), r1, i0 >> 16); + else { + movi(rn(t0), i0); + addr(rn(t0), r1, rn(t0)); + } + SLTU(rn(jit_carry), rn(t0), r1); + movr(r0, rn(t0)); + } + else { + if (can_sign_extend_si12_p(i0)) + ADDI_D(r0, r1, i0); + else if (!(i0 & 0xffff) && can_sign_extend_si16_p(i0 >> 16)) + ADDU16I_D(r0, r1, i0 >> 16); + else { + movi(rn(t0), i0); + addr(r0, r1, rn(t0)); + } + SLTU(rn(jit_carry), r0, r1); + } + 
jit_unget_reg(t0); +} + +static void +_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t t0; + assert(jit_carry != _NOREG); + t0 = jit_get_reg(jit_class_gpr); + movr(rn(t0), rn(jit_carry)); + addcr(r0, r1, r2); + addcr(r0, r0, rn(t0)); + jit_unget_reg(t0); +} + +static void +_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + assert(jit_carry != _NOREG); + t0 = jit_get_reg(jit_class_gpr); + movr(rn(t0), rn(jit_carry)); + addci(r0, r1, i0); + addcr(r0, r0, rn(t0)); + jit_unget_reg(t0); +} + +static void +_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (i0 == 0) + movr(r0, r1); + else if (can_sign_extend_si12_p(-i0)) + ADDI_D(r0, r1, -i0); + else if (!(-i0 & 0xffff) && can_sign_extend_si16_p(-i0 >> 16)) + ADDU16I_D(r0, r1, -i0 >> 16); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + subr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t t0; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + t0 = jit_get_reg(jit_class_gpr); + subr(rn(t0), r1, r2); + SLTU(rn(jit_carry), r1, rn(t0)); + movr(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + subr(r0, r1, r2); + SLTU(rn(jit_carry), r1, r0); + } +} + +static void +_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + t0 = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + if (can_sign_extend_si12_p(-i0)) + ADDI_D(rn(t0), r1, -i0); + else if (!(-i0 & 0xffff) && can_sign_extend_si16_p(-i0 >> 16)) + ADDU16I_D(rn(t0), r1, -i0 >> 16); + else { + movi(rn(t0), i0); + subr(rn(t0), r1, rn(t0)); + } + SLTU(rn(jit_carry), r1, rn(t0)); + movr(r0, rn(t0)); + } + else { + if (can_sign_extend_si12_p(-i0)) + ADDI_D(r0, r1, -i0); + else if 
(!(-i0 & 0xffff) && can_sign_extend_si16_p(-i0 >> 16)) + ADDU16I_D(r0, r1, -i0 >> 16); + else { + movi(rn(t0), i0); + subr(r0, r1, rn(t0)); + } + SLTU(rn(jit_carry), r1, r0); + } + jit_unget_reg(t0); +} + +static void +_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t t0; + assert(jit_carry != _NOREG); + t0 = jit_get_reg(jit_class_gpr); + movr(rn(t0), rn(jit_carry)); + subcr(r0, r1, r2); + subcr(r0, r0, rn(t0)); + jit_unget_reg(t0); +} + +static void +_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + assert(jit_carry != _NOREG); + t0 = jit_get_reg(jit_class_gpr); + movr(rn(t0), rn(jit_carry)); + subci(r0, r1, i0); + subcr(r0, r0, rn(t0)); + jit_unget_reg(t0); +} + +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void +_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + mulr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) +{ + jit_int32_t t0; + if (r0 == r2 || r0 == r3) { + t0 = jit_get_reg(jit_class_gpr); + mulr(rn(t0), r2, r3); + } + else + mulr(r0, r2, r3); + if (sign) + MULH_D(r1, r2, r3); + else + MULH_DU(r1, r2, r3); + if (r0 == r2 || r0 == r3) { + movr(r0, rn(t0)); + jit_unget_reg(t0); + } +} + +static void +_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + iqmulr(r0, r1, r2, rn(reg), sign); + jit_unget_reg(reg); +} + +static void +_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + divr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + 
+/* Put the immediate divisor in a scratch register and defer to divr_u. */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    divr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Two-result divide of r2 by r3: DIV_D/MOD_D when sign is set,
+ * DIV_DU/MOD_DU otherwise.  The DIV result goes to r0 and the MOD result
+ * to r1; if r0 aliases an operand, the DIV result is parked in a scratch
+ * register until MOD has been emitted. */
+static void
+_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t tmp = (r0 == r2 || r0 == r3) ?
+        jit_get_reg(jit_class_gpr) : _NOREG;
+    jit_int32_t quot = (tmp == _NOREG) ? r0 : rn(tmp);
+    if (sign) {
+        DIV_D(quot, r2, r3);
+        MOD_D(r1, r2, r3);
+    }
+    else {
+        DIV_DU(quot, r2, r3);
+        MOD_DU(r1, r2, r3);
+    }
+    if (tmp == _NOREG)
+        return;
+    movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate-divisor wrapper around iqdivr. */
+static void
+_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    iqdivr(r0, r1, r2, rn(tmp), sign);
+    jit_unget_reg(tmp);
+}
+
+/* Put the immediate in a scratch register and defer to remr. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    remr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Put the immediate in a scratch register and defer to remr_u. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 & i0: ANDI when i0 passes can_zero_extend_u12_p, otherwise AND
+ * against a scratch register holding i0. */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+    if (can_zero_extend_u12_p(i0)) {
+        ANDI(r0, r1, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    AND(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 | i0: ORI when i0 passes can_zero_extend_u12_p, otherwise OR
+ * against a scratch register holding i0. */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+    if (can_zero_extend_u12_p(i0)) {
+        ORI(r0, r1, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    OR(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 ^ i0: XORI when i0 passes can_zero_extend_u12_p, otherwise XOR
+ * against a scratch register holding i0. */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+    if (can_zero_extend_u12_p(i0)) {
+        XORI(r0, r1, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    XOR(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Absolute-address loads.  Each one uses the zero register as base when
+ * the address i0 fits a signed 12-bit offset; otherwise the address is
+ * loaded into a scratch register and dereferenced with offset 0. */
+
+/* LD_B from absolute address i0. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t base;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_B(r0, _ZERO_REGNO, i0);
+        return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movi(rn(base), i0);
+    LD_B(r0, rn(base), 0);
+    jit_unget_reg(base);
+}
+
+/* LD_BU from absolute address i0. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t base;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_BU(r0, _ZERO_REGNO, i0);
+        return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movi(rn(base), i0);
+    LD_BU(r0, rn(base), 0);
+    jit_unget_reg(base);
+}
+
+/* LD_H from absolute address i0. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t base;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_H(r0, _ZERO_REGNO, i0);
+        return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movi(rn(base), i0);
+    LD_H(r0, rn(base), 0);
+    jit_unget_reg(base);
+}
+
+/* LD_HU from absolute address i0. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t base;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_HU(r0, _ZERO_REGNO, i0);
+        return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movi(rn(base), i0);
+    LD_HU(r0, rn(base), 0);
+    jit_unget_reg(base);
+}
+
+/* LD_W from absolute address i0. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t base;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_W(r0, _ZERO_REGNO, i0);
+        return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movi(rn(base), i0);
+    LD_W(r0, rn(base), 0);
+    jit_unget_reg(base);
+}
+
+/* LD_WU from absolute address i0. */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t base;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_WU(r0, _ZERO_REGNO, i0);
+        return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movi(rn(base), i0);
+    LD_WU(r0, rn(base), 0);
+    jit_unget_reg(base);
+}
+
+/* LD_D from absolute address i0. */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t base;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_D(r0, _ZERO_REGNO, i0);
+        return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movi(rn(base), i0);
+    LD_D(r0, rn(base), 0);
+    jit_unget_reg(base);
+}
+
+/* Base-plus-offset loads.  Each one uses the immediate-offset instruction
+ * when i0 fits a signed 12-bit field; otherwise the offset is loaded into
+ * a scratch register and the indexed (LDX_*) form is used. */
+
+/* LD_B / LDX_B from r1 + i0. */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t off;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_B(r0, r1, i0);
+        return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    LDX_B(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* LD_BU / LDX_BU from r1 + i0. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t off;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_BU(r0, r1, i0);
+        return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    LDX_BU(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* LD_H / LDX_H from r1 + i0. */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t off;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_H(r0, r1, i0);
+        return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    LDX_H(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* LD_HU / LDX_HU from r1 + i0. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t off;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_HU(r0, r1, i0);
+        return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    LDX_HU(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* LD_W / LDX_W from r1 + i0. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t off;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_W(r0, r1, i0);
+        return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    LDX_W(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* LD_WU / LDX_WU from r1 + i0. */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t off;
+    if (can_sign_extend_si12_p(i0)) {
+        LD_WU(r0, r1, i0);
+        return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    LDX_WU(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* LD_D from r1 + i0 when i0 fits a signed 12-bit offset, otherwise LDX_D
+ * with the offset in a scratch register. */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        LD_D(r0, r1, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        LDX_D(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* Absolute-address stores of r0 to i0: zero register as base when i0 fits
+ * a signed 12-bit offset, otherwise the address goes through a scratch
+ * register. */
+
+/* ST_B of r0 to absolute address i0. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        ST_B(r0, _ZERO_REGNO, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ST_B(r0, rn(reg), 0);
+        jit_unget_reg(reg);
+    }
+}
+
+/* ST_H of r0 to absolute address i0. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        ST_H(r0, _ZERO_REGNO, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ST_H(r0, rn(reg), 0);
+        jit_unget_reg(reg);
+    }
+}
+
+/* ST_W of r0 to absolute address i0. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        ST_W(r0, _ZERO_REGNO, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ST_W(r0, rn(reg), 0);
+        jit_unget_reg(reg);
+    }
+}
+
+/* ST_D of r0 to absolute address i0. */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        ST_D(r0, _ZERO_REGNO, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ST_D(r0, rn(reg), 0);
+        jit_unget_reg(reg);
+    }
+}
+
+/* Base-plus-offset stores of r1 to r0 + i0: immediate-offset form when i0
+ * fits a signed 12-bit field, otherwise the indexed STX_* form with the
+ * offset in a scratch register. */
+
+/* ST_B / STX_B of r1 to r0 + i0. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        ST_B(r1, r0, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        STX_B(r1, r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* ST_H / STX_H of r1 to r0 + i0. */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        ST_H(r1, r0, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        STX_H(r1, r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* ST_W / STX_W of r1 to r0 + i0. */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        ST_W(r1, r0, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        STX_W(r1, r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* ST_D / STX_D of r1 to r0 + i0. */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        ST_D(r1, r0, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        STX_D(r1, r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* Byte swap via REVB_2H followed by extr_us on the result. */
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    REVB_2H(r0, r1);
+    extr_us(r0, r0);
+}
+
+/* Byte swap via REVB_2W followed by extr_ui on the result. */
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    REVB_2W(r0, r1);
+    extr_ui(r0, r0);
+}
+
+/* r0 = (r1 < i0), signed: SLTI when i0 fits 12 bits, else ltr against a
+ * scratch register. */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        SLTI(r0, r1, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ltr(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* r0 = (r1 < i0), unsigned: SLTUI when i0 fits 12 bits, else ltr_u
+ * against a scratch register. */
+static void
+_lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0))
+        SLTUI(r0, r1, i0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ltr_u(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* r0 = (r1 <= r2), signed, computed as !(r2 < r1). */
+static void
+_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLT(r0, r2, r1);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 <= i0), signed: a single SLTI against i0 + 1 when that fits
+ * 12 bits, else ler against a scratch register. */
+static void
+_lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0 + 1))
+        SLTI(r0, r1, i0 + 1);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ler(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* r0 = (r1 <= r2), unsigned, computed as !(r2 < r1). */
+static void
+_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLTU(r0, r2, r1);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 <= i0), unsigned: a single SLTUI against i0 + 1 when that is
+ * valid and fits 12 bits, else ler_u against a scratch register. */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t reg;
+    /* The SLTUI shortcut tests r1 <u i0 + 1, which only equals r1 <=u i0
+     * while the increment does not wrap.  For i0 == -1 (all bits set) it
+     * would test r1 <u 0 and always yield 0, so that value takes the
+     * register path. */
+    if (i0 != -1 && can_sign_extend_si12_p(i0 + 1))
+        SLTUI(r0, r1, i0 + 1);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ler_u(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* r0 = (r1 == r2): subtract, test the difference for non-zero, invert. */
+static void
+_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    subr(r0, r1, r2);
+    SLTU(r0, _ZERO_REGNO, r0);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 == i0); the subtraction is skipped when i0 is zero. */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0) {
+        subi(r0, r1, i0);
+        SLTU(r0, _ZERO_REGNO, r0);
+    }
+    else
+        SLTU(r0, _ZERO_REGNO, r1);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 >= r2), signed, computed as !(r1 < r2). */
+static void
+_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLT(r0, r1, r2);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 >= i0), signed: inverted SLTI when i0 fits 12 bits, else ger
+ * against a scratch register. */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0)) {
+        SLTI(r0, r1, i0);
+        XORI(r0, r0, 1);
+    } else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ger(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* r0 = (r1 >= r2), unsigned, computed as !(r1 < r2). */
+static void
+_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLTU(r0, r1, r2);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 >= i0), unsigned: inverted SLTUI when i0 fits 12 bits, else
+ * ger_u against a scratch register. */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t reg;
+    if (can_sign_extend_si12_p(i0)) {
+        SLTUI(r0, r1, i0);
+        XORI(r0, r0, 1);
+    } else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        ger_u(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/* r0 = (r1 > i0), signed, as SLT with the operands swapped; the zero
+ * register stands in for i0 == 0. */
+static void
+_gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t reg;
+    if (i0 == 0)
+        SLT(r0, _ZERO_REGNO, r1);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        SLT(r0, rn(reg), r1);
+        jit_unget_reg(reg);
+    }
+}
+
+/* r0 = (r1 > i0), unsigned, as SLTU with the operands swapped; the zero
+ * register stands in for i0 == 0. */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t reg;
+    if (i0 == 0)
+        SLTU(r0, _ZERO_REGNO, r1);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        SLTU(r0, rn(reg), r1);
+        jit_unget_reg(reg);
+    }
+}
+
+/* r0 = (r1 != r2): subtract and test the difference for non-zero. */
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    subr(r0, r1, r2);
+    SLTU(r0, _ZERO_REGNO, r0);
+}
+
+/* r0 = (r1 != i0); the subtraction is skipped when i0 is zero. */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0) {
+        subi(r0, r1, i0);
+        SLTU(r0, _ZERO_REGNO, r0);
+    }
+    else
+        SLTU(r0, _ZERO_REGNO, r1);
+}
+
+/* Conditional branches.  i0 is the branch target, and every routine
+ * returns the address of the branch instruction itself (taken after any
+ * set-up code has been emitted).  The offset passed to the branch macros
+ * is the byte distance divided by 4. */
+
+/* Branch to i0 if r0 < r1 (signed). */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    w = _jit->pc.w;
+    BLT(r0, r1, (i0 - w) >> 2);
+    return (w);
+}
+
+/* Branch to i0 if r0 < i1 (signed); zero is compared against the zero
+ * register directly. */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    if (i1 == 0) {
+        w = _jit->pc.w;
+        BLT(r0, _ZERO_REGNO, (i0 - w) >> 2);
+    }
+    else {
+        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(reg), i1);
+        w = _jit->pc.w;
+        BLT(r0, rn(reg), (i0 - w) >> 2);
+        jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/* Branch to i0 if r0 < r1 (unsigned). */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    w = _jit->pc.w;
+    BLTU(r0, r1, (i0 - w) >> 2);
+    return (w);
+}
+
+/* Branch to i0 if r0 < i1 (unsigned). */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    if (i1 == 0)
+        w = bltr_u(i0, r0, _ZERO_REGNO);
+    else {
+        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(reg), i1);
+        w = _jit->pc.w;
+        BLTU(r0, rn(reg), (i0 - w) >> 2);
+        jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/* Branch to i0 if r0 <= r1 (signed): computes r1 < r0 into a scratch
+ * register and branches when it is zero. */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    SLT(rn(reg), r1, r0);
+    w = _jit->pc.w;
+    BEQZ(rn(reg), (i0 - w) >> 2);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Branch to i0 if r0 <= i1 (signed). */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(reg), i1);
+    w = bler(i0, r0, rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Branch to i0 if r0 <= r1 (unsigned): computes r1 < r0 into a scratch
+ * register and branches when it is zero. */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    SLTU(rn(reg), r1, r0);
+    w = _jit->pc.w;
+    BEQZ(rn(reg), (i0 - w) >> 2);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Branch to i0 if r0 <= i1 (unsigned). */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    if (i1 == 0)
+        w = bler_u(i0, r0, _ZERO_REGNO);
+    else {
+        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(reg), i1);
+        w = bler_u(i0, r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/* Branch to i0 if r0 == r1. */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    w = _jit->pc.w;
+    BEQ(r0, r1, (i0 - w) >> 2);
+    return (w);
+}
+
+/* Branch to i0 if r0 == i1; zero uses BEQZ directly. */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    if (i1 == 0) {
+        w = _jit->pc.w;
+        BEQZ(r0, (i0 - w) >> 2);
+    }
+    else {
+        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(reg), i1);
+        w = beqr(i0, r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/* Branch to i0 if r0 >= r1 (signed). */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    w = _jit->pc.w;
+    BGE(r0, r1, (i0 - w) >> 2);
+    return (w);
+}
+
+/* Branch to i0 if r0 >= i1 (signed). */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    if (i1 == 0)
+        w = bger(i0, r0, _ZERO_REGNO);
+    else {
+        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(reg), i1);
+        w = bger(i0, r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/* Branch to i0 if r0 >= r1 (unsigned). */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    w = _jit->pc.w;
+    BGEU(r0, r1, (i0 - w) >> 2);
+    return (w);
+}
+
+/* Branch to i0 if r0 >= i1 (unsigned).  Every unsigned value is >= 0, so
+ * i1 == 0 becomes an unconditional B. */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    if (i1 == 0) {
+        w = _jit->pc.w;
+        B((i0 - w) >> 2);
+    }
+    else {
+        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(reg), i1);
+        w = bger_u(i0, r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/* Branch to i0 if r0 > r1 (signed): computes r1 < r0 into a scratch
+ * register and branches when it is non-zero. */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    SLT(rn(reg), r1, r0);
+    w = _jit->pc.w;
+    BNEZ(rn(reg), (i0 - w) >> 2);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Branch to i0 if r0 > i1 (signed). */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(reg), i1);
+    w = bgtr(i0, r0, rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Branch to i0 if r0 > r1 (unsigned): computes r1 < r0 into a scratch
+ * register and branches when it is non-zero. */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    SLTU(rn(reg), r1, r0);
+    w = _jit->pc.w;
+    BNEZ(rn(reg), (i0 - w) >> 2);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Branch to i0 if r0 > i1 (unsigned). */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(reg), i1);
+    w = bgtr_u(i0, r0, rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Branch to i0 if r0 != r1. */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    w = _jit->pc.w;
+    BNE(r0, r1, (i0 - w) >> 2);
+    return (w);
+}
+
+/* Branch to i0 if r0 != i1; zero uses BNEZ directly. */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    if (i1 == 0) {
+        w = _jit->pc.w;
+        BNEZ(r0, (i0 - w) >> 2);
+    }
+    else {
+        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(reg), i1);
+        w = bner(i0, r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/* Jump to i0 with a single B when the range test passes, otherwise via
+ * jmpi_p.  Returns the address of the first emitted instruction. */
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    /* NOTE(review): the range test is applied to the absolute target i0,
+     * not to the word displacement d that B() encodes -- confirm whether
+     * d was intended. */
+    if (can_sign_extend_si26_p(i0))
+        B(d);
+    else
+        w = jmpi_p(i0);
+    return (w);
+}
+
+/* Jump to i0 through a scratch register loaded with the fixed-length
+ * movi_p sequence; returns the address of that sequence. */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    w = movi_p(rn(reg), i0);
+    jmpr(rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* r0 += r1, then branch to i0 if the signed addition overflowed.
+ * Returns the address of the final conditional branch. */
+static jit_word_t
+_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w, b;
+    jit_int32_t t0, t1;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* t0 = r1 < 0; */
+    SLT(rn(t0), r1, _ZERO_REGNO);
+    /* t1 = r0 */
+    movr(rn(t1), r0);
+    /* r0 = r0 + r1 */
+    addr(r0, r0, r1);
+    /* overflow = r1 < 0 ? t1 < r0 : r0 < t1 */
+    w = _jit->pc.w;
+    BNEZ(rn(t0), 0);
+    /* r1 >= 0 */
+    SLT(rn(t1), r0, rn(t1));
+    b = _jit->pc.w;
+    B(0);
+    /* r1 < 0 */
+    patch_at(w, _jit->pc.w);
+    SLT(rn(t1), rn(t1), r0);
+    /**/
+    patch_at(b, _jit->pc.w);
+    w = _jit->pc.w;
+    BNEZ(rn(t1), (i0 - w) >> 2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of _boaddr. */
+static jit_word_t
+_boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(t0), i1);
+    w = boaddr(i0, r0, rn(t0));
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* r0 += r1, then branch to i0 if the unsigned addition carried
+ * (sum <u original r0). */
+static jit_word_t
+_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t t0, t1;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    addr(rn(t0), r0, r1);
+    SLTU(rn(t1), rn(t0), r0);
+    movr(r0, rn(t0));
+    w = _jit->pc.w;
+    BNEZ(rn(t1), (i0 - w) >> 2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of _boaddr_u. */
+static jit_word_t
+_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(t0), i1);
+    w = boaddr_u(i0, r0, rn(t0));
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* r0 += r1, then branch to i0 if the signed addition did NOT overflow;
+ * same sequence as _boaddr with the final branch inverted. */
+static jit_word_t
+_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w, b;
+    jit_int32_t t0, t1;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* t0 = r1 < 0; */
+    SLT(rn(t0), r1, _ZERO_REGNO);
+    /* t1 = r0 */
+    movr(rn(t1), r0);
+    /* r0 = r0 + r1 */
+    addr(r0, r0, r1);
+    /* overflow = r1 < 0 ? t1 < r0 : r0 < t1 */
+    w = _jit->pc.w;
+    BNEZ(rn(t0), 0);
+    /* r1 >= 0 */
+    SLT(rn(t1), r0, rn(t1));
+    b = _jit->pc.w;
+    B(0);
+    /* r1 < 0 */
+    patch_at(w, _jit->pc.w);
+    SLT(rn(t1), rn(t1), r0);
+    /**/
+    patch_at(b, _jit->pc.w);
+    w = _jit->pc.w;
+    BEQZ(rn(t1), (i0 - w) >> 2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of _bxaddr. */
+static jit_word_t
+_bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(t0), i1);
+    w = bxaddr(i0, r0, rn(t0));
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* r0 += r1, then branch to i0 if the unsigned addition did NOT carry. */
+static jit_word_t
+_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t t0, t1;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    addr(rn(t0), r0, r1);
+    SLTU(rn(t1), rn(t0), r0);
+    movr(r0, rn(t0));
+    w = _jit->pc.w;
+    BEQZ(rn(t1), (i0 - w) >> 2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of _bxaddr_u. */
+static jit_word_t
+_bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(t0), i1);
+    w = bxaddr_u(i0, r0, rn(t0));
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* r0 -= r1, then branch to i0 if the signed subtraction overflowed.
+ * Returns the address of the final conditional branch. */
+static jit_word_t
+_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w, b;
+    jit_int32_t t0, t1;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* t0 = 0 < r1; */
+    SLT(rn(t0), _ZERO_REGNO, r1);
+    /* t1 = r0 */
+    movr(rn(t1), r0);
+    /* r0 = r0 - r1 */
+    subr(r0, r0, r1);
+    /* overflow = r1 > 0 ? t1 < r0 : r0 < t1 */
+    w = _jit->pc.w;
+    BNE(rn(t0), _ZERO_REGNO, 0);
+    /* r1 <= 0 */
+    SLT(rn(t1), r0, rn(t1));
+    b = _jit->pc.w;
+    B(0);
+    /* r1 > 0 */
+    patch_at(w, _jit->pc.w);
+    SLT(rn(t1), rn(t1), r0);
+    /**/
+    patch_at(b, _jit->pc.w);
+    w = _jit->pc.w;
+    BNEZ(rn(t1), (i0 - w) >> 2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of _bosubr. */
+static jit_word_t
+_bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(t0), i1);
+    w = bosubr(i0, r0, rn(t0));
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* r0 -= r1, then branch to i0 if the unsigned subtraction borrowed
+ * (original r0 <u difference). */
+static jit_word_t
+_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t t0, t1;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    subr(rn(t0), r0, r1);
+    SLTU(rn(t1), r0, rn(t0));
+    movr(r0, rn(t0));
+    w = _jit->pc.w;
+    BNEZ(rn(t1), (i0 - w) >> 2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of _bosubr_u; here br is the branch target and i0 the
+ * immediate operand. */
+static jit_word_t
+_bosubi_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(t0), i0);
+    w = bosubr_u(br, r0, rn(t0));
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* r0 -= r1, then branch to i0 if the signed subtraction did NOT
+ * overflow; same sequence as _bosubr with the final branch inverted. */
+static jit_word_t
+_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w, b;
+    jit_int32_t t0, t1;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* t0 = 0 < r1; */
+    SLT(rn(t0), _ZERO_REGNO, r1);
+    /* t1 = r0 */
+    movr(rn(t1), r0);
+    /* r0 = r0 - r1 */
+    subr(r0, r0, r1);
+    /* overflow = r1 > 0 ? t1 < r0 : r0 < t1 */
+    w = _jit->pc.w;
+    BNEZ(rn(t0), 0);
+    /* r1 <= 0 */
+    SLT(rn(t1), r0, rn(t1));
+    b = _jit->pc.w;
+    B(0);
+    /* r1 > 0 */
+    patch_at(w, _jit->pc.w);
+    SLT(rn(t1), rn(t1), r0);
+    /**/
+    patch_at(b, _jit->pc.w);
+    w = _jit->pc.w;
+    BEQZ(rn(t1), (i0 - w) >> 2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of _bxsubr. */
+static jit_word_t
+_bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(t0), i1);
+    w = bxsubr(i0, r0, rn(t0));
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* r0 -= r1, then branch to i0 if the unsigned subtraction did NOT
+ * borrow. */
+static jit_word_t
+_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t t0, t1;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    subr(rn(t0), r0, r1);
+    SLTU(rn(t1), r0, rn(t0));
+    movr(r0, rn(t0));
+    w = _jit->pc.w;
+    BEQZ(rn(t1), (i0 - w) >> 2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of _bxsubr_u. */
+static jit_word_t
+_bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(t0), i1);
+    w = bxsubr_u(i0, r0, rn(t0));
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Branch to i0 if (r0 & r1) is non-zero. */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    AND(rn(t0), r0, r1);
+    w = _jit->pc.w;
+    BNEZ(rn(t0), (i0 - w) >> 2);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Branch to i0 if (r0 & i1) is non-zero. */
+static jit_word_t
+_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    andi(rn(t0), r0, i1);
+    w = _jit->pc.w;
+    BNEZ(rn(t0), (i0 - w) >> 2);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Branch to i0 if (r0 & r1) is zero. */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    AND(rn(t0), r0, r1);
+    w = _jit->pc.w;
+    BEQZ(rn(t0), (i0 - w) >> 2);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Branch to i0 if (r0 & i1) is zero. */
+static jit_word_t
+_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    jit_int32_t t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    andi(rn(t0), r0, i1);
+    w = _jit->pc.w;
+    BEQZ(rn(t0), (i0 - w) >> 2);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Call i0 with a single BL when the range test passes, otherwise via
+ * calli_p.  Returns the address of the first emitted instruction. */
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    /* NOTE(review): the range test is applied to the absolute target i0,
+     * not to the word displacement d that BL() encodes -- confirm whether
+     * d was intended. */
+    if (can_sign_extend_si26_p(i0))
+        BL(d);
+    else
+        w = calli_p(i0);
+    return (w);
+}
+
+/* Call i0 through a scratch register loaded with the fixed-length movi_p
+ * sequence; returns the address of that sequence. */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    w = movi_p(rn(reg), i0);
+    callr(rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Emit the function prologue: size the frame, drop SP by
+ * stack_framesize, save RA at offset 0, FP at 8, the S0..S8 registers in
+ * use at 16..80 and the FS0..FS7 registers in use at 88..144, set FP to
+ * the new SP, then reserve the local stack area.  Nothing is emitted for
+ * assume_frame functions.  The node argument is not used here. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t reg;
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+        jit_int32_t frame = -_jitc->function->frame;
+        assert(_jitc->function->self.aoff >= frame);
+        if (_jitc->function->assume_frame)
+            return;
+        _jitc->function->self.aoff = frame;
+    }
+    if (_jitc->function->allocar)
+        _jitc->function->self.aoff &= -16;
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                               /* align stack at 16 bytes */
+                               _jitc->function->self.aoff) + 15) & -16;
+    subi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    stxi(0, _SP_REGNO, _RA_REGNO);
+    stxi(8, _SP_REGNO, _FP_REGNO);
+    /* Only registers recorded in the function's regset are saved. */
+    if (jit_regset_tstbit(&_jitc->function->regset, _S0))
+        stxi(16, _SP_REGNO, rn(_S0));
+    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
+        stxi(24, _SP_REGNO, rn(_S1));
+    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
+        stxi(32, _SP_REGNO, rn(_S2));
+    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
+        stxi(40, _SP_REGNO, rn(_S3));
+    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
+        stxi(48, _SP_REGNO, rn(_S4));
+    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
+        stxi(56, _SP_REGNO, rn(_S5));
+    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
+        stxi(64, _SP_REGNO, rn(_S6));
+    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
+        stxi(72, _SP_REGNO, rn(_S7));
+    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
+        stxi(80, _SP_REGNO, rn(_S8));
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
+        stxi_d(88, _SP_REGNO, rn(_FS0));
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
+        stxi_d(96, _SP_REGNO, rn(_FS1));
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
+        stxi_d(104, _SP_REGNO, rn(_FS2));
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
+        stxi_d(112, _SP_REGNO, rn(_FS3));
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
+        stxi_d(120, _SP_REGNO, rn(_FS4));
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
+        stxi_d(128, _SP_REGNO, rn(_FS5));
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
+        stxi_d(136, _SP_REGNO, rn(_FS6));
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
+        stxi_d(144, _SP_REGNO, rn(_FS7));
+    movr(_FP_REGNO, _SP_REGNO);
+    if (_jitc->function->stack)
+        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+        /* Store the current aoff value in the frame slot at aoffoff. */
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), _jitc->function->self.aoff);
+        stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
+        jit_unget_reg(reg);
+    }
+    if (_jitc->function->self.call & jit_call_varargs) {
+        /* Spill the remaining argument registers into the top of the
+         * frame, starting at index vagp. */
+        for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
+            stxi(stack_framesize - ((8 - reg) * 8),
+                 _FP_REGNO, rn(JIT_RA0 - reg));
+    }
+}
+
+/* Emit the function epilogue: restore SP from FP, reload RA, FP and the
+ * saved registers from the offsets used by _prolog, release the frame
+ * and return through RA.  Nothing is emitted for assume_frame functions.
+ * The node argument is not used here. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->function->assume_frame)
+        return;
+    movr(_SP_REGNO, _FP_REGNO);
+    ldxi(_RA_REGNO, _SP_REGNO, 0);
+    ldxi(_FP_REGNO, _SP_REGNO, 8);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S0))
+        ldxi(rn(_S0), _SP_REGNO, 16);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
+        ldxi(rn(_S1), _SP_REGNO, 24);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
+        ldxi(rn(_S2), _SP_REGNO, 32);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
+        ldxi(rn(_S3), _SP_REGNO, 40);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
+        ldxi(rn(_S4), _SP_REGNO, 48);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
+        ldxi(rn(_S5), _SP_REGNO, 56);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
+        ldxi(rn(_S6), _SP_REGNO, 64);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
+        ldxi(rn(_S7), _SP_REGNO, 72);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
+        ldxi(rn(_S8), _SP_REGNO, 80);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
+        ldxi_d(rn(_FS0), _SP_REGNO, 88);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
+        ldxi_d(rn(_FS1), _SP_REGNO, 96);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
+        ldxi_d(rn(_FS2), _SP_REGNO, 104);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
+        ldxi_d(rn(_FS3), _SP_REGNO, 112);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
+        ldxi_d(rn(_FS4), _SP_REGNO, 120);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
+        ldxi_d(rn(_FS5), _SP_REGNO, 128);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
+        ldxi_d(rn(_FS6), _SP_REGNO, 136);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
+        ldxi_d(rn(_FS7), _SP_REGNO, 144);
+    addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    JIRL(_ZERO_REGNO, _RA_REGNO, 0);
+}
+
+/* Point r0 at the first variadic argument: inside the register save area
+ * written by _prolog when vagp still names an argument register,
+ * otherwise at FP + self.size.  Only valid in varargs functions. */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    assert(_jitc->function->self.call & jit_call_varargs);
+    /* Initialize va_list to the first stack argument. */
+    if (jit_arg_reg_p(_jitc->function->vagp))
+        addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8));
+    else
+        addi(r0, _FP_REGNO, _jitc->function->self.size);
+}
+
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(_jitc->function->self.call & jit_call_varargs);
+    /* Load argument.
*/ + ldr(r0, r1); + /* Update va_list. */ + addi(r1, r1, sizeof(jit_word_t)); +} + +static void +_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) +{ + jit_uint32_t c; + union { + jit_uint32_t *i; + jit_word_t w; + } u; + u.w = instr; + c = u.i[0]; + /* movi_p? */ + if ((c >> 25) == 10) { /* LU12I_W */ + jit_int32_t _00_11, _12_31, _32_51, _52_63; + _00_11 = label & 0xfff; + _12_31 = (label >> 12) & 0xfffff; + _32_51 = (label >> 32) & 0xfffff; + _52_63 = (label >> 52) & 0xfff; + u.i[0] &= ~(0xfffff << 5); + u.i[0] |= _12_31 << 5; + assert((u.i[1] >> 22) == 14); /* ORI */ + u.i[1] &= ~(0xfff << 10); + u.i[1] |= _00_11 << 10; + assert((u.i[2] >> 25) == 11); /* LU32I_D */ + u.i[2] &= ~(0xfffff << 5); + u.i[2] |= _32_51 << 5; + assert((u.i[3] >> 22) == 12); /* LU52I_D */ + u.i[3] &= ~(0xfff << 10); + u.i[3] |= _52_63 << 0; + } + else if ((c >> 26) >= 22 && (c >> 26) <= 27) { + /* B{EQ,NE,LT,GE,LTU,GEU} */ + jit_word_t disp = (label - instr) >> 2; + assert(can_sign_extend_si16_p(disp)); + u.i[0] &= ~(0xffff << 10); + u.i[0] |= (disp & 0xffff) << 10; + } + else if ((c >> 26) == 20 || (c >> 26) == 21) { /* B or BL */ + jit_word_t disp = (label - instr) >> 2; + assert(can_sign_extend_si26_p(disp)); + disp &= 0x3ffffff; + u.i[0] &= ~0x3ffffff; + u.i[0] |= ((disp & 0xffff) << 10) | (disp >> 16); + } + else if ((c >> 26) >= 16 && (c >> 26) <= 18) { /* B{,C}{EQ,NE}Z */ + jit_word_t disp = (label - instr) >> 2; + assert(can_sign_extend_si21_p(disp)); + disp &= 0x1fffff; + u.i[0] &= ~((0xffff << 10) | 0x1f); + u.i[0] |= ((disp & 0xffff) << 10) | (disp >> 16); + } + else + abort(); +} +#endif diff --git a/deps/lightning/lib/jit_loongarch-fpu.c b/deps/lightning/lib/jit_loongarch-fpu.c new file mode 100644 index 000000000..5874afde6 --- /dev/null +++ b/deps/lightning/lib/jit_loongarch-fpu.c @@ -0,0 +1,1318 @@ +/* + * Copyright (C) 2022 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. 
+ * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#if PROTO +# define FADD_S(fd, fj, fk) orrr(0x201, fk, fj, fd) +# define FADD_D(fd, fj, fk) orrr(0x202, fk, fj, fd) +# define FSUB_S(fd, fj, fk) orrr(0x205, fk, fj, fd) +# define FSUB_D(fd, fj, fk) orrr(0x206, fk, fj, fd) +# define FMUL_S(fd, fj, fk) orrr(0x209, fk, fj, fd) +# define FMUL_D(fd, fj, fk) orrr(0x20a, fk, fj, fd) +# define FDIV_S(fd, fj, fk) orrr(0x20d, fk, fj, fd) +# define FDIV_D(fd, fj, fk) orrr(0x20e, fk, fj, fd) +# define FMAX_S(fd, fj, fk) orrr(0x211, fk, fj, fd) +# define FMAX_D(fd, fj, fk) orrr(0x212, fk, fj, fd) +# define FMIN_S(fd, fj, fk) orrr(0x215, fk, fj, fd) +# define FMIN_D(fd, fj, fk) orrr(0x216, fk, fj, fd) +# define FMAXA_S(fd, fj, fk) orrr(0x219, fk, fj, fd) +# define FMAXA_D(fd, fj, fk) orrr(0x21a, fk, fj, fd) +# define FMINA_S(fd, fj, fk) orrr(0x21d, fk, fj, fd) +# define FMINA_D(fd, fj, fk) orrr(0x21e, fk, fj, fd) +# define FSCALEB_S(fd, fj, fk) orrr(0x221, fk, fj, fd) +# define FSCALEB_D(fd, fj, fk) orrr(0x222, fk, fj, fd) +# define FCOPYSIGN_S(fd, fj, fk) orrr(0x225, fk, fj, fd) +# define FCOPYSIGN_D(fd, fj, fk) orrr(0x226, fk, fj, fd) +# define FABS_S(fd, fj) ou5rr(0x228, 0x01, fj, fd) +# define FABS_D(fd, fj) ou5rr(0x228, 0x02, fj, fd) +# define FNEG_S(fd, fj) ou5rr(0x228, 0x05, fj, fd) +# define FNEG_D(fd, fj) ou5rr(0x228, 0x06, fj, fd) +# define FGLOB_S(fd, fj) ou5rr(0x228, 0x09, fj, fd) +# define FGLOB_D(fd, fj) ou5rr(0x228, 0x0a, fj, fd) +# define 
FCLASS_S(fd, fj) ou5rr(0x228, 0x0d, fj, fd) +# define FCLASS_D(fd, fj) ou5rr(0x228, 0x0e, fj, fd) +# define FSQRT_S(fd, fj) ou5rr(0x228, 0x11, fj, fd) +# define FSQRT_D(fd, fj) ou5rr(0x228, 0x12, fj, fd) +# define FRECIP_S(fd, fj) ou5rr(0x228, 0x15, fj, fd) +# define FRECIP_D(fd, fj) ou5rr(0x228, 0x16, fj, fd) +# define FRSQRT_S(fd, fj) ou5rr(0x228, 0x19, fj, fd) +# define FRSQRT_D(fd, fj) ou5rr(0x228, 0x1a, fj, fd) +# define FMOV_S(fd, fj) ou5rr(0x229, 0x05, fj, fd) +# define FMOV_D(fd, fj) ou5rr(0x229, 0x06, fj, fd) +# define MOVGR2FR_W(fd, rj) ou5rr(0x229, 0x09, rj, fd) +# define MOVGR2FR_D(fd, rj) ou5rr(0x229, 0x0a, rj, fd) +# define MOVGR2FRH_W(fd, rj) ou5rr(0x229, 0x0b, rj, fd) +# define MOVFR2GR_S(rd, fj) ou5rr(0x229, 0x0d, fj, rd) +# define MOVFR2GR_D(rd, fj) ou5rr(0x229, 0x0e, fj, rd) +# define MOVFRH2GR_S(rd, fj) ou5rr(0x229, 0x0f, fj, rd) +# define MOVGR2FCSR(fc, rj) ou5rr(0x229, 0x10, rj, fc) +# define MOVFCSR2GR(rd, fc) ou5rr(0x229, 0x12, fc, rd) +# define MOVFR2CF(cd, fj) o5r23(0x229, 0x14, fj, 0, cd) +# define MOVCF2FR(fd, cj) o523r(0x229, 0x15, 0, cj, fd) +# define MOVGR2CF(cd, fj) o5r23(0x229, 0x16, fj, 0, cd) +# define MOVCF2GR(fd, cj) o523r(0x229, 0x17, 0, cj, fd) +# define FCVT_S_D(fd, fj) ou5rr(0x232, 0x06, fj, fd) +# define FCVT_D_S(fd, fj) ou5rr(0x232, 0x09, fj, fd) +# define FTINTRM_W_S(fd, fj) ou5rr(0x234, 0x01, fj, fd) +# define FTINTRM_W_D(fd, fj) ou5rr(0x234, 0x02, fj, fd) +# define FTINTRM_L_S(fd, fj) ou5rr(0x234, 0x09, fj, fd) +# define FTINTRM_L_D(fd, fj) ou5rr(0x234, 0x0a, fj, fd) +# define FTINTRP_W_S(fd, fj) ou5rr(0x234, 0x11, fj, fd) +# define FTINTRP_W_D(fd, fj) ou5rr(0x234, 0x12, fj, fd) +# define FTINTRP_L_S(fd, fj) ou5rr(0x234, 0x19, fj, fd) +# define FTINTRP_L_D(fd, fj) ou5rr(0x234, 0x1a, fj, fd) +# define FTINTRZ_W_S(fd, fj) ou5rr(0x235, 0x01, fj, fd) +# define FTINTRZ_W_D(fd, fj) ou5rr(0x235, 0x02, fj, fd) +# define FTINTRZ_L_S(fd, fj) ou5rr(0x235, 0x09, fj, fd) +# define FTINTRZ_L_D(fd, fj) ou5rr(0x235, 0x0a, fj, fd) +# 
define FTINTRNE_W_S(fd, fj) ou5rr(0x235, 0x11, fj, fd) +# define FTINTRNE_W_D(fd, fj) ou5rr(0x235, 0x12, fj, fd) +# define FTINTRNE_L_S(fd, fj) ou5rr(0x235, 0x19, fj, fd) +# define FTINTRNE_L_D(fd, fj) ou5rr(0x235, 0x1a, fj, fd) +# define FTINT_W_S(fd, fj) ou5rr(0x236, 0x01, fj, fd) +# define FTINT_W_D(fd, fj) ou5rr(0x236, 0x02, fj, fd) +# define FTINT_L_S(fd, fj) ou5rr(0x236, 0x09, fj, fd) +# define FTINT_L_D(fd, fj) ou5rr(0x236, 0x0a, fj, fd) +# define FFINT_S_W(fd, fj) ou5rr(0x23a, 0x04, fj, fd) +# define FFINT_S_L(fd, fj) ou5rr(0x23a, 0x06, fj, fd) +# define FFINT_D_W(fd, fj) ou5rr(0x23a, 0x08, fj, fd) +# define FFINT_D_L(fd, fj) ou5rr(0x23a, 0x0a, fj, fd) +# define FRINT_S(fd, fj) ou5rr(0x23c, 0x11, fj, fd) +# define FRINT_D(fd, fj) ou5rr(0x23c, 0x12, fj, fd) +# define FMADD_S(fd, fj, fk, fa) orrrr(0x081, fa, fk, fj, fd) +# define FMADD_D(fd, fj, fk, fa) orrrr(0x082, fa, fk, fj, fd) +# define FMSUB_S(fd, fj, fk, fa) orrrr(0x085, fa, fk, fj, fd) +# define FMSUB_D(fd, fj, fk, fa) orrrr(0x086, fa, fk, fj, fd) +# define FNMADD_S(fd, fj, fk, fa) orrrr(0x089, fa, fk, fj, fd) +# define FNMADD_D(fd, fj, fk, fa) orrrr(0x08a, fa, fk, fj, fd) +# define FNMSUB_S(fd, fj, fk, fa) orrrr(0x08d, fa, fk, fj, fd) +# define FNMSUB_D(fd, fj, fk, fa) orrrr(0x08e, fa, fk, fj, fd) +/* No QNaN exception */ +# define _CAF 0x00 +# define _CUN 0x08 +# define _CEQ 0x04 +# define _CUEQ 0x0c +# define _CLT 0x02 +# define _CULT 0x0a +# define _CLE 0x06 +# define _CULE 0x0e +# define _CNE 0x10 +# define _COR 0x14 +# define _CUNE 0x18 +/* QNaN exception */ +# define _SAF 0x01 +# define _SUN 0x09 +# define _SEQ 0x05 +# define _SUEQ 0x0D +# define _SLT 0x03 +# define _SULT 0x0B +# define _SLE 0x07 +# define _SULE 0x0F +# define _SNE 0x11 +# define _SOR 0x15 +# define _SUNE 0x19 +# define FCMP_cond_S(cc, cd, fj, fk) ou5rru2u3(0x0c1, cc, fk, fj, 0, cd) +# define FCMP_CAF_S(cd, fj, fk) FCMP_cond_S( _CAF, cd, fj, fk) +# define FCMP_CUN_S(cd, fj, fk) FCMP_cond_S( _CUN, cd, fj, fk) +# define 
FCMP_CEQ_S(cd, fj, fk) FCMP_cond_S( _CEQ, cd, fj, fk)
+# define FCMP_CUEQ_S(cd, fj, fk) FCMP_cond_S(_CUEQ, cd, fj, fk)
+# define FCMP_CLT_S(cd, fj, fk) FCMP_cond_S( _CLT, cd, fj, fk)
+# define FCMP_CULT_S(cd, fj, fk) FCMP_cond_S(_CULT, cd, fj, fk)
+# define FCMP_CLE_S(cd, fj, fk) FCMP_cond_S( _CLE, cd, fj, fk)
+# define FCMP_CULE_S(cd, fj, fk) FCMP_cond_S(_CULE, cd, fj, fk)
+# define FCMP_CNE_S(cd, fj, fk) FCMP_cond_S( _CNE, cd, fj, fk)
+# define FCMP_COR_S(cd, fj, fk) FCMP_cond_S( _COR, cd, fj, fk)
+# define FCMP_CUNE_S(cd, fj, fk) FCMP_cond_S(_CUNE, cd, fj, fk)
+# define FCMP_SAF_S(cd, fj, fk) FCMP_cond_S( _SAF, cd, fj, fk)  /* FCMP_S* pass the _S* ("QNaN exception") codes, not the _C* ones */
+# define FCMP_SUN_S(cd, fj, fk) FCMP_cond_S( _SUN, cd, fj, fk)
+# define FCMP_SEQ_S(cd, fj, fk) FCMP_cond_S( _SEQ, cd, fj, fk)
+# define FCMP_SUEQ_S(cd, fj, fk) FCMP_cond_S(_SUEQ, cd, fj, fk)
+# define FCMP_SLT_S(cd, fj, fk) FCMP_cond_S( _SLT, cd, fj, fk)
+# define FCMP_SULT_S(cd, fj, fk) FCMP_cond_S(_SULT, cd, fj, fk)
+# define FCMP_SLE_S(cd, fj, fk) FCMP_cond_S( _SLE, cd, fj, fk)
+# define FCMP_SULE_S(cd, fj, fk) FCMP_cond_S(_SULE, cd, fj, fk)
+# define FCMP_SNE_S(cd, fj, fk) FCMP_cond_S( _SNE, cd, fj, fk)
+# define FCMP_SOR_S(cd, fj, fk) FCMP_cond_S( _SOR, cd, fj, fk)
+# define FCMP_SUNE_S(cd, fj, fk) FCMP_cond_S(_SUNE, cd, fj, fk)
+# define FCMP_cond_D(cc, cd, fj, fk) ou5rru2u3(0x0c2, cc, fk, fj, 0, cd)  /* double-precision compare; cc is one of the _C*/_S* codes */
+# define FCMP_CAF_D(cd, fj, fk) FCMP_cond_D( _CAF, cd, fj, fk)
+# define FCMP_CUN_D(cd, fj, fk) FCMP_cond_D( _CUN, cd, fj, fk)
+# define FCMP_CEQ_D(cd, fj, fk) FCMP_cond_D( _CEQ, cd, fj, fk)
+# define FCMP_CUEQ_D(cd, fj, fk) FCMP_cond_D(_CUEQ, cd, fj, fk)
+# define FCMP_CLT_D(cd, fj, fk) FCMP_cond_D( _CLT, cd, fj, fk)
+# define FCMP_CULT_D(cd, fj, fk) FCMP_cond_D(_CULT, cd, fj, fk)
+# define FCMP_CLE_D(cd, fj, fk) FCMP_cond_D( _CLE, cd, fj, fk)
+# define FCMP_CULE_D(cd, fj, fk) FCMP_cond_D(_CULE, cd, fj, fk)
+# define FCMP_CNE_D(cd, fj, fk) FCMP_cond_D( _CNE, cd, fj, fk)
+# define FCMP_COR_D(cd, fj, fk) FCMP_cond_D( _COR, cd, fj, fk)
+# define FCMP_CUNE_D(cd, fj, fk) FCMP_cond_D(_CUNE, cd, fj, fk)
+# define FCMP_SAF_D(cd, fj, fk) FCMP_cond_D( _SAF, cd, fj, fk)  /* FCMP_S* pass the _S* ("QNaN exception") codes, not the _C* ones */
+# define FCMP_SUN_D(cd, fj, fk) FCMP_cond_D( _SUN, cd, fj, fk)
+# define FCMP_SEQ_D(cd, fj, fk) FCMP_cond_D( _SEQ, cd, fj, fk)
+# define FCMP_SUEQ_D(cd, fj, fk) FCMP_cond_D(_SUEQ, cd, fj, fk)
+# define FCMP_SLT_D(cd, fj, fk) FCMP_cond_D( _SLT, cd, fj, fk)
+# define FCMP_SULT_D(cd, fj, fk) FCMP_cond_D(_SULT, cd, fj, fk)
+# define FCMP_SLE_D(cd, fj, fk) FCMP_cond_D( _SLE, cd, fj, fk)
+# define FCMP_SULE_D(cd, fj, fk) FCMP_cond_D(_SULE, cd, fj, fk)
+# define FCMP_SNE_D(cd, fj, fk) FCMP_cond_D( _SNE, cd, fj, fk)
+# define FCMP_SOR_D(cd, fj, fk) FCMP_cond_D( _SOR, cd, fj, fk)
+# define FCMP_SUNE_D(cd, fj, fk) FCMP_cond_D(_SUNE, cd, fj, fk)
+# define FSEL(fd, fj, fk, u3) ou3rrr(0x340, u3, fk, fj, fd)
+# define FLD_S(fd, rj, si12) os12rr(0x0ac, si12, rj, fd)  /* FLD/FST take base register rj plus a 12-bit immediate */
+# define FST_S(fd, rj, si12) os12rr(0x0ad, si12, rj, fd)
+# define FLD_D(fd, rj, si12) os12rr(0x0ae, si12, rj, fd)
+# define FST_D(fd, rj, si12) os12rr(0x0af, si12, rj, fd)
+# define FLDX_S(fd, rj, rk) orrr(0x7060, rk, rj, fd)  /* FLDX/FSTX take base register rj plus index register rk */
+# define FLDX_D(fd, rj, rk) orrr(0x7068, rk, rj, fd)
+# define FSTX_S(fd, rj, rk) orrr(0x7070, rk, rj, fd)
+# define FSTX_D(fd, rj, rk) orrr(0x7078, rk, rj, fd)
+# define FLDGT_S(fd, rj, rk) orrr(0x70e8, rk, rj, fd)
+# define FLDGT_D(fd, rj, rk) orrr(0x70e9, rk, rj, fd)
+# define FLDLE_S(fd, rj, rk) orrr(0x70ea, rk, rj, fd)
+# define FLDLE_D(fd, rj, rk) orrr(0x70eb, rk, rj, fd)
+# define FSTGT_S(fd, rj, rk) orrr(0x70ec, rk, rj, fd)
+# define FSTGT_D(fd, rj, rk) orrr(0x70ed, rk, rj, fd)
+# define FSTLE_S(fd, rj, rk) orrr(0x70ee, rk, rj, fd)
+# define FSTLE_D(fd, rj, rk) orrr(0x70ef, rk, rj, fd)
+# define truncr_f_i(r0, r1) _truncr_f_i(_jit, r0, r1)
+static void _truncr_f_i(jit_state_t*, jit_int32_t, jit_int32_t);
+# define truncr_d_i(r0, r1) _truncr_d_i(_jit, r0, r1)
+static void _truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t);
+# define truncr_f_l(r0, r1)
_truncr_f_l(_jit, r0, r1) +static void _truncr_f_l(jit_state_t*, jit_int32_t, jit_int32_t); +# define truncr_d_l(r0, r1) _truncr_d_l(_jit, r0, r1) +static void _truncr_d_l(jit_state_t*, jit_int32_t, jit_int32_t); +# define addr_f(r0, r1, r2) FADD_S(r0, r1, r2) +# define addi_f(r0, r1, i0) _addi_f(_jit, r0, r1, i0) +static void _addi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define subr_f(r0, r1, r2) FSUB_S(r0, r1, r2) +# define subi_f(r0, r1, i0) _subi_f(_jit, r0, r1, i0) +static void _subi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define rsbr_f(r0, r1, r2) FSUB_S(r0, r2, r1) +# define rsbi_f(r0, r1, i0) _rsbi_f(_jit, r0, r1, i0) +static void _rsbi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define mulr_f(r0, r1, r2) FMUL_S(r0, r1, r2) +# define muli_f(r0, r1, i0) _muli_f(_jit, r0, r1, i0) +static void _muli_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define divr_f(r0, r1, r2) FDIV_S(r0, r1, r2) +# define divi_f(r0, r1, i0) _divi_f(_jit, r0, r1, i0) +static void _divi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define absr_f(r0, r1) FABS_S(r0, r1) +# define negr_f(r0, r1) FNEG_S(r0, r1) +# define sqrtr_f(r0, r1) FSQRT_S(r0, r1) +# define extr_f(r0, r1) _extr_f(_jit, r0, r1) +static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t); +# define ldr_f(r0, r1) FLD_S(r0, r1, 0) +# define ldi_f(r0, i0) _ldi_f(_jit, r0, i0) +static void _ldi_f(jit_state_t*, jit_int32_t, jit_word_t); +# define ldxr_f(r0, r1, r2) FLDX_S(r0, r1, r2) +# define ldxi_f(r0, r1, i0) _ldxi_f(_jit, r0, r1, i0) +static void _ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define str_f(r0, r1) FST_S(r1, r0, 0) +# define sti_f(i0, r0) _sti_f(_jit, i0, r0) +static void _sti_f(jit_state_t*, jit_word_t, jit_int32_t); +# define stxr_f(r0, r1, r2) FSTX_S(r2, r1, r0) +# define stxi_f(i0, r0, r1) _stxi_f(_jit, i0, r0, r1) +static void _stxi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define 
movr_f(r0, r1) FMOV_S(r0, r1) +# define movi_f(r0, i0) _movi_f(_jit, r0, i0) +static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t); +# define movr_f_w(r0, r1) MOVFR2GR_S(r0, r1) +# define movi_f_w(r0, im) _movi_f_w(_jit, r0, im) +static void _movi_f_w(jit_state_t*, jit_int32_t, jit_float32_t); +# define movr_w_f(r0, r1) MOVGR2FR_W(r0, r1) +# define extr_d_f(r0, r1) FCVT_S_D(r0, r1) +# define ltr_f(r0, r1, r2) _ltr_f(_jit, r0, r1, r2) +static void _ltr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define lti_f(r0, r1, i0) _lti_f(_jit, r0, r1, i0) +static void _lti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define ler_f(r0, r1, r2) _ler_f(_jit, r0, r1, r2) +static void _ler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define lei_f(r0, r1, i0) _lei_f(_jit, r0, r1, i0) +static void _lei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define eqr_f(r0, r1, r2) _eqr_f(_jit, r0, r1, r2) +static void _eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define eqi_f(r0, r1, i0) _eqi_f(_jit, r0, r1, i0) +static void _eqi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define ger_f(r0, r1, r2) _ger_f(_jit, r0, r1, r2) +static void _ger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define gei_f(r0, r1, i0) _gei_f(_jit, r0, r1, i0) +static void _gei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define gtr_f(r0, r1, r2) _gtr_f(_jit, r0, r1, r2) +static void _gtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define gti_f(r0, r1, i0) _gti_f(_jit, r0, r1, i0) +static void _gti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define ner_f(r0, r1, r2) _ner_f(_jit, r0, r1, r2) +static void _ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define nei_f(r0, r1, i0) _nei_f(_jit, r0, r1, i0) +static void _nei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define unltr_f(r0, r1, r2) _unltr_f(_jit, r0, r1, r2) +static void 
_unltr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define unlti_f(r0, r1, i0) _unlti_f(_jit, r0, r1, i0) +static void _unlti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define unler_f(r0, r1, r2) _unler_f(_jit, r0, r1, r2) +static void _unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define unlei_f(r0, r1, i1) _unlei_f(_jit, r0, r1, i1) +static void _unlei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define uneqr_f(r0, r1, r2) _uneqr_f(_jit, r0, r1, r2) +static void _uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define uneqi_f(r0, r1, i0) _uneqi_f(_jit, r0, r1, i0) +static void _uneqi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define unger_f(r0, r1, r2) _unger_f(_jit, r0, r1, r2) +static void _unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ungei_f(r0, r1, i0) _ungei_f(_jit, r0, r1, i0) +static void _ungei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define ungtr_f(r0, r1, r2) _ungtr_f(_jit, r0, r1, r2) +static void _ungtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ungti_f(r0, r1, i0) _ungti_f(_jit, r0, r1, i0) +static void _ungti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define ltgtr_f(r0, r1, r2) _ltgtr_f(_jit, r0, r1, r2) +static void _ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ltgti_f(r0, r1, i0) _ltgti_f(_jit, r0, r1, i0) +static void _ltgti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define ordr_f(r0, r1, r2) _ordr_f(_jit, r0, r1, r2) +static void _ordr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ordi_f(r0, r1, i0) _ordi_f(_jit, r0, r1, i0) +static void _ordi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define unordr_f(r0, r1, r2) _unordr_f(_jit, r0, r1, r2) +static void _unordr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define unordi_f(r0, r1, i0) _unordi_f(_jit, r0, r1, i0) 
+static void _unordi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); +# define bltr_f(i0, r0, r1) _bltr_f(_jit, i0, r0, r1) +static jit_word_t _bltr_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define blti_f(i0, r0, i1) _blti_f(_jit, i0, r0, i1) +static jit_word_t _blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t); +# define bler_f(i0, r0, r1) _bler_f(_jit, i0, r0, r1) +static jit_word_t _bler_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define blei_f(i0, r0, i1) _blei_f(_jit, i0, r0, i1) +static jit_word_t _blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t); +# define beqr_f(i0, r0, r1) _beqr_f(_jit, i0, r0, r1) +static jit_word_t _beqr_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define beqi_f(i0, r0, i1) _beqi_f(_jit, i0, r0, i1) +static jit_word_t _beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t); +# define bger_f(i0, r0, r1) _bger_f(_jit, i0, r0, r1) +static jit_word_t _bger_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bgei_f(i0, r0, i1) _bgei_f(_jit, i0, r0, i1) +static jit_word_t _bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t); +# define bgtr_f(i0, r0, r1) _bgtr_f(_jit,i0, r0, r1) +static jit_word_t _bgtr_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bgti_f(i0, r0, i1) _bgti_f(_jit, i0, r0, i1) +static jit_word_t _bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t); +# define bner_f(i0, r0, r1) _bner_f(_jit, i0, r0, r1) +static jit_word_t _bner_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bnei_f(i0, r0, i1) _bnei_f(_jit, i0, r0, i1) +static jit_word_t _bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t); +# define bunltr_f(i0, r0, r1) _bunltr_f(_jit, i0, r0, r1) +static jit_word_t _bunltr_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bunlti_f(i0, r0, i1) _bunlti_f(_jit, i0, r0, i1) +static jit_word_t _bunlti_f(jit_state_t*, jit_word_t,jit_int32_t,jit_float32_t); +# 
define bunler_f(i0, r0, r1) _bunler_f(_jit, i0, r0, r1) +static jit_word_t _bunler_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bunlei_f(i0, r0, i1) _bunlei_f(_jit, i0, r0, i1) +static jit_word_t _bunlei_f(jit_state_t*, jit_word_t,jit_int32_t,jit_float32_t); +# define buneqr_f(i0, r0, r1) _buneqr_f(_jit, i0, r0, r1) +static jit_word_t _buneqr_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define buneqi_f(i0, r0, i1) _buneqi_f(_jit, i0, r0, i1) +static jit_word_t _buneqi_f(jit_state_t*, jit_word_t,jit_int32_t,jit_float32_t); +# define bunger_f(i0, r0, r1) _bunger_f(_jit, i0, r0, r1) +static jit_word_t _bunger_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bungei_f(i0, r0, i1) _bungei_f(_jit, i0, r0, i1) +static jit_word_t _bungei_f(jit_state_t*, jit_word_t,jit_int32_t,jit_float32_t); +# define bungtr_f(i0, r0, r1) _bungtr_f(_jit, i0, r0, r1) +static jit_word_t _bungtr_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bungti_f(i0, r0, i1) _bungti_f(_jit, i0, r0, i1) +static jit_word_t _bungti_f(jit_state_t*, jit_word_t,jit_int32_t,jit_float32_t); +# define bltgtr_f(i0, r0, r1) _bltgtr_f(_jit, i0, r0, r1) +static jit_word_t _bltgtr_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bltgti_f(i0, r0, i1) _bltgti_f(_jit, i0, r0, i1) +static jit_word_t _bltgti_f(jit_state_t*, jit_word_t,jit_int32_t,jit_float32_t); +# define bordr_f(i0, r0, r1) _bordr_f(_jit, i0, r0, r1) +static jit_word_t _bordr_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bordi_f(i0, r0, i1) _bordi_f(_jit, i0, r0, i1) +static jit_word_t _bordi_f(jit_state_t*, jit_word_t, jit_int32_t,jit_float32_t); +# define bunordr_f(i0, r0, r1) _bunordr_f(_jit, i0, r0, r1) +static jit_word_t _bunordr_f(jit_state_t*, jit_word_t, jit_int32_t,jit_int32_t); +# define bunordi_f(i0, r0, i1) _bunordi_f(_jit, i0,r0, i1) +static jit_word_t _bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t); +# define addr_d(r0, r1, r2) 
FADD_D(r0, r1, r2) +# define addi_d(r0, r1, i0) _addi_d(_jit, r0, r1, i0) +static void _addi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define subr_d(r0, r1, r2) FSUB_D(r0, r1, r2) +# define subi_d(r0, r1, i0) _subi_d(_jit, r0, r1, i0) +static void _subi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define rsbr_d(r0, r1, r2) FSUB_D(r0, r2, r1) +# define rsbi_d(r0, r1, i0) _rsbi_d(_jit, r0, r1, i0) +static void _rsbi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define mulr_d(r0, r1, r2) FMUL_D(r0, r1, r2) +# define muli_d(r0, r1, i0) _muli_d(_jit, r0, r1, i0) +static void _muli_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define divr_d(r0, r1, r2) FDIV_D(r0, r1, r2) +# define divi_d(r0, r1, i0) _divi_d(_jit, r0, r1, i0) +static void _divi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define absr_d(r0, r1) FABS_D(r0, r1) +# define negr_d(r0, r1) FNEG_D(r0, r1) +# define sqrtr_d(r0, r1) FSQRT_D(r0, r1) +# define extr_d(r0, r1) _extr_d(_jit, r0, r1) +static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t); +# define ldr_d(r0, r1) FLD_D(r0, r1, 0) +# define ldi_d(r0, i0) _ldi_d(_jit, r0, i0) +static void _ldi_d(jit_state_t*, jit_int32_t, jit_word_t); +# define ldxr_d(r0, r1, r2) FLDX_D(r0, r1, r2) +# define ldxi_d(r0, r1, i0) _ldxi_d(_jit, r0, r1, i0) +static void _ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define str_d(r0, r1) FST_D(r1, r0, 0) +# define sti_d(i0, r0) _sti_d(_jit, i0, r0) +static void _sti_d(jit_state_t*, jit_word_t, jit_int32_t); +# define stxr_d(r0, r1, r2) FSTX_D(r2, r1, r0) +# define stxi_d(i0, r0, r1) _stxi_d(_jit, i0, r0, r1) +static void _stxi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define movr_d(r0, r1) FMOV_D(r0, r1) +# define movi_d(r0, i0) _movi_d(_jit, r0, i0) +static void _movi_d(jit_state_t*, jit_int32_t, jit_float64_t); +# define movr_d_w(r0, r1) MOVFR2GR_D(r0, r1) +# define movi_d_w(r0, im) _movi_d_w(_jit, r0, im) 
+static void _movi_d_w(jit_state_t*, jit_int32_t, jit_float64_t); +# define movr_w_d(r0, r1) MOVGR2FR_D(r0, r1) +# define extr_f_d(r0, r1) FCVT_D_S(r0, r1) +# define ltr_d(r0, r1, r2) _ltr_d(_jit, r0, r1, r2) +static void _ltr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define lti_d(r0, r1, i0) _lti_d(_jit, r0, r1, i0) +static void _lti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define ler_d(r0, r1, r2) _ler_d(_jit, r0, r1, r2) +static void _ler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define lei_d(r0, r1, i0) _lei_d(_jit, r0, r1, i0) +static void _lei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define eqr_d(r0, r1, r2) _eqr_d(_jit, r0, r1, r2) +static void _eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define eqi_d(r0, r1, i0) _eqi_d(_jit, r0, r1, i0) +static void _eqi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define ger_d(r0, r1, r2) _ger_d(_jit, r0, r1, r2) +static void _ger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define gei_d(r0, r1, i0) _gei_d(_jit, r0, r1, i0) +static void _gei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define gtr_d(r0, r1, r2) _gtr_d(_jit, r0, r1, r2) +static void _gtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define gti_d(r0, r1, i0) _gti_d(_jit, r0, r1, i0) +static void _gti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define ner_d(r0, r1, r2) _ner_d(_jit, r0, r1, r2) +static void _ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define nei_d(r0, r1, i0) _nei_d(_jit, r0, r1, i0) +static void _nei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define unltr_d(r0, r1, r2) _unltr_d(_jit, r0, r1, r2) +static void _unltr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define unlti_d(r0, r1, i0) _unlti_d(_jit, r0, r1, i0) +static void _unlti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define unler_d(r0, r1, r2) 
_unler_d(_jit, r0, r1, r2) +static void _unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define unlei_d(r0, r1, i0) _unlei_d(_jit, r0, r1, i0) +static void _unlei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define uneqr_d(r0, r1, r2) _uneqr_d(_jit, r0, r1, r2) +static void _uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define uneqi_d(r0, r1, i0) _uneqi_d(_jit, r0, r1, i0) +static void _uneqi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define unger_d(r0, r1, r2) _unger_d(_jit, r0, r1, r2) +static void _unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ungei_d(r0, r1, i0) _ungei_d(_jit, r0, r1, i0) +static void _ungei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define ungtr_d(r0, r1, r2) _ungtr_d(_jit, r0, r1, r2) +static void _ungtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ungti_d(r0, r1, i0) _ungti_d(_jit, r0, r1, i0) +static void _ungti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define ltgtr_d(r0, r1, r2) _ltgtr_d(_jit, r0, r1, r2) +static void _ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ltgti_d(r0, r1, i0) _ltgti_d(_jit, r0, r1, i0) +static void _ltgti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define ordr_d(r0, r1, r2) _ordr_d(_jit, r0, r1, r2) +static void _ordr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ordi_d(r0, r1, i0) _ordi_d(_jit, r0, r1, i0) +static void _ordi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define unordr_d(r0, r1, r2) _unordr_d(_jit, r0, r1, r2) +static void _unordr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define unordi_d(r0, r1, i0) _unordi_d(_jit, r0, r1, i0) +static void _unordi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +# define bltr_d(i0, r0, r1) _bltr_d(_jit, i0, r0, r1) +static jit_word_t _bltr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define 
blti_d(i0, r0, i1) _blti_d(_jit, i0, r0, i1) +static jit_word_t _blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t); +# define bler_d(i0, r0, r1) _bler_d(_jit, i0, r0, r1) +static jit_word_t _bler_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define blei_d(i0, r0, i1) _blei_d(_jit, i0,r0, i1) +static jit_word_t _blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t); +# define beqr_d(i0, r0, r1) _beqr_d(_jit, i0, r0, r1) +static jit_word_t _beqr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define beqi_d(i0, r0, i1) _beqi_d(_jit, i0, r0, i1) +static jit_word_t _beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t); +# define bger_d(i0, r0, r1) _bger_d(_jit, i0, r0, r1) +static jit_word_t _bger_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bgei_d(i0, r0, i1) _bgei_d(_jit, i0, r0, i1) +static jit_word_t _bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t); +# define bgtr_d(i0, r0, r1) _bgtr_d(_jit, i0, r0, r1) +static jit_word_t _bgtr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bgti_d(i0, r0, i1) _bgti_d(_jit, i0, r0, i1) +static jit_word_t _bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t); +# define bner_d(i0, r0, r1) _bner_d(_jit, i0, r0, r1) +static jit_word_t _bner_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bnei_d(i0, r0, i1) _bnei_d(_jit, i0, r0, i1) +static jit_word_t _bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t); +# define bunltr_d(i0, r0, r1) _bunltr_d(_jit, i0, r0, r1) +static jit_word_t _bunltr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bunlti_d(i0, r0, i1) _bunlti_d(_jit, i0, r0, i1) +static jit_word_t _bunlti_d(jit_state_t*, jit_word_t,jit_int32_t,jit_float64_t); +# define bunler_d(i0, r0, r1) _bunler_d(_jit, i0, r0, r1) +static jit_word_t _bunler_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bunlei_d(i0, r0, i1) _bunlei_d(_jit, i0, r0, i1) +static jit_word_t 
_bunlei_d(jit_state_t*, jit_word_t,jit_int32_t,jit_float64_t); +# define buneqr_d(i0, r0, r1) _buneqr_d(_jit, i0, r0, r1) +static jit_word_t _buneqr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define buneqi_d(i0, r0, i1) _buneqi_d(_jit, i0,r0, i1) +static jit_word_t _buneqi_d(jit_state_t*, jit_word_t,jit_int32_t,jit_float64_t); +# define bunger_d(i0, r0, r1) _bunger_d(_jit, i0, r0, r1) +static jit_word_t _bunger_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bungei_d(i0, r0, i1) _bungei_d(_jit, i0, r0, i1) +static jit_word_t _bungei_d(jit_state_t*, jit_word_t,jit_int32_t,jit_float64_t); +# define bungtr_d(i0, r0, r1) _bungtr_d(_jit, i0, r0, r1) +static jit_word_t _bungtr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bungti_d(i0, r0, i1) _bungti_d(_jit, i0, r0, i1) +static jit_word_t _bungti_d(jit_state_t*, jit_word_t,jit_int32_t,jit_float64_t); +# define bltgtr_d(i0, r0, r1) _bltgtr_d(_jit, i0, r0, r1) +static jit_word_t _bltgtr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bltgti_d(i0, r0, i1) _bltgti_d(_jit, i0, r0, i1) +static jit_word_t _bltgti_d(jit_state_t*, jit_word_t,jit_int32_t,jit_float64_t); +# define bordr_d(i0, r0, r1) _bordr_d(_jit, i0, r0, r1) +static jit_word_t _bordr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define bordi_d(i0, r0, i1) _bordi_d(_jit, i0, r0, i1) +static jit_word_t _bordi_d(jit_state_t*, jit_word_t, jit_int32_t,jit_float64_t); +# define bunordr_d(i0, r0, r1) _bunordr_d(_jit, i0, r0, r1) +static jit_word_t _bunordr_d(jit_state_t*, jit_word_t, jit_int32_t,jit_int32_t); +# define bunordi_d(i0, r0, i1) _bunordi_d(_jit, i0, r0, i1) +static jit_word_t _bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t); +# define vaarg_d(r0, r1) _vaarg_d(_jit, r0, r1) +static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t); +#endif /* PROTO */ + +#if CODE +static void +_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg = 
jit_get_reg(jit_class_fpr); + FTINTRZ_W_S(rn(reg), r1); + MOVFR2GR_S(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ /* FTINTRZ_W_D of r1 goes to a scratch FPR, MOVFR2GR_S then copies it into GPR r0; the scratch is released before returning. */ + jit_int32_t reg = jit_get_reg(jit_class_fpr); + FTINTRZ_W_D(rn(reg), r1); + MOVFR2GR_S(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_truncr_f_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ /* Same scratch-FPR sequence, using FTINTRZ_L_S and the MOVFR2GR_D move. */ + jit_int32_t reg = jit_get_reg(jit_class_fpr); + FTINTRZ_L_S(rn(reg), r1); + MOVFR2GR_D(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ /* Same scratch-FPR sequence, using FTINTRZ_L_D and the MOVFR2GR_D move. */ + jit_int32_t reg = jit_get_reg(jit_class_fpr); + FTINTRZ_L_D(rn(reg), r1); + MOVFR2GR_D(r0, rn(reg)); + jit_unget_reg(reg); +} + +# define fpr_opi(name, type, size) /* Generates _<name>i_<type>(): loads the immediate i0 into a scratch FPR with movi_<type>, then delegates to the register form <name>r_<type>(r0, r1, scratch). */ \ +static void \ +_##name##i_##type(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, \ + jit_float##size##_t i0) \ +{ \ + jit_int32_t reg = jit_get_reg(jit_class_fpr); \ + movi_##type(rn(reg), i0); \ + name##r_##type(r0, r1, rn(reg)); \ + jit_unget_reg(reg); \ +} +# define fopi(name) fpr_opi(name, f, 32) /* jit_float32_t instantiation */ +# define dopi(name) fpr_opi(name, d, 64) /* jit_float64_t instantiation */ + +# define fpr_bopi(name, type, size) /* Generates _b<name>i_<type>(): branch counterpart of fpr_opi. The scratch FPR is requested with jit_class_nospill, and the value returned by the register-form branch b<name>r_<type> is passed back to the caller. */ \ +static jit_word_t \ +_b##name##i_##type(jit_state_t *_jit, \ + jit_word_t i0, jit_int32_t r0, \ + jit_float##size##_t i1) \ +{ \ + jit_word_t word; \ + jit_int32_t reg = jit_get_reg(jit_class_fpr| \ + jit_class_nospill); \ + movi_##type(rn(reg), i1); \ + word = b##name##r_##type(i0, r0, rn(reg)); \ + jit_unget_reg(reg); \ + return (word); \ +} +# define fbopi(name) fpr_bopi(name, f, 32) /* jit_float32_t instantiation */ +# define dbopi(name) fpr_bopi(name, d, 64) /* jit_float64_t instantiation */ + +fopi(add) +fopi(sub) +fopi(rsb) +fopi(mul) +fopi(div) + +static void +_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ /* Moves GPR r1 into FPR r0 with MOVGR2FR_D, then applies FFINT_S_L in place on r0. */ + MOVGR2FR_D(r0, r1); + FFINT_S_L(r0, r0); +} + +static void +_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ /* Load from address i0: one FLD_S based on _ZERO_REGNO when i0 passes can_sign_extend_si12_p, otherwise i0 is first materialized in a scratch GPR. */ + jit_int32_t t0; + if (can_sign_extend_si12_p(i0)) + FLD_S(r0, _ZERO_REGNO, i0); + else { + t0 = jit_get_reg(jit_class_gpr); + 
movi(rn(t0), i0); + ldr_f(r0, rn(t0)); + jit_unget_reg(t0); + } +} + +static void +_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ /* Load from r1 plus i0: direct FLD_S when i0 passes can_sign_extend_si12_p, otherwise i0 goes to a scratch GPR and the register-indexed ldxr_f is used. */ + jit_int32_t t0; + if (can_sign_extend_si12_p(i0)) + FLD_S(r0, r1, i0); + else { + t0 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + ldxr_f(r0, r1, rn(t0)); + jit_unget_reg(t0); + } +} + +static void +_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ /* Store r0 to address i0: FST_S based on _ZERO_REGNO when i0 passes can_sign_extend_si12_p, otherwise through a scratch GPR holding i0. */ + jit_int32_t t0; + if (can_sign_extend_si12_p(i0)) + FST_S(r0, _ZERO_REGNO, i0); + else { + t0 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + str_f(rn(t0), r0); + jit_unget_reg(t0); + } +} + +static void +_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ /* Store r1 at r0 plus i0. Note the argument order: r0 is the base register and r1 the value, so FST_S receives (r1, r0, i0). */ + jit_int32_t t0; + if (can_sign_extend_si12_p(i0)) + FST_S(r1, r0, i0); + else { + t0 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + stxr_f(rn(t0), r0, r1); + jit_unget_reg(t0); + } +} + +static void +_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0) +{ /* Reads the bit pattern of i0 through a union. An all-zero pattern is moved straight from _ZERO_REGNO; any other pattern is loaded into a scratch GPR and moved to FPR r0 with movr_w_f. */ + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + data.f = i0; + if (data.i == 0) + movr_w_f(r0, _ZERO_REGNO); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i); + movr_w_f(r0, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0) +{ /* Loads the raw bit pattern of i0 (read through the union) into r0 with the integer movi. */ + union { + jit_int32_t i; + jit_float32_t f; + } data; + data.f = i0; + movi(r0, data.i); +} + +static void +_ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Pattern shared by the compare emitters below: an FCMP_* writes index 0 (presumably FP condition flag 0 - confirm against the cpu macros), and MOVCF2GR copies that flag into r0. */ + FCMP_SLT_S(0, r1, r2); + MOVCF2GR(r0, 0); +} +fopi(lt) + +static void +_ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_SLE_S(0, r1, r2); + MOVCF2GR(r0, 0); +} +fopi(le) + +static void +_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CEQ_S(0, r1, r2); + MOVCF2GR(r0, 0); +} +fopi(eq) + +static void +_ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* r1 >= r2 is emitted as r2 <= r1: same SLE compare with the operands swapped. */ + FCMP_SLE_S(0, r2, r1); + 
MOVCF2GR(r0, 0); +} +fopi(ge) + +static void +_gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* r1 > r2 is emitted as r2 < r1: SLT compare with the operands swapped. */ + FCMP_SLT_S(0, r2, r1); + MOVCF2GR(r0, 0); +} +fopi(gt) + +static void +_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CUNE_S(0, r1, r2); + MOVCF2GR(r0, 0); +} +fopi(ne) + +static void +_unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CULT_S(0, r1, r2); + MOVCF2GR(r0, 0); +} +fopi(unlt) + +static void +_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CULE_S(0, r1, r2); + MOVCF2GR(r0, 0); +} +fopi(unle) + +static void +_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CUEQ_S(0, r1, r2); + MOVCF2GR(r0, 0); +} +fopi(uneq) + +static void +_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: CULE on (r2, r1). */ + FCMP_CULE_S(0, r2, r1); + MOVCF2GR(r0, 0); +} +fopi(unge) + +static void +_ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: CULT on (r2, r1). */ + FCMP_CULT_S(0, r2, r1); + MOVCF2GR(r0, 0); +} +fopi(ungt) + +static void +_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CNE_S(0, r1, r2); + MOVCF2GR(r0, 0); +} +fopi(ltgt) + +static void +_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_COR_S(0, r1, r2); + MOVCF2GR(r0, 0); +} +fopi(ord) + +static void +_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Emits the same FCMP_COR_S compare as _ordr_f and then inverts bit 0 of the result with XORI. */ + FCMP_COR_S(0, r1, r2); + MOVCF2GR(r0, 0); + XORI(r0, r0, 1); +} +fopi(unord) + +static jit_word_t +_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Branch pattern: emit the compare, record in w the pc at which the BCNEZ is about to be emitted, encode the displacement to i0 as (i0 - w) >> 2, and return w (presumably so the branch can be patched later - confirm with the caller). */ + jit_word_t w; + FCMP_SLT_S(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(lt) + +static jit_word_t +_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_SLE_S(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); 
+} +fbopi(le) + +static jit_word_t +_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* All b*r_f emitters on this line share one shape: FCMP_* into index 0, w = pc of the branch instruction, BCNEZ with displacement (i0 - w) >> 2, return w. Only the compare condition and operand order differ. */ + jit_word_t w; + FCMP_CEQ_S(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(eq) + +static jit_word_t +_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: SLE on (r2, r1). */ + jit_word_t w; + FCMP_SLE_S(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(ge) + +static jit_word_t +_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: SLT on (r2, r1). */ + jit_word_t w; + FCMP_SLT_S(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(gt) + +static jit_word_t +_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_CUNE_S(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(ne) + +static jit_word_t +_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_CULT_S(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(unlt) + +static jit_word_t +_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_CULE_S(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(unle) + +static jit_word_t +_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_CUEQ_S(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(uneq) + +static jit_word_t +_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: CULE on (r2, r1). */ + jit_word_t w; + FCMP_CULE_S(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(unge) + +static jit_word_t +_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: CULT on (r2, r1). */ + jit_word_t w; + FCMP_CULT_S(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(ungt) + +static jit_word_t 
+_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_CNE_S(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(ltgt) + +static jit_word_t +_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_COR_S(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(ord) + +static jit_word_t +_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Uses the same FCMP_COR_S compare as _bordr_f but branches with BCEQZ instead of BCNEZ, i.e. on the opposite state of index 0. */ + jit_word_t w; + FCMP_COR_S(0, r2, r1); + w = _jit->pc.w; + BCEQZ(0, (i0 - w) >> 2); + return (w); +} +fbopi(unord) + +dopi(add) +dopi(sub) +dopi(rsb) +dopi(mul) +dopi(div) + +static void +_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ /* Moves GPR r1 into FPR r0 with MOVGR2FR_D, then applies FFINT_D_L in place on r0. */ + MOVGR2FR_D(r0, r1); + FFINT_D_L(r0, r0); +} + +static void +_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ /* Load from address i0: FLD_D based on _ZERO_REGNO when i0 passes can_sign_extend_si12_p, otherwise through a scratch GPR holding i0. */ + jit_int32_t t0; + if (can_sign_extend_si12_p(i0)) + FLD_D(r0, _ZERO_REGNO, i0); + else { + t0 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + ldr_d(r0, rn(t0)); + jit_unget_reg(t0); + } +} + +static void +_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ /* Load from r1 plus i0, with the same immediate-fits test and scratch-GPR fallback. */ + jit_int32_t t0; + if (can_sign_extend_si12_p(i0)) + FLD_D(r0, r1, i0); + else { + t0 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + ldxr_d(r0, r1, rn(t0)); + jit_unget_reg(t0); + } +} + +static void +_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ /* Store r0 to address i0, with the same immediate-fits test and scratch-GPR fallback. */ + jit_int32_t t0; + if (can_sign_extend_si12_p(i0)) + FST_D(r0, _ZERO_REGNO, i0); + else { + t0 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + str_d(rn(t0), r0); + jit_unget_reg(t0); + } +} + +static void +_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ /* Store r1 at r0 plus i0: r0 is the base register and r1 the value, so FST_D receives (r1, r0, i0). */ + jit_int32_t t0; + if (can_sign_extend_si12_p(i0)) + FST_D(r1, r0, i0); + else { + t0 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + stxr_d(rn(t0), r0, r1); + jit_unget_reg(t0); + } +} + +static void +_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0) 
+{ /* Reads the bit pattern of i0 through a union. An all-zero pattern is moved from _ZERO_REGNO; anything else goes through a scratch GPR. NOTE(review): the union overlays jit_float64_t with jit_word_t, which assumes a 64-bit jit_word_t - confirm. */ + union { + jit_word_t w; + jit_float64_t d; + } data; + jit_int32_t reg; + data.d = i0; + if (data.w == 0) + movr_w_d(r0, _ZERO_REGNO); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.w); + movr_w_d(r0, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0) +{ /* Loads the raw bit pattern of i0 (read through the union as jit_int64_t) into r0 with the integer movi. */ + union { + jit_int64_t l; + jit_float64_t d; + } data; + data.d = i0; + movi(r0, data.l); +} + +static void +_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Pattern shared by the double compare emitters below: an FCMP_*_D writes index 0 (presumably FP condition flag 0 - confirm), and MOVCF2GR copies it into r0. */ + FCMP_SLT_D(0, r1, r2); + MOVCF2GR(r0, 0); +} +dopi(lt) + +static void +_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_SLE_D(0, r1, r2); + MOVCF2GR(r0, 0); +} +dopi(le) + +static void +_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CEQ_D(0, r1, r2); + MOVCF2GR(r0, 0); +} +dopi(eq) + +static void +_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: SLE on (r2, r1). */ + FCMP_SLE_D(0, r2, r1); + MOVCF2GR(r0, 0); +} +dopi(ge) + +static void +_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: SLT on (r2, r1). */ + FCMP_SLT_D(0, r2, r1); + MOVCF2GR(r0, 0); +} +dopi(gt) + +static void +_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CUNE_D(0, r1, r2); + MOVCF2GR(r0, 0); +} +dopi(ne) + +static void +_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CULT_D(0, r1, r2); + MOVCF2GR(r0, 0); +} +dopi(unlt) + +static void +_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CULE_D(0, r1, r2); + MOVCF2GR(r0, 0); +} +dopi(unle) + +static void +_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CUEQ_D(0, r1, r2); + MOVCF2GR(r0, 0); +} +dopi(uneq) + +static void +_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: CULE on (r2, r1). */ + FCMP_CULE_D(0, r2, r1); + MOVCF2GR(r0, 0); +} +dopi(unge) + +static void +_ungtr_d(jit_state_t 
*_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: CULT on (r2, r1). */ + FCMP_CULT_D(0, r2, r1); + MOVCF2GR(r0, 0); +} +dopi(ungt) + +static void +_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_CNE_D(0, r1, r2); + MOVCF2GR(r0, 0); +} +dopi(ltgt) + +static void +_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMP_COR_D(0, r1, r2); + MOVCF2GR(r0, 0); +} +dopi(ord) + +static void +_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* Emits the same FCMP_COR_D compare as _ordr_d and then inverts bit 0 of the result with XORI. */ + FCMP_COR_D(0, r1, r2); + MOVCF2GR(r0, 0); + XORI(r0, r0, 1); +} +dopi(unord) + +static jit_word_t +_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Branch pattern: emit the compare, record in w the pc at which the BCNEZ is about to be emitted, encode the displacement to i0 as (i0 - w) >> 2, and return w (presumably for later patching - confirm with the caller). */ + jit_word_t w; + FCMP_SLT_D(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(lt) + +static jit_word_t +_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_SLE_D(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(le) + +static jit_word_t +_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_CEQ_D(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(eq) + +static jit_word_t +_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: SLE on (r2, r1). */ + jit_word_t w; + FCMP_SLE_D(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(ge) + +static jit_word_t +_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: SLT on (r2, r1). */ + jit_word_t w; + FCMP_SLT_D(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(gt) + +static jit_word_t +_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_CUNE_D(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(ne) + +static jit_word_t +_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + 
FCMP_CULT_D(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(unlt) + +static jit_word_t +_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Same branch shape as every b*r_d emitter: compare into index 0, w = pc of the branch instruction, displacement (i0 - w) >> 2, return w. */ + jit_word_t w; + FCMP_CULE_D(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(unle) + +static jit_word_t +_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_CUEQ_D(0, r1, r2); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(uneq) + +static jit_word_t +_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: CULE on (r2, r1). */ + jit_word_t w; + FCMP_CULE_D(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(unge) + +static jit_word_t +_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Operands swapped: CULT on (r2, r1). */ + jit_word_t w; + FCMP_CULT_D(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(ungt) + +static jit_word_t +_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_CNE_D(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(ltgt) + +static jit_word_t +_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMP_COR_D(0, r2, r1); + w = _jit->pc.w; + BCNEZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(ord) + +static jit_word_t +_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) +{ /* Uses the same FCMP_COR_D compare as _bordr_d but branches with BCEQZ instead of BCNEZ, i.e. on the opposite state of index 0. */ + jit_word_t w; + FCMP_COR_D(0, r2, r1); + w = _jit->pc.w; + BCEQZ(0, (i0 - w) >> 2); + return (w); +} +dbopi(unord) + +static void +_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ /* Reads a double through the pointer held in r1 and then advances r1 by sizeof(jit_float64_t); only valid in a function flagged jit_call_varargs (asserted). */ + assert(_jitc->function->self.call & jit_call_varargs); + /* Load argument. */ + ldr_d(r0, r1); + /* Update va_list. 
*/ + addi(r1, r1, sizeof(jit_float64_t)); +} + +#endif /* CODE */ diff --git a/deps/lightning/lib/jit_loongarch-sz.c b/deps/lightning/lib/jit_loongarch-sz.c new file mode 100644 index 000000000..b2cab4d08 --- /dev/null +++ b/deps/lightning/lib/jit_loongarch-sz.c @@ -0,0 +1,448 @@ +#if __WORDSIZE == 64 +#define JIT_INSTR_MAX 88 + 0, /* data */ + 0, /* live */ + 28, /* align */ + 0, /* save */ + 0, /* load */ + 4, /* skip */ + 0, /* #name */ + 0, /* #note */ + 0, /* label */ + 88, /* prolog */ + 0, /* ellipsis */ + 0, /* va_push */ + 0, /* allocai */ + 0, /* allocar */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ + 0, /* getarg_c */ + 0, /* getarg_uc */ + 0, /* getarg_s */ + 0, /* getarg_us */ + 0, /* getarg_i */ + 0, /* getarg_ui */ + 0, /* getarg_l */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 8, /* va_arg_d */ + 0, /* va_end */ + 4, /* addr */ + 20, /* addi */ + 12, /* addcr */ + 28, /* addci */ + 28, /* addxr */ + 28, /* addxi */ + 4, /* subr */ + 20, /* subi */ + 12, /* subcr */ + 28, /* subci */ + 28, /* subxr */ + 28, /* subxi */ + 24, /* rsbi */ + 4, /* mulr */ + 20, /* muli */ + 12, /* qmulr */ + 24, /* qmuli */ + 12, /* qmulr_u */ + 24, /* qmuli_u */ + 4, /* divr */ + 20, /* divi */ + 4, /* divr_u */ + 20, /* divi_u */ + 12, /* qdivr */ + 12, /* qdivi */ + 12, /* qdivr_u */ + 12, /* qdivi_u */ + 4, /* remr */ + 20, /* remi */ + 4, /* remr_u */ + 20, /* remi_u */ + 4, /* andr */ + 20, /* andi */ + 4, /* orr */ + 20, /* ori */ + 4, /* xorr */ + 20, /* xori */ + 4, /* lshr */ + 4, /* lshi */ + 4, /* rshr */ + 4, /* rshi */ + 4, /* rshr_u */ + 4, /* rshi_u */ + 4, /* negr */ + 4, /* comr */ + 4, /* ltr */ + 4, /* lti */ + 4, /* ltr_u */ + 4, /* 
lti_u */ + 8, /* ler */ + 4, /* lei */ + 8, /* ler_u */ + 4, /* lei_u */ + 12, /* eqr */ + 12, /* eqi */ + 8, /* ger */ + 8, /* gei */ + 8, /* ger_u */ + 8, /* gei_u */ + 4, /* gtr */ + 8, /* gti */ + 4, /* gtr_u */ + 8, /* gti_u */ + 8, /* ner */ + 8, /* nei */ + 4, /* movr */ + 16, /* movi */ + 12, /* movnr */ + 12, /* movzr */ + 32, /* casr */ + 44, /* casi */ + 4, /* extr_c */ + 4, /* extr_uc */ + 4, /* extr_s */ + 4, /* extr_us */ + 4, /* extr_i */ + 4, /* extr_ui */ + 8, /* bswapr_us */ + 8, /* bswapr_ui */ + 4, /* bswapr_ul */ + 8, /* htonr_us */ + 8, /* htonr_ui */ + 4, /* htonr_ul */ + 4, /* ldr_c */ + 20, /* ldi_c */ + 4, /* ldr_uc */ + 20, /* ldi_uc */ + 4, /* ldr_s */ + 20, /* ldi_s */ + 4, /* ldr_us */ + 20, /* ldi_us */ + 4, /* ldr_i */ + 20, /* ldi_i */ + 4, /* ldr_ui */ + 20, /* ldi_ui */ + 4, /* ldr_l */ + 20, /* ldi_l */ + 4, /* ldxr_c */ + 16, /* ldxi_c */ + 4, /* ldxr_uc */ + 16, /* ldxi_uc */ + 4, /* ldxr_s */ + 16, /* ldxi_s */ + 4, /* ldxr_us */ + 16, /* ldxi_us */ + 4, /* ldxr_i */ + 16, /* ldxi_i */ + 4, /* ldxr_ui */ + 16, /* ldxi_ui */ + 4, /* ldxr_l */ + 16, /* ldxi_l */ + 4, /* str_c */ + 20, /* sti_c */ + 4, /* str_s */ + 20, /* sti_s */ + 4, /* str_i */ + 20, /* sti_i */ + 4, /* str_l */ + 20, /* sti_l */ + 4, /* stxr_c */ + 16, /* stxi_c */ + 4, /* stxr_s */ + 16, /* stxi_s */ + 4, /* stxr_i */ + 16, /* stxi_i */ + 4, /* stxr_l */ + 16, /* stxi_l */ + 4, /* bltr */ + 8, /* blti */ + 4, /* bltr_u */ + 8, /* blti_u */ + 8, /* bler */ + 12, /* blei */ + 8, /* bler_u */ + 12, /* blei_u */ + 4, /* beqr */ + 20, /* beqi */ + 4, /* bger */ + 8, /* bgei */ + 4, /* bger_u */ + 8, /* bgei_u */ + 8, /* bgtr */ + 12, /* bgti */ + 8, /* bgtr_u */ + 12, /* bgti_u */ + 4, /* bner */ + 20, /* bnei */ + 8, /* bmsr */ + 8, /* bmsi */ + 8, /* bmcr */ + 8, /* bmci */ + 32, /* boaddr */ + 36, /* boaddi */ + 16, /* boaddr_u */ + 20, /* boaddi_u */ + 32, /* bxaddr */ + 36, /* bxaddi */ + 16, /* bxaddr_u */ + 20, /* bxaddi_u */ + 32, /* bosubr */ + 36, /* 
bosubi */ + 16, /* bosubr_u */ + 20, /* bosubi_u */ + 32, /* bxsubr */ + 36, /* bxsubi */ + 16, /* bxsubr_u */ + 20, /* bxsubi_u */ + 4, /* jmpr */ + 20, /* jmpi */ + 4, /* callr */ + 20, /* calli */ + 0, /* prepare */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ + 0, /* finishr */ + 0, /* finishi */ + 0, /* ret */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ + 0, /* retval_c */ + 0, /* retval_uc */ + 0, /* retval_s */ + 0, /* retval_us */ + 0, /* retval_i */ + 0, /* retval_ui */ + 0, /* retval_l */ + 88, /* epilog */ + 0, /* arg_f */ + 0, /* getarg_f */ + 0, /* putargr_f */ + 0, /* putargi_f */ + 4, /* addr_f */ + 12, /* addi_f */ + 4, /* subr_f */ + 12, /* subi_f */ + 12, /* rsbi_f */ + 4, /* mulr_f */ + 12, /* muli_f */ + 4, /* divr_f */ + 12, /* divi_f */ + 4, /* negr_f */ + 4, /* absr_f */ + 4, /* sqrtr_f */ + 8, /* ltr_f */ + 16, /* lti_f */ + 8, /* ler_f */ + 16, /* lei_f */ + 8, /* eqr_f */ + 16, /* eqi_f */ + 8, /* ger_f */ + 16, /* gei_f */ + 8, /* gtr_f */ + 16, /* gti_f */ + 8, /* ner_f */ + 16, /* nei_f */ + 8, /* unltr_f */ + 16, /* unlti_f */ + 8, /* unler_f */ + 16, /* unlei_f */ + 8, /* uneqr_f */ + 16, /* uneqi_f */ + 8, /* unger_f */ + 16, /* ungei_f */ + 8, /* ungtr_f */ + 16, /* ungti_f */ + 8, /* ltgtr_f */ + 16, /* ltgti_f */ + 8, /* ordr_f */ + 16, /* ordi_f */ + 12, /* unordr_f */ + 20, /* unordi_f */ + 8, /* truncr_f_i */ + 8, /* truncr_f_l */ + 8, /* extr_f */ + 4, /* extr_d_f */ + 4, /* movr_f */ + 8, /* movi_f */ + 4, /* ldr_f */ + 20, /* ldi_f */ + 4, /* ldxr_f 
*/ + 16, /* ldxi_f */ + 4, /* str_f */ + 20, /* sti_f */ + 4, /* stxr_f */ + 16, /* stxi_f */ + 8, /* bltr_f */ + 16, /* blti_f */ + 8, /* bler_f */ + 16, /* blei_f */ + 8, /* beqr_f */ + 16, /* beqi_f */ + 8, /* bger_f */ + 16, /* bgei_f */ + 8, /* bgtr_f */ + 16, /* bgti_f */ + 8, /* bner_f */ + 16, /* bnei_f */ + 8, /* bunltr_f */ + 16, /* bunlti_f */ + 8, /* bunler_f */ + 16, /* bunlei_f */ + 8, /* buneqr_f */ + 16, /* buneqi_f */ + 8, /* bunger_f */ + 16, /* bungei_f */ + 8, /* bungtr_f */ + 16, /* bungti_f */ + 8, /* bltgtr_f */ + 16, /* bltgti_f */ + 8, /* bordr_f */ + 16, /* bordi_f */ + 8, /* bunordr_f */ + 16, /* bunordi_f */ + 0, /* pushargr_f */ + 0, /* pushargi_f */ + 0, /* retr_f */ + 0, /* reti_f */ + 0, /* retval_f */ + 0, /* arg_d */ + 0, /* getarg_d */ + 0, /* putargr_d */ + 0, /* putargi_d */ + 4, /* addr_d */ + 20, /* addi_d */ + 4, /* subr_d */ + 20, /* subi_d */ + 16, /* rsbi_d */ + 4, /* mulr_d */ + 20, /* muli_d */ + 4, /* divr_d */ + 20, /* divi_d */ + 4, /* negr_d */ + 4, /* absr_d */ + 4, /* sqrtr_d */ + 8, /* ltr_d */ + 24, /* lti_d */ + 8, /* ler_d */ + 24, /* lei_d */ + 8, /* eqr_d */ + 24, /* eqi_d */ + 8, /* ger_d */ + 24, /* gei_d */ + 8, /* gtr_d */ + 24, /* gti_d */ + 8, /* ner_d */ + 24, /* nei_d */ + 8, /* unltr_d */ + 24, /* unlti_d */ + 8, /* unler_d */ + 24, /* unlei_d */ + 8, /* uneqr_d */ + 24, /* uneqi_d */ + 8, /* unger_d */ + 24, /* ungei_d */ + 8, /* ungtr_d */ + 24, /* ungti_d */ + 8, /* ltgtr_d */ + 24, /* ltgti_d */ + 8, /* ordr_d */ + 24, /* ordi_d */ + 12, /* unordr_d */ + 28, /* unordi_d */ + 8, /* truncr_d_i */ + 8, /* truncr_d_l */ + 8, /* extr_d */ + 4, /* extr_f_d */ + 4, /* movr_d */ + 16, /* movi_d */ + 4, /* ldr_d */ + 20, /* ldi_d */ + 4, /* ldxr_d */ + 16, /* ldxi_d */ + 4, /* str_d */ + 20, /* sti_d */ + 4, /* stxr_d */ + 16, /* stxi_d */ + 8, /* bltr_d */ + 20, /* blti_d */ + 8, /* bler_d */ + 20, /* blei_d */ + 8, /* beqr_d */ + 24, /* beqi_d */ + 8, /* bger_d */ + 24, /* bgei_d */ + 8, /* bgtr_d */ + 
24, /* bgti_d */ + 8, /* bner_d */ + 24, /* bnei_d */ + 8, /* bunltr_d */ + 24, /* bunlti_d */ + 8, /* bunler_d */ + 24, /* bunlei_d */ + 8, /* buneqr_d */ + 24, /* buneqi_d */ + 8, /* bunger_d */ + 24, /* bungei_d */ + 8, /* bungtr_d */ + 24, /* bungti_d */ + 8, /* bltgtr_d */ + 24, /* bltgti_d */ + 8, /* bordr_d */ + 20, /* bordi_d */ + 8, /* bunordr_d */ + 24, /* bunordi_d */ + 0, /* pushargr_d */ + 0, /* pushargi_d */ + 0, /* retr_d */ + 0, /* reti_d */ + 0, /* retval_d */ + 4, /* movr_w_f */ + 0, /* movr_ww_d */ + 4, /* movr_w_d */ + 0, /* movr_f_w */ + 4, /* movi_f_w */ + 0, /* movr_d_ww */ + 0, /* movi_d_ww */ + 4, /* movr_d_w */ + 12, /* movi_d_w */ +#endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_loongarch.c b/deps/lightning/lib/jit_loongarch.c new file mode 100644 index 000000000..6be353f51 --- /dev/null +++ b/deps/lightning/lib/jit_loongarch.c @@ -0,0 +1,1665 @@ +/* + * Copyright (C) 2022 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8) /* true for argument slot indices 0..7 */ +#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) /* same 0..7 range, applied to the float argument counter */ + +/* + * Types + */ +typedef struct jit_pointer_t jit_va_list_t; /* NOTE(review): this names a struct tag jit_pointer_t that is not defined in the visible code, and jit_va_list_t is not used in the visible code - confirm intent. */ + +/* + * Prototypes + */ +#define patch(instr, node) _patch(_jit, instr, node) +static void _patch(jit_state_t*,jit_word_t,jit_node_t*); + +#define PROTO 1 +# include "jit_loongarch-cpu.c" +# include "jit_loongarch-fpu.c" +#undef PROTO + +/* + * Initialization + */ +jit_register_t _rvs[] = { /* Each row is a set of rc() class flags or-ed with a hardware register number, plus a name. Rows with only a number (0x16, 0x15, 0x00..0x03) carry no class flag. The table is closed by a _NOREG row. */ + { rc(gpr) | 0x14, "$t8" }, + { rc(gpr) | 0x13, "$t7" }, + { rc(gpr) | 0x12, "$t6" }, + { rc(gpr) | 0x11, "$t5" }, + { rc(gpr) | 0x10, "$t4" }, + { rc(gpr) | 0x0f, "$t3" }, + { rc(gpr) | 0x0e, "$t2" }, + { rc(gpr) | 0x0d, "$t1" }, + { rc(gpr) | 0x0c, "$t0" }, + { rc(sav) | rc(gpr) | 0x1f, "$s8" }, + { rc(sav) | rc(gpr) | 0x1e, "$s7" }, + { rc(sav) | rc(gpr) | 0x1d, "$s6" }, + { rc(sav) | rc(gpr) | 0x1c, "$s5" }, + { rc(sav) | rc(gpr) | 0x1b, "$s4" }, + { rc(sav) | rc(gpr) | 0x1a, "$s3" }, + { rc(sav) | rc(gpr) | 0x19, "$s2" }, + { rc(sav) | rc(gpr) | 0x18, "$s1" }, + { rc(sav) | rc(gpr) | 0x17, "$s0" }, + { rc(arg) | rc(gpr) | 0x0b, "$a7" }, + { rc(arg) | rc(gpr) | 0x0a, "$a6" }, + { rc(arg) | rc(gpr) | 0x09, "$a5" }, + { rc(arg) | rc(gpr) | 0x08, "$a4" }, + { rc(arg) | rc(gpr) | 0x07, "$a3" }, + { rc(arg) | rc(gpr) | 0x06, "$a2" }, + { rc(arg) | rc(gpr) | 0x05, "$a1" }, + { rc(arg) | rc(gpr) | 0x04, "$a0" }, + { 0x16, "$fp" }, + { 0x15, "" }, + { 0x00, "$zero" }, + { 0x01, "$ra" }, + { 0x02, "$tp" }, + { 0x03, "$sp" }, + { rc(fpr) | 0x08, "$ft0" }, + { rc(fpr) | 0x09, "$ft1" }, + { rc(fpr) | 0x0a, "$ft2" }, + { rc(fpr) | 0x0b, "$ft3" }, + { rc(fpr) | 0x0c, "$ft4" }, + { rc(fpr) | 0x0d, "$ft5" }, + { rc(fpr) | 0x0e, "$ft6" }, + { rc(fpr) | 0x0f, "$ft7" }, + { rc(fpr) | 0x10, "$ft8" }, + { rc(fpr) | 0x11, "$ft9" }, + { rc(fpr) | 0x12, "$ft10" }, + { rc(fpr) | 0x13, "$ft11" }, + { rc(fpr) | 0x14, "$ft12" }, + { rc(fpr) | 0x15, "$ft13" }, + { rc(fpr) | 0x16, 
"$ft14" }, + { rc(fpr) | 0x17, "$ft15" }, + { rc(arg) | rc(fpr) | 0x07, "$fa7" }, + { rc(arg) | rc(fpr) | 0x06, "$fa6" }, + { rc(arg) | rc(fpr) | 0x05, "$fa5" }, + { rc(arg) | rc(fpr) | 0x04, "$fa4" }, + { rc(arg) | rc(fpr) | 0x03, "$fa3" }, + { rc(arg) | rc(fpr) | 0x02, "$fa2" }, + { rc(arg) | rc(fpr) | 0x01, "$fa1" }, + { rc(arg) | rc(fpr) | 0x00, "$fa0" }, + { rc(sav) | rc(fpr) | 0x1f, "$fs7" }, + { rc(sav) | rc(fpr) | 0x1e, "$fs6" }, + { rc(sav) | rc(fpr) | 0x1d, "$fs5" }, + { rc(sav) | rc(fpr) | 0x1c, "$fs4" }, + { rc(sav) | rc(fpr) | 0x1b, "$fs3" }, + { rc(sav) | rc(fpr) | 0x1a, "$fs2" }, + { rc(sav) | rc(fpr) | 0x19, "$fs1" }, + { rc(sav) | rc(fpr) | 0x18, "$fs0" }, + { _NOREG, "" }, +}; + +/* + * Implementation + */ +void +jit_get_cpu(void) +{ /* Intentionally empty in this backend: no cpu feature probing is performed here. */ +} + +void +_jit_init(jit_state_t *_jit) +{ /* reglen is jit_size(_rvs) minus one, so the closing _NOREG row is not counted (assuming jit_size yields the element count - confirm). */ + _jitc->reglen = jit_size(_rvs) - 1; + jit_carry = _NOREG; +} + +void +_jit_prolog(jit_state_t *_jit) +{ /* Starts a new function record: closes a still-open function via jit_epilog, grows the functions vector by 16 entries when it is full, resets the per-function argument and frame counters, allocates regoff, and creates the prolog and epilog nodes (only the prolog node is linked here). */ + jit_int32_t offset; + + if (_jitc->function) + jit_epilog(); + assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0); + jit_regset_set_ui(&_jitc->regsav, 0); + offset = _jitc->functions.offset; + if (offset >= _jitc->functions.length) { + jit_realloc((jit_pointer_t *)&_jitc->functions.ptr, + _jitc->functions.length * sizeof(jit_function_t), + (_jitc->functions.length + 16) * sizeof(jit_function_t)); + _jitc->functions.length += 16; + } + _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; + _jitc->function->self.size = stack_framesize; + _jitc->function->self.argi = _jitc->function->self.argf = + _jitc->function->self.aoff = _jitc->function->self.alen = 0; + _jitc->function->self.call = jit_call_default; + jit_alloc((jit_pointer_t *)&_jitc->function->regoff, + _jitc->reglen * sizeof(jit_int32_t)); + + /* _no_link here does not mean the jit_link() call can be removed + * by rewriting as: + * _jitc->function->prolog = jit_new_node(jit_code_prolog); + */ + _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog); + jit_link(_jitc->function->prolog); + 
_jitc->function->prolog->w.w = offset; + _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog); + /* u: label value + * v: offset in blocks vector + * w: offset in functions vector + */ + _jitc->function->epilog->w.w = offset; + + jit_regset_new(&_jitc->function->regset); +} + +jit_int32_t +_jit_allocai(jit_state_t *_jit, jit_int32_t length) +{ /* Reserves length bytes in the current function frame and returns the resulting offset. The running offset self.aoff is first masked with -2, -4 or -8 depending on length (lengths 0 and 1 are not masked), then decreased by length. The synth node is only recorded while _jitc->realize is not set. */ + assert(_jitc->function); + switch (length) { + case 0: case 1: break; + case 2: _jitc->function->self.aoff &= -2; break; + case 3: case 4: _jitc->function->self.aoff &= -4; break; + default: _jitc->function->self.aoff &= -8; break; + } + _jitc->function->self.aoff -= length; + if (!_jitc->realize) { + jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length); + jit_dec_synth(); + } + return (_jitc->function->self.aoff); +} + +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ /* Emits code for a run-time sized allocation. On first use in a function an int-sized frame slot (aoffoff) is reserved. The emitted code computes (-v) & -16 in a scratch GPR, adds it to the value loaded from that slot (result left in u and stored back), and adds it to JIT_SP as well. */ + jit_int32_t reg; + assert(_jitc->function); + jit_inc_synth_ww(allocar, u, v); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -16); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); + jit_dec_synth(); +} + +void +_jit_ret(jit_state_t *_jit) +{ /* A return is emitted as a jmpi that is patched to the epilog node of the current function. */ + jit_node_t *instr; + assert(_jitc->function); + jit_inc_synth(ret); + /* jump to epilog */ + instr = jit_jmpi(); + jit_patch_at(instr, _jitc->function->epilog); + jit_dec_synth(); +} + +void +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) +{ /* Moves register u into JIT_RET and returns; code selects which synth node is recorded. */ + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) +{ /* Loads immediate u into JIT_RET and returns; code selects which synth node is recorded. */ + jit_code_inc_synth_w(code, u); + jit_movi(JIT_RET, u); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_retr_f(jit_state_t *_jit, jit_int32_t u) +{ + 
jit_inc_synth_w(retr_f, u); + if (JIT_FRET != u) + jit_movr_f(JIT_FRET, u); + else + jit_live(JIT_FRET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti_f(jit_state_t *_jit, jit_float32_t u) +{ + jit_inc_synth_f(reti_f, u); + jit_movi_f(JIT_FRET, u); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_retr_d(jit_state_t *_jit, jit_int32_t u) +{ + jit_inc_synth_w(retr_d, u); + if (JIT_FRET != u) + jit_movr_d(JIT_FRET, u); + else + jit_live(JIT_FRET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti_d(jit_state_t *_jit, jit_float64_t u) +{ + jit_inc_synth_d(reti_d, u); + jit_movi_d(JIT_FRET, u); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_epilog(jit_state_t *_jit) +{ + assert(_jitc->function); + assert(_jitc->function->epilog->next == NULL); + jit_link(_jitc->function->epilog); + _jitc->function = NULL; +} + +jit_bool_t +_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) +{ + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) + return (jit_arg_reg_p(u->u.w)); + assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); + return (jit_arg_f_reg_p(u->u.w)); +} + +void +_jit_ellipsis(jit_state_t *_jit) +{ + jit_inc_synth(ellipsis); + if (_jitc->prepare) { + jit_link_prepare(); + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + jit_link_prolog(); + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + _jitc->function->vagp = _jitc->function->self.argi; + } + jit_dec_synth(); +} + +void +_jit_va_push(jit_state_t *_jit, jit_int32_t u) +{ + jit_inc_synth_w(va_push, u); + jit_pushargr(u); + jit_dec_synth(); +} + +jit_node_t * +_jit_arg(jit_state_t *_jit, jit_code_t code) +{ + jit_node_t *node; + jit_int32_t offset; + assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif + if 
(jit_arg_reg_p(_jitc->function->self.argi)) + offset = _jitc->function->self.argi++; + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += sizeof(jit_word_t); + } + node = jit_new_node_ww(code, offset, + ++_jitc->function->self.argn); + jit_link_prolog(); + return (node); +} + +jit_node_t * +_jit_arg_f(jit_state_t *_jit) +{ + jit_node_t *node; + jit_int32_t offset; + assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); + if (jit_arg_f_reg_p(_jitc->function->self.argf)) + offset = _jitc->function->self.argf++; + else if (jit_arg_reg_p(_jitc->function->self.argi)) { + offset = _jitc->function->self.argi++; + offset += 8; + } + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += sizeof(jit_word_t); + } + node = jit_new_node_ww(jit_code_arg_f, offset, + ++_jitc->function->self.argn); + jit_link_prolog(); + return (node); +} + +jit_node_t * +_jit_arg_d(jit_state_t *_jit) +{ + jit_node_t *node; + jit_int32_t offset; + assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); + if (jit_arg_f_reg_p(_jitc->function->self.argf)) + offset = _jitc->function->self.argf++; + else if (jit_arg_reg_p(_jitc->function->self.argi)) { + offset = _jitc->function->self.argi++; + offset += 8; + } + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += sizeof(jit_word_t); + } + node = jit_new_node_ww(jit_code_arg_d, offset, + ++_jitc->function->self.argn); + jit_link_prolog(); + return (node); +} + +void +_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_c); + jit_inc_synth_wp(getarg_c, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_c(u, _A0 - v->u.w); + else + jit_ldxi_c(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_c); + jit_inc_synth_wp(getarg_uc, u, v); + if (jit_arg_reg_p(v->u.w)) + 
jit_extr_uc(u, _A0 - v->u.w); + else + jit_ldxi_uc(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_s); + jit_inc_synth_wp(getarg_s, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_s(u, _A0 - v->u.w); + else + jit_ldxi_s(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_s); + jit_inc_synth_wp(getarg_us, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_us(u, _A0 - v->u.w); + else + jit_ldxi_us(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_i); + jit_inc_synth_wp(getarg_i, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_i(u, _A0 - v->u.w); + else + jit_ldxi_i(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_i); + jit_inc_synth_wp(getarg_ui, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_ui(u, _A0 - v->u.w); + else + jit_ldxi_ui(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_l); + jit_inc_synth_wp(getarg_l, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_movr(u, _A0 - v->u.w); + else + jit_ldxi_l(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) +{ + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_movr(_A0 - v->u.w, u); + else + jit_stxi(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) +{ + jit_int32_t regno; + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_movi(_A0 - 
v->u.w, u); + else { + regno = jit_get_reg(jit_class_gpr); + jit_movi(regno, u); + jit_stxi(v->u.w, JIT_FP, regno); + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(getarg_f, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_f(u, _FA0 - v->u.w); + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8)); + else + jit_ldxi_f(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(putargr_f, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_f(_FA0 - v->u.w, u); + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u); + else + jit_stxi_f(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) +{ + jit_int32_t regno; + assert(v->code == jit_code_arg_f); + jit_inc_synth_fp(putargi_f, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movi_f(_FA0 - v->u.w, u); + else if (jit_arg_reg_p(v->u.w - 8)) { + union { + jit_float32_t f; + jit_int32_t i; + } uu; + uu.f = u; + jit_movi(JIT_RA0 - (v->u.w - 8), uu.i); + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_f(regno, u); + jit_stxi_f(v->u.w, JIT_FP, regno); + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_d); + jit_inc_synth_wp(getarg_d, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_d(u, _FA0 - v->u.w); + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8)); + else + jit_ldxi_d(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_d); + jit_inc_synth_wp(putargr_d, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_d(_FA0 - v->u.w, u); + else if 
(jit_arg_reg_p(v->u.w - 8)) + jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u); + else + jit_stxi_d(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) +{ + jit_int32_t regno; + assert(v->code == jit_code_arg_d); + jit_inc_synth_dp(putargi_d, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movi_d(_FA0 - v->u.w, u); + else if (jit_arg_reg_p(v->u.w - 8)) { + union { + jit_float64_t d; + jit_int64_t w; + } uu; + uu.d = u; + jit_movi(JIT_RA0 - (v->u.w - 8), uu.w); + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_d(regno, u); + jit_stxi_d(v->u.w, JIT_FP, regno); + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) +{ + assert(_jitc->function); + jit_code_inc_synth_w(code, u); + jit_link_prepare(); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr(_A0 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + jit_stxi(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +void +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) +{ + jit_int32_t regno; + assert(_jitc->function); + jit_code_inc_synth_w(code, u); + jit_link_prepare(); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi(_A0 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_gpr); + jit_movi(regno, u); + jit_stxi(_jitc->function->call.size, JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +void +_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) +{ + assert(_jitc->function); + jit_inc_synth_w(pushargr_f, u); + jit_link_prepare(); + if (jit_arg_f_reg_p(_jitc->function->call.argf) && + !(_jitc->function->call.call & jit_call_varargs)) { + jit_movr_f(_FA0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else 
if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr_f_w(_A0 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + jit_stxi_f(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +void +_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) +{ + jit_int32_t regno; + assert(_jitc->function); + jit_inc_synth_f(pushargi_f, u); + jit_link_prepare(); + if (jit_arg_f_reg_p(_jitc->function->call.argf) && + !(_jitc->function->call.call & jit_call_varargs)) { + jit_movi_f(_FA0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi_f_w(_A0 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_f(regno, u); + jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +void +_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) +{ + assert(_jitc->function); + jit_inc_synth_w(pushargr_d, u); + jit_link_prepare(); + if (jit_arg_f_reg_p(_jitc->function->call.argf) && + !(_jitc->function->call.call & jit_call_varargs)) { + jit_movr_d(_FA0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr_d_w(_A0 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + jit_stxi_d(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +void +_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) +{ + jit_int32_t regno; + assert(_jitc->function); + jit_inc_synth_d(pushargi_d, u); + jit_link_prepare(); + if (jit_arg_f_reg_p(_jitc->function->call.argf) && + !(_jitc->function->call.call & jit_call_varargs)) { + jit_movi_d(_FA0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; 
+ } + else if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi_d_w(_A0 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_d(regno, u); + jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +jit_bool_t +_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) +{ + jit_int32_t spec; + spec = jit_class(_rvs[regno].spec); + if (spec & jit_class_arg) { + regno = _A0 - regno; + if (regno >= 0 && regno < node->v.w) + return (1); + if (spec & jit_class_fpr) { + regno = _FA0 - regno; + if (regno >= 0 && regno < node->w.w) + return (1); + } + } + return (0); +} + +void +_jit_finishr(jit_state_t *_jit, jit_int32_t r0) +{ + jit_node_t *node; + assert(_jitc->function); + jit_inc_synth_w(finishr, r0); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; + node = jit_callr(r0); + node->v.w = _jitc->function->call.argi; + node->w.w = _jitc->function->call.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; + _jitc->prepare = 0; + jit_dec_synth(); +} + +jit_node_t * +_jit_finishi(jit_state_t *_jit, jit_pointer_t i0) +{ + jit_node_t *node; + assert(_jitc->function); + jit_inc_synth_w(finishi, (jit_word_t)i0); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; + node = jit_calli(i0); + node->v.w = _jitc->function->call.argi; + node->w.w = _jitc->function->call.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; + _jitc->prepare = 0; + jit_dec_synth(); + return (node); +} + +void +_jit_retval_c(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_c, r0); + jit_extr_c(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_uc(jit_state_t 
*_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_uc, r0); + jit_extr_uc(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_s(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_s, r0); + jit_extr_s(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_us(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_us, r0); + jit_extr_us(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_i(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_i, r0); + jit_extr_i(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_ui, r0); + jit_extr_ui(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_l(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_l, r0); + if (r0 != JIT_RET) + jit_movr(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_f(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_f, r0); + if (r0 != JIT_FRET) + jit_movr_f(r0, JIT_FRET); + jit_dec_synth(); +} + +void +_jit_retval_d(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_d, r0); + if (r0 != JIT_FRET) + jit_movr_d(r0, JIT_FRET); + jit_dec_synth(); +} + +jit_pointer_t +_emit_code(jit_state_t *_jit) +{ + jit_node_t *node; + jit_node_t *temp; + jit_word_t word; + jit_int32_t value; + jit_int32_t offset; + struct { + jit_node_t *node; + jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif + jit_int32_t patch_offset; + } undo; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif + + _jitc->function = NULL; + + jit_reglive_setup(); + + undo.word = 0; + undo.node = NULL; + undo.patch_offset = 0; + +#define assert_data(node) /**/ +#define case_rr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.w)); \ + break +#define case_rw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), node->v.w); \ + break +#define case_wr(name, type) \ + case 
jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w)); \ + break +#define case_rrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ + break +#define case_rrrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrf(name) \ + case jit_code_##name##i_f: \ + assert_data(node); \ + name##i_f(rn(node->u.w), rn(node->v.w), node->w.f); \ + break +#define case_rrd(name) \ + case jit_code_##name##i_d: \ + assert_data(node); \ + name##i_d(rn(node->u.w), rn(node->v.w),node->w.d); \ + break +#define case_wrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ + break +#define case_brr(name, type) \ + case jit_code_##name##r##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##r##type(temp->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + else { \ + word = name##r##type(_jit->pc.w, \ + rn(node->v.w), \ + rn(node->w.w)); \ + patch(word, node); \ + } \ + break +#define case_brw(name, type) \ + case jit_code_##name##i##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i##type(temp->u.w, \ + rn(node->v.w), node->w.w); \ + else { \ + word = name##i##type(_jit->pc.w, \ + rn(node->v.w), node->w.w); \ + patch(word, node); \ + } \ + break; +#define case_brf(name) \ + case jit_code_##name##i_f: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ 
+ temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_f(temp->u.w, rn(node->v.w), node->w.f); \ + else { \ + word = name##i_f(_jit->pc.w, rn(node->v.w), \ + node->w.f); \ + patch(word, node); \ + } \ + break +#define case_brd(name) \ + case jit_code_##name##i_d: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_d(temp->u.w, rn(node->v.w), node->w.d); \ + else { \ + word = name##i_d(_jit->pc.w, rn(node->v.w), \ + node->w.d); \ + patch(word, node); \ + } \ + break +#if DEVEL_DISASSEMBLER + prevw = _jit->pc.w; +#endif + for (node = _jitc->head; node; node = node->next) { + if (_jit->pc.uc >= _jitc->code.end) + return (NULL); + +#if DEVEL_DISASSEMBLER + node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw; + prevw = _jit->pc.w; +#endif + value = jit_classify(node->code); + jit_regarg_set(node, value); + switch (node->code) { + case jit_code_align: + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; + case jit_code_note: case jit_code_name: + node->u.w = _jit->pc.w; + break; + case jit_code_label: + /* remember label is defined */ + node->flag |= jit_flag_patch; + node->u.w = _jit->pc.w; + break; + case_rrr(add,); + case_rrw(add,); + case_rrr(addc,); + case_rrw(addc,); + case_rrr(addx,); + case_rrw(addx,); + case_rrr(sub,); + case_rrw(sub,); + case_rrr(subc,); + case_rrw(subc,); + case_rrr(subx,); + case_rrw(subx,); + case_rrw(rsb,); + case_rrr(mul,); + case_rrw(mul,); + case_rrrr(qmul,); + case_rrrw(qmul,); + case_rrrr(qmul, _u); + case_rrrw(qmul, _u); + case_rrr(div,); + case_rrw(div,); + case_rrr(div, _u); + case_rrw(div, _u); + case_rrr(rem,); + case_rrw(rem,); + case_rrr(rem, _u); + case_rrw(rem, _u); + case_rrrr(qdiv,); + case_rrrw(qdiv,); + case_rrrr(qdiv, _u); + 
case_rrrw(qdiv, _u); + case_rrr(lsh,); + case_rrw(lsh,); + case_rrr(rsh,); + case_rrw(rsh,); + case_rrr(rsh, _u); + case_rrw(rsh, _u); + case_rr(neg,); + case_rr(com,); + case_rrr(and,); + case_rrw(and,); + case_rrr(or,); + case_rrw(or,); + case_rrr(xor,); + case_rrw(xor,); + case_rr(trunc, _f_i); + case_rr(trunc, _d_i); + case_rr(trunc, _f_l); + case_rr(trunc, _d_l); + case_rr(ld, _c); + case_rw(ld, _c); + case_rr(ld, _uc); + case_rw(ld, _uc); + case_rr(ld, _s); + case_rw(ld, _s); + case_rr(ld, _us); + case_rw(ld, _us); + case_rr(ld, _i); + case_rw(ld, _i); + case_rr(ld, _ui); + case_rw(ld, _ui); + case_rr(ld, _l); + case_rw(ld, _l); + case_rrr(ldx, _c); + case_rrw(ldx, _c); + case_rrr(ldx, _uc); + case_rrw(ldx, _uc); + case_rrr(ldx, _s); + case_rrw(ldx, _s); + case_rrr(ldx, _us); + case_rrw(ldx, _us); + case_rrr(ldx, _i); + case_rrw(ldx, _i); + case_rrr(ldx, _ui); + case_rrw(ldx, _ui); + case_rrr(ldx, _l); + case_rrw(ldx, _l); + case_rr(st, _c); + case_wr(st, _c); + case_rr(st, _s); + case_wr(st, _s); + case_rr(st, _i); + case_wr(st, _i); + case_rr(st, _l); + case_wr(st, _l); + case_rrr(stx, _c); + case_wrr(stx, _c); + case_rrr(stx, _s); + case_wrr(stx, _s); + case_rrr(stx, _i); + case_wrr(stx, _i); + case_rrr(stx, _l); + case_wrr(stx, _l); + case_rr(hton, _us); + case_rr(hton, _ui); + case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); + case_rr(ext, _c); + case_rr(ext, _uc); + case_rr(ext, _s); + case_rr(ext, _us); + case_rr(ext, _i); + case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case_rrr(movn,); + case_rrr(movz,); + case_rr(mov,); + case jit_code_movi: + if (node->flag & jit_flag_node) { + temp = node->v.n; + if (temp->code == jit_code_data || + (temp->code == jit_code_label && + (temp->flag & jit_flag_patch))) + movi(rn(node->u.w), temp->u.w); 
+ else { + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + word = movi_p(rn(node->u.w), temp->u.w); + patch(word, node); + } + } + else + movi(rn(node->u.w), node->v.w); + break; + case_rrr(lt,); + case_rrw(lt,); + case_rrr(lt, _u); + case_rrw(lt, _u); + case_rrr(le,); + case_rrw(le,); + case_rrr(le, _u); + case_rrw(le, _u); + case_rrr(eq,); + case_rrw(eq,); + case_rrr(ge,); + case_rrw(ge,); + case_rrr(ge, _u); + case_rrw(ge, _u); + case_rrr(gt,); + case_rrw(gt,); + case_rrr(gt, _u); + case_rrw(gt, _u); + case_rrr(ne,); + case_rrw(ne,); + case_brr(blt,); + case_brw(blt,); + case_brr(blt, _u); + case_brw(blt, _u); + case_brr(ble,); + case_brw(ble,); + case_brr(ble, _u); + case_brw(ble, _u); + case_brr(beq,); + case_brw(beq,); + case_brr(bge,); + case_brw(bge,); + case_brr(bge, _u); + case_brw(bge, _u); + case_brr(bgt,); + case_brw(bgt,); + case_brr(bgt, _u); + case_brw(bgt, _u); + case_brr(bne,); + case_brw(bne,); + case_brr(boadd,); + case_brw(boadd,); + case_brr(boadd, _u); + case_brw(boadd, _u); + case_brr(bxadd,); + case_brw(bxadd,); + case_brr(bxadd, _u); + case_brw(bxadd, _u); + case_brr(bosub,); + case_brw(bosub,); + case_brr(bosub, _u); + case_brw(bosub, _u); + case_brr(bxsub,); + case_brw(bxsub,); + case_brr(bxsub, _u); + case_brw(bxsub, _u); + case_brr(bms,); + case_brw(bms,); + case_brr(bmc,); + case_brw(bmc,); + case_rrr(add, _f); + case_rrf(add); + case_rrr(sub, _f); + case_rrf(sub); + case_rrf(rsb); + case_rrr(mul, _f); + case_rrf(mul); + case_rrr(div, _f); + case_rrf(div); + case_rr(abs, _f); + case_rr(neg, _f); + case_rr(sqrt, _f); + case_rr(ext, _f); + case_rr(ld, _f); + case_rw(ld, _f); + case_rrr(ldx, _f); + case_rrw(ldx, _f); + case_rr(st, _f); + case_wr(st, _f); + case_rrr(stx, _f); + case_wrr(stx, _f); + case_rr(mov, _f); + case jit_code_movi_f: + assert_data(node); + movi_f(rn(node->u.w), node->v.f); + break; + case_rr(ext, _d_f); + case_rrr(lt, _f); + case_rrf(lt); + case_rrr(le, _f); + case_rrf(le); + case_rrr(eq, 
_f); + case_rrf(eq); + case_rrr(ge, _f); + case_rrf(ge); + case_rrr(gt, _f); + case_rrf(gt); + case_rrr(ne, _f); + case_rrf(ne); + case_rrr(unlt, _f); + case_rrf(unlt); + case_rrr(unle, _f); + case_rrf(unle); + case_rrr(uneq, _f); + case_rrf(uneq); + case_rrr(unge, _f); + case_rrf(unge); + case_rrr(ungt, _f); + case_rrf(ungt); + case_rrr(ltgt, _f); + case_rrf(ltgt); + case_rrr(ord, _f); + case_rrf(ord); + case_rrr(unord, _f); + case_rrf(unord); + case_brr(blt, _f); + case_brf(blt); + case_brr(ble, _f); + case_brf(ble); + case_brr(beq, _f); + case_brf(beq); + case_brr(bge, _f); + case_brf(bge); + case_brr(bgt, _f); + case_brf(bgt); + case_brr(bne, _f); + case_brf(bne); + case_brr(bunlt, _f); + case_brf(bunlt); + case_brr(bunle, _f); + case_brf(bunle); + case_brr(buneq, _f); + case_brf(buneq); + case_brr(bunge, _f); + case_brf(bunge); + case_brr(bungt, _f); + case_brf(bungt); + case_brr(bltgt, _f); + case_brf(bltgt); + case_brr(bord, _f); + case_brf(bord); + case_brr(bunord, _f); + case_brf(bunord); + case_rrr(add, _d); + case_rrd(add); + case_rrr(sub, _d); + case_rrd(sub); + case_rrd(rsb); + case_rrr(mul, _d); + case_rrd(mul); + case_rrr(div, _d); + case_rrd(div); + case_rr(abs, _d); + case_rr(neg, _d); + case_rr(sqrt, _d); + case_rr(ext, _d); + case_rr(ld, _d); + case_rw(ld, _d); + case_rrr(ldx, _d); + case_rrw(ldx, _d); + case_rr(st, _d); + case_wr(st, _d); + case_rrr(stx, _d); + case_wrr(stx, _d); + case_rr(mov, _d); + case jit_code_movi_d: + assert_data(node); + movi_d(rn(node->u.w), node->v.d); + break; + case_rr(ext, _f_d); + case_rrr(lt, _d); + case_rrd(lt); + case_rrr(le, _d); + case_rrd(le); + case_rrr(eq, _d); + case_rrd(eq); + case_rrr(ge, _d); + case_rrd(ge); + case_rrr(gt, _d); + case_rrd(gt); + case_rrr(ne, _d); + case_rrd(ne); + case_rrr(unlt, _d); + case_rrd(unlt); + case_rrr(unle, _d); + case_rrd(unle); + case_rrr(uneq, _d); + case_rrd(uneq); + case_rrr(unge, _d); + case_rrd(unge); + case_rrr(ungt, _d); + case_rrd(ungt); + case_rrr(ltgt, _d); + 
case_rrd(ltgt); + case_rrr(ord, _d); + case_rrd(ord); + case_rrr(unord, _d); + case_rrd(unord); + case_brr(blt, _d); + case_brd(blt); + case_brr(ble, _d); + case_brd(ble); + case_brr(beq, _d); + case_brd(beq); + case_brr(bge, _d); + case_brd(bge); + case_brr(bgt, _d); + case_brd(bgt); + case_brr(bne, _d); + case_brd(bne); + case_brr(bunlt, _d); + case_brd(bunlt); + case_brr(bunle, _d); + case_brd(bunle); + case_brr(buneq, _d); + case_brd(buneq); + case_brr(bunge, _d); + case_brd(bunge); + case_brr(bungt, _d); + case_brd(bungt); + case_brr(bltgt, _d); + case_brd(bltgt); + case_brr(bord, _d); + case_brd(bord); + case_brr(bunord, _d); + case_brd(bunord); + case jit_code_jmpr: + jmpr(rn(node->u.w)); + break; + case jit_code_jmpi: + if (node->flag & jit_flag_node) { + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + jmpi(temp->u.w); + else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (can_sign_extend_si26_p(word)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); + patch(word, node); + } + } + else + jmpi(node->u.w); + break; + case jit_code_callr: + callr(rn(node->u.w)); + break; + case jit_code_calli: + if (node->flag & jit_flag_node) { + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + calli(temp->u.w); + else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (can_sign_extend_si26_p(word)) + word = calli(_jit->pc.w); + else + word = calli_p(_jit->pc.w); + patch(word, node); + } + } + else + calli(node->u.w); + break; + case jit_code_prolog: + _jitc->function = _jitc->functions.ptr + node->w.w; + undo.node = node; + undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif + undo.patch_offset = _jitc->patches.offset; + restart_function: + _jitc->again = 0; + prolog(node); + 
break; + case jit_code_epilog: + assert(_jitc->function == _jitc->functions.ptr + node->w.w); + if (_jitc->again) { + for (temp = undo.node->next; + temp != node; temp = temp->next) { + if (temp->code == jit_code_label || + temp->code == jit_code_epilog) + temp->flag &= ~jit_flag_patch; + } + temp->flag &= ~jit_flag_patch; + node = undo.node; + _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif + _jitc->patches.offset = undo.patch_offset; + goto restart_function; + } + if (node->link && (word = _jit->pc.w & 3)) + nop(4 - word); + /* remember label is defined */ + node->flag |= jit_flag_patch; + node->u.w = _jit->pc.w; + epilog(node); + _jitc->function = NULL; + break; + case jit_code_movr_w_f: + movr_w_f(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movr_f_w: + movr_f_w(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movi_f_w: + assert_data(node); + movi_f_w(rn(node->u.w), node->v.f); + break; + case jit_code_movr_w_d: + movr_w_d(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movr_d_w: + movr_d_w(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movi_d_w: + assert_data(node); + movi_d_w(rn(node->u.w), node->v.d); + break; + case jit_code_va_start: + vastart(rn(node->u.w)); + break; + case jit_code_va_arg: + vaarg(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_va_arg_d: + vaarg_d(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_live: case jit_code_ellipsis: + case jit_code_va_push: + case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: + case jit_code_arg_f: case 
jit_code_arg_d: + case jit_code_va_end: + case jit_code_ret: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: + case jit_code_retr_f: case jit_code_reti_f: + case jit_code_retr_d: case jit_code_reti_d: + case jit_code_getarg_c: case jit_code_getarg_uc: + case jit_code_getarg_s: case jit_code_getarg_us: + case jit_code_getarg_i: + case jit_code_getarg_ui: case jit_code_getarg_l: + case jit_code_getarg_f: case jit_code_getarg_d: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: + case jit_code_putargr_f: case jit_code_putargi_f: + case jit_code_putargr_d: case jit_code_putargi_d: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: + case jit_code_pushargr_f: case jit_code_pushargi_f: + case jit_code_pushargr_d: case jit_code_pushargi_d: + case jit_code_retval_c: case jit_code_retval_uc: + case jit_code_retval_s: case jit_code_retval_us: + case jit_code_retval_i: + case jit_code_retval_ui: case jit_code_retval_l: + case jit_code_retval_f: case jit_code_retval_d: + case jit_code_prepare: + case jit_code_finishr: case jit_code_finishi: + break; + default: + 
abort(); + } + if (jit_carry != _NOREG) { + switch (node->code) { + case jit_code_note: + case jit_code_addcr: case jit_code_addci: + case jit_code_addxr: case jit_code_addxi: + case jit_code_subcr: case jit_code_subci: + case jit_code_subxr: case jit_code_subxi: + break; + default: + jit_unget_reg(jit_carry); + jit_carry = _NOREG; + break; + } + } + jit_regarg_clr(node, value); + assert(_jitc->regarg == 0 || + (jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry))); + assert(_jitc->synth == 0); + /* update register live state */ + jit_reglive(node); + } +#undef case_brw +#undef case_brr +#undef case_wrr +#undef case_rrw +#undef case_rrr +#undef case_wr +#undef case_rw +#undef case_rr + + for (offset = 0; offset < _jitc->patches.offset; offset++) { + node = _jitc->patches.ptr[offset].node; + word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w; + patch_at(_jitc->patches.ptr[offset].inst, word); + } + + jit_flush(_jit->code.ptr, _jit->pc.uc); + + return (_jit->code.ptr); +} + +#define CODE 1 +# include "jit_loongarch-cpu.c" +# include "jit_loongarch-fpu.c" +#undef CODE + +void +jit_flush(void *fptr, void *tptr) +{ +#if defined(__GNUC__) + jit_word_t f, t, s; + + s = sysconf(_SC_PAGE_SIZE); + f = (jit_word_t)fptr & -s; + t = (((jit_word_t)tptr) + s - 1) & -s; + __clear_cache((void *)f, (void *)t); +#endif +} + +void +_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + ldxi(rn(r0), rn(r1), i0); +} + +void +_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + stxi(i0, rn(r0), rn(r1)); +} + +void +_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + ldxi_d(rn(r0), rn(r1), i0); +} + +void +_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + stxi_d(i0, rn(r0), rn(r1)); +} + +static void +_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) +{ + jit_int32_t flag; + + assert(node->flag & jit_flag_node); + if (node->code == 
jit_code_movi) + flag = node->v.n->flag; + else + flag = node->u.n->flag; + assert(!(flag & jit_flag_patch)); + if (_jitc->patches.offset >= _jitc->patches.length) { + jit_realloc((jit_pointer_t *)&_jitc->patches.ptr, + _jitc->patches.length * sizeof(jit_patch_t), + (_jitc->patches.length + 1024) * sizeof(jit_patch_t)); + _jitc->patches.length += 1024; + } + _jitc->patches.ptr[_jitc->patches.offset].inst = instr; + _jitc->patches.ptr[_jitc->patches.offset].node = node; + ++_jitc->patches.offset; +} diff --git a/deps/lightning/lib/jit_memory.c b/deps/lightning/lib/jit_memory.c index 4d7f92da0..e4e5deb3d 100644 --- a/deps/lightning/lib/jit_memory.c +++ b/deps/lightning/lib/jit_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -19,7 +19,6 @@ #include #include -#include /* * Prototypes diff --git a/deps/lightning/lib/jit_mips-cpu.c b/deps/lightning/lib/jit_mips-cpu.c index b73f4b181..44fa99d30 100644 --- a/deps/lightning/lib/jit_mips-cpu.c +++ b/deps/lightning/lib/jit_mips-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -54,8 +54,11 @@ typedef union { #endif int op; } jit_instr_t; -/* FIXME */ +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) +# define jit_mips2_p() 1 +#else # define jit_mips2_p() 0 +#endif # define _ZERO_REGNO 0 # define _T0_REGNO 0x08 # define _T1_REGNO 0x09 @@ -107,7 +110,8 @@ typedef union { # endif # define can_sign_extend_short_p(im) ((im) >= -32678 && (im) <= 32767) # define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535) -# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) == 1) : 0) +# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0) +# define is_middle_mask(im) ((im) ? 
(__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) <= 1) : 0) # define is_high_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) == 0) : 0) # define masked_bits_count(im) __builtin_popcountl(im) # define unmasked_bits_count(im) (__WORDSIZE - masked_bits_count(im)) @@ -321,6 +325,7 @@ static void _nop(jit_state_t*,jit_int32_t); # define DADDIU(rt,rs,im) hrri(MIPS_DADDIU,rs,rt,im) # define SUBU(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_SUBU) # define DSUBU(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_DSUBU) +# define MUL(rd,rs,rt) hrrr_t(MIPS_SPECIAL2,rs,rt,rd,MIPS_MUL) # define MULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT) # define MULTU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU) # define DMULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT) @@ -346,10 +351,15 @@ static void _nop(jit_state_t*,jit_int32_t); # define DSRL32(rd,rt,sa) rrit(rt,rd,sa,MIPS_DSRL32) # define INS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_INS) # define DINS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_DINS) +# define DINSU(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-32-1,pos-32,MIPS_DINSU) +# define DINSM(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-32-1,pos,MIPS_DINSM) # define EXT(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_EXT) # define DEXT(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_DEXT) +# define DEXTU(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos-32,MIPS_DEXTU) +# define DEXTM(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-32-1,pos,MIPS_DEXTM) # define ROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL) # define DROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL) +# define SYNC() rrr_t(_ZERO_REGNO,_ZERO_REGNO,_ZERO_REGNO,MIPS_SYNC) # define MFHI(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI) # define MFLO(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFLO) # define MTHI(rs) rrr_t(rs,_ZERO_REGNO,_ZERO_REGNO,MIPS_MTHI) @@ -367,10 +377,14 @@ static void _nop(jit_state_t*,jit_int32_t); # define 
LW(rt,of,rb) hrri(MIPS_LW,rb,rt,of) # define LWU(rt,of,rb) hrri(MIPS_LWU,rb,rt,of) # define LD(rt,of,rb) hrri(MIPS_LD,rb,rt,of) +# define LL(rt,of,rb) hrri(MIPS_LL,rb,rt,of) +# define LLD(rt,of,rb) hrri(MIPS_LLD,rb,rt,of) # define SB(rt,of,rb) hrri(MIPS_SB,rb,rt,of) # define SH(rt,of,rb) hrri(MIPS_SH,rb,rt,of) # define SW(rt,of,rb) hrri(MIPS_SW,rb,rt,of) # define SD(rt,of,rb) hrri(MIPS_SD,rb,rt,of) +# define SC(rt,of,rb) hrri(MIPS_SC,rb,rt,of) +# define SCD(rt,of,rb) hrri(MIPS_SCD,rb,rt,of) # define WSBH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL) # define SEB(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL) # define SEH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL) @@ -391,6 +405,8 @@ static void _nop(jit_state_t*,jit_int32_t); # define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR) # endif # define J(i0) hi(MIPS_J,i0) +# define JAL(i0) hi(MIPS_JAL,i0) +# define MOVN(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVN) # define MOVZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVZ) # define comr(r0,r1) xori(r0,r1,-1) # define negr(r0,r1) subr(r0,_ZERO_REGNO,r1) @@ -411,6 +427,10 @@ static void _nop(jit_state_t*,jit_int32_t); # define div(rs,rt) DDIV(rs,rt) # define divu(rs,rt) DDIVU(rs,rt) # endif +# define extr(rd,rt,lsb,nb) _extr(_jit,rd,rt,lsb,nb) +static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define insr(rd,rt,lsb,nb) _insr(_jit,rd,rt,lsb,nb) +static void _insr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define addcr(r0,r1,r2) _addcr(_jit,r0,r1,r2) @@ -506,6 +526,13 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) MOVN(r0, r1, r2) +# define movzr(r0,r1,r2) MOVZ(r0, r1, r2) +# define 
casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define ldr_c(r0,r1) LB(r0,0,r1) # define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t); @@ -591,23 +618,12 @@ static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # endif -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 64 -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# endif -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1) # endif # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); @@ -806,6 +822,38 @@ _nop(jit_state_t *_jit, jit_int32_t i0) assert(i0 == 0); } +static void +_extr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t pos, jit_int32_t size) +{ + assert(size > 0); + + if (__WORDSIZE == 32) + EXT(r0, r1, pos, size); + else if (pos >= 32) + DEXTU(r0, r1, pos, size); + else if (size > 32) + DEXTM(r0, r1, 
pos, size); + else + DEXT(r0, r1, pos, size); +} + +static void +_insr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t pos, jit_int32_t size) +{ + assert(size > 0); + + if (__WORDSIZE == 32) + INS(r0, r1, pos, size); + else if (pos >= 32) + DINSU(r0, r1, pos, size); + else if (size > 32) + DINSM(r0, r1, pos, size); + else + DINS(r0, r1, pos, size); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1000,8 +1048,12 @@ _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - multu(r1, r2); - MFLO(r0); + if (jit_mips2_p() && __WORDSIZE == 32) + MUL(r0, r1, r2); + else { + multu(r1, r2); + MFLO(r0); + } } static void @@ -1166,25 +1218,33 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_zero_extend_short_p(i0)) - ANDI(r0, r1, i0); + ANDI(r0, r1, i0); else if (is_low_mask(i0)) { -#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) - if (masked_bits_count(i0) <= 32) - EXT(r0, r1, 0, masked_bits_count(i0)); - else -#endif - { - lshi(r0, r1, unmasked_bits_count(i0)); - rshi_u(r0, r0, unmasked_bits_count(i0)); - } + if (jit_mips2_p()) + extr(r0, r1, 0, masked_bits_count(i0)); + else { + lshi(r0, r1, unmasked_bits_count(i0)); + rshi_u(r0, r0, unmasked_bits_count(i0)); + } } else if (is_high_mask(i0)) { - rshi(r0, r1, unmasked_bits_count(i0)); - lshi(r0, r0, unmasked_bits_count(i0)); + if (jit_mips2_p() && r0 == r1) + insr(r0, _ZERO_REGNO, 0, unmasked_bits_count(i0)); + else { + rshi(r0, r1, unmasked_bits_count(i0)); + lshi(r0, r0, unmasked_bits_count(i0)); + } + } else if (jit_mips2_p() && is_middle_mask(i0)) { + extr(r0, r1, __builtin_ctzl(i0), masked_bits_count(i0)); + lshi(r0, r0, __builtin_ctzl(i0)); + } else if (jit_mips2_p() && is_middle_mask(~i0)) { + if (r0 != r1) + movr(r0, r1); + insr(r0, _ZERO_REGNO, __builtin_ctzl(~i0), 
masked_bits_count(~i0)); } else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - AND(r0, r1, rn(reg)); - jit_unget_reg(reg); + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + AND(r0, r1, rn(reg)); + jit_unget_reg(reg); } } @@ -1279,6 +1339,47 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + SYNC(); + /* retry: */ + retry = _jit->pc.w; +# if __WORDSIZE == 32 + LL(r0, 0, r1); +# else + LLD(r0, 0, r1); +# endif + jump0 = _jit->pc.w; + BNE(r0, r2, 1); /* bne done r0 r2 */ + movi(r0, 0); /* set to 0 in delay slot */ + movr(r0, r3); /* after jump and delay slot */ + /* store new value */ +# if __WORDSIZE == 32 + SC(r0, 0, r1); +# else + SCD(r0, 0, r1); +# endif + jump1 = _jit->pc.w; + BEQ(r0, _ZERO_REGNO, 0); /* beqi retry r0 0 */ + movi(r0, 1); /* set to 1 in delay slot */ + SYNC(); + /* done: */ + done = _jit->pc.w; + patch_at(jump0, done); + patch_at(jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); +} + static void _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -1705,60 +1806,34 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } #endif -# if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - -static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - jit_int32_t t1; - jit_int32_t t2; - t0 = jit_get_reg(jit_class_gpr); - t1 = 
jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 24); - rshi(rn(t1), r1, 16); - rshi(rn(t2), r1, 8); - andi(rn(t0), rn(t0), 0xff); - andi(rn(t1), rn(t1), 0xff); - andi(rn(t2), rn(t2), 0xff); - andi(r0, r1, 0xff); - lshi(r0, r0, 24); - lshi(rn(t1), rn(t1), 8); - orr(r0, r0, rn(t0)); - lshi(rn(t2), rn(t2), 16); - orr(r0, r0, rn(t1)); - orr(r0, r0, rn(t2)); - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); + if (jit_mips2_p()) { + extr_us(r0, r1); + WSBH(r0, r0); + } else { + generic_bswapr_us(_jit, r0, r1); + } } static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - rshi_u(rn(reg), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(reg), rn(reg)); - lshi(r0, r0, 32); - orr(r0, r0, rn(reg)); - jit_unget_reg(reg); + if (jit_mips2_p()) { + if (__WORDSIZE == 64) { + SLL(r0, r1, 0); + WSBH(r0, r0); + ROTR(r0, r0, 16); + extr(r0, r0, 0, 32); + } else { + WSBH(r0, r1); + ROTR(r0, r0, 16); + } + } else { + generic_bswapr_ui(_jit, r0, r1); + } } -# endif static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) @@ -1786,8 +1861,12 @@ _extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - lshi(r0, r1, 32); - rshi_u(r0, r0, 32); + if (jit_mips2_p()) + DEXT(r0, r1, 0, 32); + else { + lshi(r0, r1, 32); + rshi_u(r0, r0, 32); + } } # endif @@ -1833,10 +1912,8 @@ _lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (i0 == 0) { - SLT(r0, _ZERO_REGNO, r1); - XORI(r0, r0, 1); - } + if (can_sign_extend_short_p(i0 + 1)) + SLTI(r0, r1, i0 + 1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1857,10 +1934,8 @@ _lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (i0 == 0) { - SLTU(r0, _ZERO_REGNO, r1); - XORI(r0, r0, 1); 
- } + if (can_sign_extend_short_p(i0 + 1)) + SLTIU(r0, r1, i0 + 1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1901,10 +1976,15 @@ _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ger(r0, r1, rn(reg)); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0)) { + SLTI(r0, r1, i0); + XORI(r0, r0, 1); + } else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ger(r0, r1, rn(reg)); + jit_unget_reg(reg); + } } static void @@ -1919,10 +1999,15 @@ _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ger_u(r0, r1, rn(reg)); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0)) { + SLTIU(r0, r1, i0); + XORI(r0, r0, 1); + } else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ger_u(r0, r1, rn(reg)); + jit_unget_reg(reg); + } } static void @@ -2845,16 +2930,12 @@ _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_word_t w; jit_int32_t t0; t0 = jit_get_reg(jit_class_gpr|jit_class_nospill); - if (can_zero_extend_short_p(i1)) { - ANDI(rn(t0), r0, i1); - w = _jit->pc.w; - BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); - NOP(1); - } - else { - movi(rn(t0), i1); - w = bmsr(i0, r0, rn(t0)); - } + + andi(rn(t0), r0, i1); + w = _jit->pc.w; + BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); + NOP(1); + jit_unget_reg(t0); return (w); } @@ -2879,16 +2960,12 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_word_t w; jit_int32_t t0; t0 = jit_get_reg(jit_class_gpr|jit_class_nospill); - if (can_zero_extend_short_p(i1)) { - ANDI(rn(t0), r0, i1); - w = _jit->pc.w; - BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); - NOP(1); - } - else { - movi(rn(t0), i1); - w = bmcr(i0, r0, rn(t0)); - } + + andi(rn(t0), r0, i1); + w = _jit->pc.w; + BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); + NOP(1); + 
jit_unget_reg(t0); return (w); } @@ -2906,9 +2983,37 @@ _callr(jit_state_t *_jit, jit_int32_t r0) static void _calli(jit_state_t *_jit, jit_word_t i0) { - movi(_T9_REGNO, i0); - JALR(_T9_REGNO); - NOP(1); + jit_word_t w; + w = _jit->pc.w; + if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) { + if (can_sign_extend_short_p(i0)) { + JAL((i0 & ~0xf0000000) >> 2); + addiu(_T9_REGNO, _ZERO_REGNO, i0); + } + else if (can_zero_extend_short_p(i0)) { + JAL((i0 & ~0xf0000000) >> 2); + ORI(_T9_REGNO, _ZERO_REGNO, i0); + } + else if (can_sign_extend_int_p(i0)) { + if (i0 & 0xffff) { + LUI(_T9_REGNO, i0 >> 16); + JAL((i0 & ~0xf0000000) >> 2); + ORI(_T9_REGNO, _T9_REGNO, i0); + } + else { + JAL((i0 & ~0xf0000000) >> 2); + LUI(_T9_REGNO, i0 >> 16); + } + } + else + goto fallback; + } + else { + fallback: + movi(_T9_REGNO, i0); + JALR(_T9_REGNO); + NOP(1); + } } static jit_word_t diff --git a/deps/lightning/lib/jit_mips-fpu.c b/deps/lightning/lib/jit_mips-fpu.c index 7513219b3..6209fd680 100644 --- a/deps/lightning/lib/jit_mips-fpu.c +++ b/deps/lightning/lib/jit_mips-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_mips-sz.c b/deps/lightning/lib/jit_mips-sz.c index 613aa0090..f49406d80 100644 --- a/deps/lightning/lib/jit_mips-sz.c +++ b/deps/lightning/lib/jit_mips-sz.c @@ -1,416 +1,12 @@ #if __WORDSIZE == 32 -#if NEW_ABI -#define JIT_INSTR_MAX 44 - 0, /* data */ - 0, /* live */ - 0, /* align */ - 0, /* save */ - 0, /* load */ - 0, /* #name */ - 0, /* #note */ - 0, /* label */ - 44, /* prolog */ - 0, /* ellipsis */ - 0, /* va_push */ - 0, /* allocai */ - 0, /* allocar */ - 0, /* arg */ - 0, /* getarg_c */ - 0, /* getarg_uc */ - 0, /* getarg_s */ - 0, /* getarg_us */ - 0, /* getarg_i */ - 0, /* getarg_ui */ - 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 0, /* va_start */ - 0, /* va_arg */ - 0, /* va_arg_d */ - 0, /* va_end */ - 4, /* addr */ - 12, /* addi */ - 12, /* addcr */ - 20, /* addci */ - 28, /* addxr */ - 28, /* addxi */ - 4, /* subr */ - 12, /* subi */ - 12, /* subcr */ - 20, /* subci */ - 28, /* subxr */ - 28, /* subxi */ - 16, /* rsbi */ - 8, /* mulr */ - 16, /* muli */ - 12, /* qmulr */ - 20, /* qmuli */ - 12, /* qmulr_u */ - 20, /* qmuli_u */ - 8, /* divr */ - 16, /* divi */ - 8, /* divr_u */ - 16, /* divi_u */ - 12, /* qdivr */ - 16, /* qdivi */ - 12, /* qdivr_u */ - 16, /* qdivi_u */ - 8, /* remr */ - 16, /* remi */ - 8, /* remr_u */ - 16, /* remi_u */ - 4, /* andr */ - 12, /* andi */ - 4, /* orr */ - 12, /* ori */ - 4, /* xorr */ - 12, /* xori */ - 4, /* lshr */ - 4, /* lshi */ - 4, /* rshr */ - 4, /* rshi */ - 4, /* rshr_u */ - 4, /* rshi_u */ - 4, /* negr */ - 8, /* comr */ - 4, /* ltr */ - 4, /* lti */ - 4, /* ltr_u */ - 4, /* lti_u */ - 8, /* ler */ - 12, /* lei */ - 8, /* ler_u */ - 12, /* lei_u */ - 12, /* eqr */ - 12, /* eqi */ - 8, /* ger */ - 12, /* gei */ - 8, /* ger_u */ - 12, /* gei_u */ - 4, /* gtr */ - 8, /* gti */ - 4, /* gtr_u */ - 8, /* gti_u */ - 8, /* ner */ - 8, /* nei */ - 4, /* movr */ - 8, /* movi */ - 8, /* extr_c */ - 4, /* extr_uc */ - 8, /* extr_s */ - 4, /* extr_us */ - 0, /* extr_i 
*/ - 0, /* extr_ui */ - 4, /* htonr_us */ - 4, /* htonr_ui */ - 0, /* htonr_ul */ - 4, /* ldr_c */ - 12, /* ldi_c */ - 4, /* ldr_uc */ - 12, /* ldi_uc */ - 4, /* ldr_s */ - 12, /* ldi_s */ - 4, /* ldr_us */ - 12, /* ldi_us */ - 4, /* ldr_i */ - 12, /* ldi_i */ - 0, /* ldr_ui */ - 0, /* ldi_ui */ - 0, /* ldr_l */ - 0, /* ldi_l */ - 8, /* ldxr_c */ - 4, /* ldxi_c */ - 8, /* ldxr_uc */ - 4, /* ldxi_uc */ - 8, /* ldxr_s */ - 4, /* ldxi_s */ - 8, /* ldxr_us */ - 4, /* ldxi_us */ - 8, /* ldxr_i */ - 4, /* ldxi_i */ - 0, /* ldxr_ui */ - 0, /* ldxi_ui */ - 0, /* ldxr_l */ - 0, /* ldxi_l */ - 4, /* str_c */ - 12, /* sti_c */ - 4, /* str_s */ - 12, /* sti_s */ - 4, /* str_i */ - 12, /* sti_i */ - 0, /* str_l */ - 0, /* sti_l */ - 8, /* stxr_c */ - 4, /* stxi_c */ - 8, /* stxr_s */ - 4, /* stxi_s */ - 8, /* stxr_i */ - 4, /* stxi_i */ - 0, /* stxr_l */ - 0, /* stxi_l */ - 12, /* bltr */ - 12, /* blti */ - 12, /* bltr_u */ - 12, /* blti_u */ - 12, /* bler */ - 16, /* blei */ - 12, /* bler_u */ - 16, /* blei_u */ - 8, /* beqr */ - 16, /* beqi */ - 12, /* bger */ - 12, /* bgei */ - 12, /* bger_u */ - 12, /* bgei_u */ - 12, /* bgtr */ - 16, /* bgti */ - 12, /* bgtr_u */ - 16, /* bgti_u */ - 8, /* bner */ - 16, /* bnei */ - 12, /* bmsr */ - 12, /* bmsi */ - 12, /* bmcr */ - 12, /* bmci */ - 28, /* boaddr */ - 28, /* boaddi */ - 16, /* boaddr_u */ - 20, /* boaddi_u */ - 28, /* bxaddr */ - 28, /* bxaddi */ - 16, /* bxaddr_u */ - 20, /* bxaddi_u */ - 28, /* bosubr */ - 28, /* bosubi */ - 16, /* bosubr_u */ - 20, /* bosubi_u */ - 28, /* bxsubr */ - 28, /* bxsubi */ - 16, /* bxsubr_u */ - 20, /* bxsubi_u */ - 0, /* jmpr */ - 8, /* jmpi */ - 12, /* callr */ - 16, /* calli */ - 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ - 0, /* finishr */ - 0, /* finishi */ - 0, /* ret */ - 0, /* retr */ - 0, /* reti */ - 0, /* retval_c */ - 0, /* retval_uc */ - 0, /* retval_s */ - 0, /* retval_us */ - 0, /* retval_i */ - 0, /* retval_ui */ - 0, /* retval_l */ - 44, /* epilog */ - 0, /* 
arg_f */ - 0, /* getarg_f */ - 0, /* putargr_f */ - 0, /* putargi_f */ - 4, /* addr_f */ - 16, /* addi_f */ - 4, /* subr_f */ - 16, /* subi_f */ - 16, /* rsbi_f */ - 4, /* mulr_f */ - 16, /* muli_f */ - 4, /* divr_f */ - 16, /* divi_f */ - 4, /* negr_f */ - 4, /* absr_f */ - 4, /* sqrtr_f */ - 16, /* ltr_f */ - 28, /* lti_f */ - 16, /* ler_f */ - 28, /* lei_f */ - 16, /* eqr_f */ - 28, /* eqi_f */ - 16, /* ger_f */ - 28, /* gei_f */ - 16, /* gtr_f */ - 28, /* gti_f */ - 16, /* ner_f */ - 28, /* nei_f */ - 16, /* unltr_f */ - 28, /* unlti_f */ - 16, /* unler_f */ - 28, /* unlei_f */ - 16, /* uneqr_f */ - 28, /* uneqi_f */ - 16, /* unger_f */ - 28, /* ungei_f */ - 16, /* ungtr_f */ - 28, /* ungti_f */ - 16, /* ltgtr_f */ - 28, /* ltgti_f */ - 16, /* ordr_f */ - 28, /* ordi_f */ - 16, /* unordr_f */ - 28, /* unordi_f */ - 8, /* truncr_f_i */ - 0, /* truncr_f_l */ - 8, /* extr_f */ - 4, /* extr_d_f */ - 4, /* movr_f */ - 12, /* movi_f */ - 4, /* ldr_f */ - 12, /* ldi_f */ - 8, /* ldxr_f */ - 4, /* ldxi_f */ - 4, /* str_f */ - 12, /* sti_f */ - 8, /* stxr_f */ - 4, /* stxi_f */ - 12, /* bltr_f */ - 24, /* blti_f */ - 12, /* bler_f */ - 24, /* blei_f */ - 12, /* beqr_f */ - 24, /* beqi_f */ - 12, /* bger_f */ - 24, /* bgei_f */ - 12, /* bgtr_f */ - 24, /* bgti_f */ - 12, /* bner_f */ - 24, /* bnei_f */ - 12, /* bunltr_f */ - 24, /* bunlti_f */ - 12, /* bunler_f */ - 24, /* bunlei_f */ - 12, /* buneqr_f */ - 24, /* buneqi_f */ - 12, /* bunger_f */ - 24, /* bungei_f */ - 12, /* bungtr_f */ - 24, /* bungti_f */ - 12, /* bltgtr_f */ - 24, /* bltgti_f */ - 12, /* bordr_f */ - 24, /* bordi_f */ - 12, /* bunordr_f */ - 24, /* bunordi_f */ - 0, /* pushargr_f */ - 0, /* pushargi_f */ - 0, /* retr_f */ - 0, /* reti_f */ - 0, /* retval_f */ - 0, /* arg_d */ - 0, /* getarg_d */ - 0, /* putargr_d */ - 0, /* putargi_d */ - 4, /* addr_d */ - 16, /* addi_d */ - 4, /* subr_d */ - 16, /* subi_d */ - 16, /* rsbi_d */ - 4, /* mulr_d */ - 16, /* muli_d */ - 4, /* divr_d */ - 16, /* divi_d */ 
- 4, /* negr_d */ - 4, /* absr_d */ - 4, /* sqrtr_d */ - 16, /* ltr_d */ - 28, /* lti_d */ - 16, /* ler_d */ - 28, /* lei_d */ - 16, /* eqr_d */ - 28, /* eqi_d */ - 16, /* ger_d */ - 28, /* gei_d */ - 16, /* gtr_d */ - 28, /* gti_d */ - 16, /* ner_d */ - 28, /* nei_d */ - 16, /* unltr_d */ - 28, /* unlti_d */ - 16, /* unler_d */ - 28, /* unlei_d */ - 16, /* uneqr_d */ - 28, /* uneqi_d */ - 16, /* unger_d */ - 28, /* ungei_d */ - 16, /* ungtr_d */ - 28, /* ungti_d */ - 16, /* ltgtr_d */ - 28, /* ltgti_d */ - 16, /* ordr_d */ - 28, /* ordi_d */ - 16, /* unordr_d */ - 28, /* unordi_d */ - 8, /* truncr_d_i */ - 0, /* truncr_d_l */ - 8, /* extr_d */ - 4, /* extr_f_d */ - 4, /* movr_d */ - 12, /* movi_d */ - 4, /* ldr_d */ - 12, /* ldi_d */ - 8, /* ldxr_d */ - 4, /* ldxi_d */ - 4, /* str_d */ - 12, /* sti_d */ - 8, /* stxr_d */ - 4, /* stxi_d */ - 12, /* bltr_d */ - 24, /* blti_d */ - 12, /* bler_d */ - 24, /* blei_d */ - 12, /* beqr_d */ - 24, /* beqi_d */ - 12, /* bger_d */ - 24, /* bgei_d */ - 12, /* bgtr_d */ - 24, /* bgti_d */ - 12, /* bner_d */ - 24, /* bnei_d */ - 12, /* bunltr_d */ - 24, /* bunlti_d */ - 12, /* bunler_d */ - 24, /* bunlei_d */ - 12, /* buneqr_d */ - 24, /* buneqi_d */ - 12, /* bunger_d */ - 24, /* bungei_d */ - 12, /* bungtr_d */ - 24, /* bungti_d */ - 12, /* bltgtr_d */ - 24, /* bltgti_d */ - 12, /* bordr_d */ - 24, /* bordi_d */ - 12, /* bunordr_d */ - 24, /* bunordi_d */ - 0, /* pushargr_d */ - 0, /* pushargi_d */ - 0, /* retr_d */ - 0, /* reti_d */ - 0, /* retval_d */ - 0, /* movr_w_f */ - 0, /* movr_ww_d */ - 0, /* movr_w_d */ - 0, /* movr_f_w */ - 0, /* movi_f_w */ - 0, /* movr_d_ww */ - 0, /* movi_d_ww */ - 4, /* movr_d_w */ - 12, /* movi_d_w */ -#endif /* NEW_ABI */ -#endif /* __WORDSIZE */ - -#if __WORDSIZE == 32 -#if !NEW_ABI #define JIT_INSTR_MAX 116 0, /* data */ 0, /* live */ - 0, /* align */ + 20, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -419,7 +15,10 @@ 0, /* va_push */ 
0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -427,8 +26,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 20, /* va_arg_d */ @@ -483,15 +94,15 @@ 4, /* ltr_u */ 4, /* lti_u */ 8, /* ler */ - 12, /* lei */ + 4, /* lei */ 8, /* ler_u */ - 12, /* lei_u */ + 4, /* lei_u */ 12, /* eqr */ 12, /* eqi */ 8, /* ger */ - 12, /* gei */ + 8, /* gei */ 8, /* ger_u */ - 12, /* gei_u */ + 8, /* gei_u */ 4, /* gtr */ 8, /* gti */ 4, /* gtr_u */ @@ -500,14 +111,21 @@ 8, /* nei */ 4, /* movr */ 8, /* movi */ + 4, /* movnr */ + 4, /* movzr */ + 36, /* casr */ + 44, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ - 20, /* htonr_us */ - 52, /* htonr_ui */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 0, /* bswapr_ul */ + 4, /* htonr_us */ + 4, /* htonr_ui */ 0, /* htonr_ul */ 4, /* ldr_c */ 12, /* ldi_c */ @@ -595,16 +213,40 @@ 20, /* bxsubi_u */ 8, /* jmpr */ 8, /* jmpi */ - 12, /* callr */ + 8, /* callr */ 16, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 
0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -804,25 +446,28 @@ 8, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ -#endif /* NEW_ABI */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 44 +#define JIT_INSTR_MAX 116 0, /* data */ 0, /* live */ - 4, /* align */ + 16, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 44, /* prolog */ + 76, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -830,11 +475,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 0, /* va_start */ - 0, /* va_arg */ - 0, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 8, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 28, /* addi */ @@ -848,7 +505,7 @@ 36, /* subci */ 28, /* subxr */ 28, /* subxi */ - 32, /* rsbi */ + 36, /* rsbi */ 8, /* mulr */ 32, /* muli */ 12, /* qmulr */ @@ -868,7 +525,7 @@ 8, /* remr_u */ 32, /* remi_u */ 4, /* andr */ - 28, /* andi */ + 16, /* andi */ 4, /* orr */ 28, /* ori */ 4, /* xorr */ @@ -886,15 +543,15 @@ 4, /* ltr_u */ 4, /* lti_u */ 8, /* ler */ - 12, /* lei */ + 4, /* lei */ 8, /* ler_u */ - 12, /* lei_u */ + 4, /* lei_u */ 12, /* eqr */ 12, /* eqi */ 8, /* ger */ - 12, /* gei */ + 8, /* gei */ 8, /* ger_u */ - 12, /* gei_u */ + 8, /* gei_u */ 4, /* gtr */ 8, 
/* gti */ 4, /* gtr_u */ @@ -903,59 +560,66 @@ 8, /* nei */ 4, /* movr */ 28, /* movi */ + 4, /* movnr */ + 4, /* movzr */ + 36, /* casr */ + 56, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 8, /* extr_ui */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ 4, /* htonr_us */ - 4, /* htonr_ui */ + 8, /* htonr_ui */ 4, /* htonr_ul */ 4, /* ldr_c */ - 12, /* ldi_c */ + 24, /* ldi_c */ 4, /* ldr_uc */ - 12, /* ldi_uc */ + 24, /* ldi_uc */ 4, /* ldr_s */ - 12, /* ldi_s */ + 24, /* ldi_s */ 4, /* ldr_us */ - 12, /* ldi_us */ + 24, /* ldi_us */ 4, /* ldr_i */ - 12, /* ldi_i */ + 24, /* ldi_i */ 4, /* ldr_ui */ - 12, /* ldi_ui */ + 24, /* ldi_ui */ 4, /* ldr_l */ - 12, /* ldi_l */ + 24, /* ldi_l */ 8, /* ldxr_c */ - 4, /* ldxi_c */ + 16, /* ldxi_c */ 8, /* ldxr_uc */ - 4, /* ldxi_uc */ + 16, /* ldxi_uc */ 8, /* ldxr_s */ - 4, /* ldxi_s */ + 16, /* ldxi_s */ 8, /* ldxr_us */ - 4, /* ldxi_us */ + 16, /* ldxi_us */ 8, /* ldxr_i */ - 4, /* ldxi_i */ + 16, /* ldxi_i */ 8, /* ldxr_ui */ - 4, /* ldxi_ui */ + 16, /* ldxi_ui */ 8, /* ldxr_l */ - 4, /* ldxi_l */ + 16, /* ldxi_l */ 4, /* str_c */ - 12, /* sti_c */ + 24, /* sti_c */ 4, /* str_s */ - 12, /* sti_s */ + 24, /* sti_s */ 4, /* str_i */ - 12, /* sti_i */ + 24, /* sti_i */ 4, /* str_l */ - 12, /* sti_l */ + 24, /* sti_l */ 8, /* stxr_c */ - 4, /* stxi_c */ + 16, /* stxi_c */ 8, /* stxr_s */ - 4, /* stxi_s */ + 16, /* stxi_s */ 8, /* stxr_i */ - 4, /* stxi_i */ + 16, /* stxi_i */ 8, /* stxr_l */ - 4, /* stxi_l */ + 16, /* stxi_l */ 12, /* bltr */ 12, /* blti */ 12, /* bltr_u */ @@ -996,18 +660,42 @@ 28, /* bxsubi */ 16, /* bxsubr_u */ 20, /* bxsubi_u */ - 0, /* jmpr */ + 8, /* jmpr */ 8, /* jmpi */ - 12, /* callr */ + 8, /* callr */ 32, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 
0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -1015,93 +703,93 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 44, /* epilog */ + 76, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 4, /* addr_f */ - 16, /* addi_f */ + 28, /* addi_f */ 4, /* subr_f */ - 16, /* subi_f */ - 16, /* rsbi_f */ + 28, /* subi_f */ + 28, /* rsbi_f */ 4, /* mulr_f */ - 16, /* muli_f */ + 28, /* muli_f */ 4, /* divr_f */ - 16, /* divi_f */ + 28, /* divi_f */ 4, /* negr_f */ 4, /* absr_f */ 4, /* sqrtr_f */ 16, /* ltr_f */ - 28, /* lti_f */ + 40, /* lti_f */ 16, /* ler_f */ - 28, /* lei_f */ + 40, /* lei_f */ 16, /* eqr_f */ - 28, /* eqi_f */ + 40, /* eqi_f */ 16, /* ger_f */ - 28, /* gei_f */ + 40, /* gei_f */ 16, /* gtr_f */ - 28, /* gti_f */ + 40, /* gti_f */ 16, /* ner_f */ - 28, /* nei_f */ + 40, /* nei_f */ 16, /* unltr_f */ - 28, /* unlti_f */ + 40, /* unlti_f */ 16, /* unler_f */ - 28, /* unlei_f */ + 40, /* unlei_f */ 16, /* uneqr_f */ - 28, /* uneqi_f */ + 40, /* uneqi_f */ 16, /* unger_f */ - 28, /* ungei_f */ + 40, /* ungei_f */ 16, /* ungtr_f */ - 28, /* ungti_f */ + 40, /* ungti_f */ 16, /* ltgtr_f */ - 28, /* ltgti_f */ + 40, /* ltgti_f */ 16, /* ordr_f */ - 28, /* ordi_f */ + 40, /* ordi_f */ 16, /* unordr_f */ - 28, /* unordi_f */ + 40, /* unordi_f */ 8, /* truncr_f_i */ 8, /* truncr_f_l */ 8, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ - 12, /* movi_f */ + 24, /* movi_f */ 4, /* ldr_f */ - 12, /* ldi_f */ + 24, /* ldi_f */ 8, /* 
ldxr_f */ - 4, /* ldxi_f */ + 16, /* ldxi_f */ 4, /* str_f */ - 12, /* sti_f */ + 24, /* sti_f */ 8, /* stxr_f */ - 4, /* stxi_f */ + 16, /* stxi_f */ 12, /* bltr_f */ - 24, /* blti_f */ + 36, /* blti_f */ 12, /* bler_f */ - 24, /* blei_f */ + 36, /* blei_f */ 12, /* beqr_f */ - 24, /* beqi_f */ + 36, /* beqi_f */ 12, /* bger_f */ - 24, /* bgei_f */ + 36, /* bgei_f */ 12, /* bgtr_f */ - 24, /* bgti_f */ + 36, /* bgti_f */ 12, /* bner_f */ - 24, /* bnei_f */ + 36, /* bnei_f */ 12, /* bunltr_f */ - 24, /* bunlti_f */ + 36, /* bunlti_f */ 12, /* bunler_f */ - 24, /* bunlei_f */ + 36, /* bunlei_f */ 12, /* buneqr_f */ - 24, /* buneqi_f */ + 36, /* buneqi_f */ 12, /* bunger_f */ - 24, /* bungei_f */ + 36, /* bungei_f */ 12, /* bungtr_f */ - 24, /* bungti_f */ + 36, /* bungti_f */ 12, /* bltgtr_f */ - 24, /* bltgti_f */ + 36, /* bltgti_f */ 12, /* bordr_f */ - 24, /* bordi_f */ + 36, /* bordi_f */ 12, /* bunordr_f */ - 24, /* bunordi_f */ + 36, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -1112,87 +800,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 4, /* addr_d */ - 16, /* addi_d */ + 28, /* addi_d */ 4, /* subr_d */ - 16, /* subi_d */ - 16, /* rsbi_d */ + 28, /* subi_d */ + 28, /* rsbi_d */ 4, /* mulr_d */ - 16, /* muli_d */ + 28, /* muli_d */ 4, /* divr_d */ - 16, /* divi_d */ + 28, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 4, /* sqrtr_d */ 16, /* ltr_d */ - 28, /* lti_d */ + 44, /* lti_d */ 16, /* ler_d */ - 28, /* lei_d */ + 44, /* lei_d */ 16, /* eqr_d */ - 28, /* eqi_d */ + 44, /* eqi_d */ 16, /* ger_d */ - 28, /* gei_d */ + 44, /* gei_d */ 16, /* gtr_d */ - 28, /* gti_d */ + 44, /* gti_d */ 16, /* ner_d */ - 28, /* nei_d */ + 44, /* nei_d */ 16, /* unltr_d */ - 28, /* unlti_d */ + 44, /* unlti_d */ 16, /* unler_d */ - 28, /* unlei_d */ + 44, /* unlei_d */ 16, /* uneqr_d */ - 28, /* uneqi_d */ + 44, /* uneqi_d */ 16, /* unger_d */ - 28, /* ungei_d */ + 44, /* ungei_d */ 16, /* ungtr_d */ - 28, /* ungti_d */ + 44, /* ungti_d */ 16, /* 
ltgtr_d */ - 28, /* ltgti_d */ + 44, /* ltgti_d */ 16, /* ordr_d */ - 28, /* ordi_d */ + 44, /* ordi_d */ 16, /* unordr_d */ - 28, /* unordi_d */ + 44, /* unordi_d */ 8, /* truncr_d_i */ 8, /* truncr_d_l */ 8, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 12, /* movi_d */ + 28, /* movi_d */ 4, /* ldr_d */ - 12, /* ldi_d */ + 24, /* ldi_d */ 8, /* ldxr_d */ - 4, /* ldxi_d */ + 16, /* ldxi_d */ 4, /* str_d */ - 12, /* sti_d */ + 24, /* sti_d */ 8, /* stxr_d */ - 4, /* stxi_d */ + 16, /* stxi_d */ 12, /* bltr_d */ - 24, /* blti_d */ + 36, /* blti_d */ 12, /* bler_d */ - 24, /* blei_d */ + 36, /* blei_d */ 12, /* beqr_d */ - 24, /* beqi_d */ + 36, /* beqi_d */ 12, /* bger_d */ - 24, /* bgei_d */ + 36, /* bgei_d */ 12, /* bgtr_d */ - 24, /* bgti_d */ + 36, /* bgti_d */ 12, /* bner_d */ - 24, /* bnei_d */ + 40, /* bnei_d */ 12, /* bunltr_d */ - 24, /* bunlti_d */ + 40, /* bunlti_d */ 12, /* bunler_d */ - 24, /* bunlei_d */ + 40, /* bunlei_d */ 12, /* buneqr_d */ - 24, /* buneqi_d */ + 40, /* buneqi_d */ 12, /* bunger_d */ - 24, /* bungei_d */ + 40, /* bungei_d */ 12, /* bungtr_d */ - 24, /* bungti_d */ + 40, /* bungti_d */ 12, /* bltgtr_d */ - 24, /* bltgti_d */ + 36, /* bltgti_d */ 12, /* bordr_d */ - 24, /* bordi_d */ + 36, /* bordi_d */ 12, /* bunordr_d */ - 24, /* bunordi_d */ + 40, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -1206,5 +894,5 @@ 0, /* movr_d_ww */ 0, /* movi_d_ww */ 4, /* movr_d_w */ - 12, /* movi_d_w */ + 24, /* movi_d_w */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_mips.c b/deps/lightning/lib/jit_mips.c index 06e7cbf53..dfcf28448 100644 --- a/deps/lightning/lib/jit_mips.c +++ b/deps/lightning/lib/jit_mips.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -54,8 +54,8 @@ typedef struct jit_pointer_t jit_va_list_t; /* * Prototypes */ -#define jit_make_arg(node) _jit_make_arg(_jit,node) -static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*); +#define jit_make_arg(node,code) _jit_make_arg(_jit,node,code) +static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t); #define jit_make_arg_f(node) _jit_make_arg_f(_jit,node) static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*); #define jit_make_arg_d(node) _jit_make_arg_d(_jit,node) @@ -259,20 +259,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -332,7 +330,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); #if NEW_ABI @@ -343,7 +341,7 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) } static jit_node_t * -_jit_make_arg(jit_state_t *_jit, jit_node_t *node) +_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code) { jit_int32_t offset; #if NEW_ABI @@ -361,7 +359,7 @@ _jit_make_arg(jit_state_t *_jit, jit_node_t *node) _jitc->function->self.size += STACK_SLOT; #endif if (node == (jit_node_t *)0) - node = jit_new_node(jit_code_arg); + node = jit_new_node(code); else link_node(node); node->u.w = offset; @@ -498,10 +496,14 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * 
-_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { assert(_jitc->function); - return (jit_make_arg((jit_node_t*)0)); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif + return (jit_make_arg((jit_node_t*)0, code)); } jit_node_t * @@ -521,7 +523,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _A0 - v->u.w); @@ -533,7 +535,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _A0 - v->u.w); @@ -545,7 +547,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _A0 - v->u.w); @@ -557,7 +559,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _A0 - v->u.w); @@ -569,7 +571,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) { #if __WORDSIZE == 64 @@ -587,7 +589,7 @@ _jit_getarg_i(jit_state_t *_jit, 
jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _A0 - v->u.w); @@ -599,7 +601,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _A0 - v->u.w); @@ -610,10 +612,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) #endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - jit_inc_synth_wp(putargr, u, v); - assert(v->code == jit_code_arg); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_A0 - v->u.w, u); else @@ -622,11 +624,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_A0 - v->u.w, u); else { @@ -791,9 +793,9 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); #if NEW_ABI assert(_jitc->function); @@ -820,14 +822,14 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t 
u, jit_code_t code) { jit_int32_t regno; #if !NEW_ABI jit_word_t offset; #endif assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); #if NEW_ABI if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -1073,8 +1075,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) jit_inc_synth_w(finishr, r0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; - jit_movr(_T9, r0); - call = jit_callr(_T9); + call = jit_callr(r0); call->v.w = _jitc->function->self.argi; #if NEW_ABI call->w.w = call->v.w; @@ -1186,6 +1187,10 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_int32_t patch_offset; } undo; @@ -1287,11 +1292,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1406,6 +1413,11 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); #if __WORDSIZE == 64 case_rr(hton, _ul); +#endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __WORDSIZE == 64 + case_rr(bswap, _ul); #endif case_rr(ext, _c); case_rr(ext, _uc); @@ -1415,6 +1427,16 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + 
case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1687,9 +1709,12 @@ _emit_code(jit_state_t *_jit) temp = node->u.n; assert(temp->code == jit_code_label || temp->code == jit_code_epilog); - word = calli_p(temp->u.w); - if (!(temp->flag & jit_flag_patch)) + if (temp->flag & jit_flag_patch) + calli(temp->u.w); + else { + word = calli_p(_jit->pc.w); patch(word, node); + } } else calli(node->u.w); @@ -1698,6 +1723,10 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; restart_function: _jitc->again = 0; @@ -1715,6 +1744,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; goto restart_function; } @@ -1766,14 +1805,26 @@ _emit_code(jit_state_t *_jit) case jit_code_va_arg_d: vaarg_d(rn(node->u.w), rn(node->v.w)); break; - case jit_code_live: - case jit_code_arg: case jit_code_ellipsis: + case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1783,10 +1834,26 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case 
jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: diff --git a/deps/lightning/lib/jit_names.c b/deps/lightning/lib/jit_names.c index 475bc96cc..2d58355bb 100644 --- a/deps/lightning/lib/jit_names.c +++ b/deps/lightning/lib/jit_names.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -21,17 +21,27 @@ static char *code_name[] = { "data", "live", "align", "save", "load", + "skip", "#name", "#note", "label", "prolog", "ellipsis", "va_push", "allocai", "allocar", - "arg", + "arg_c", + "arg_s", + "arg_i", + "arg_l", "getarg_c", "getarg_uc", "getarg_s", "getarg_us", "getarg_i", "getarg_ui", "getarg_l", - "putargr", "putargi", + "putargr_c", "putargi_c", + "putargr_uc", "putargi_uc", + "putargr_s", "putargi_s", + "putargr_us", "putargi_us", + "putargr_i", "putargi_i", + "putargr_ui", "putargi_ui", + "putargr_l", "putargi_l", "va_start", "va_arg", "va_arg_d", "va_end", @@ -69,9 +79,13 @@ static char *code_name[] = { "gtr_u", "gti_u", "ner", "nei", "movr", "movi", + "movnr", "movzr", + "casr", "casi", "extr_c", "extr_uc", "extr_s", "extr_us", "extr_i", "extr_ui", + "bswapr_us", + "bswapr_ui", "bswapr_ul", "htonr_us", "htonr_ui", "htonr_ul", "ldr_c", "ldi_c", @@ -119,10 +133,22 @@ static char *code_name[] = { "jmpr", "jmpi", "callr", "calli", "prepare", - "pushargr", "pushargi", + "pushargr_c", "pushargi_c", + "pushargr_uc", "pushargi_uc", + "pushargr_s", "pushargi_s", + "pushargr_us", "pushargi_us", + "pushargr_i", "pushargi_i", + "pushargr_ui", "pushargi_ui", + "pushargr_l", "pushargi_l", "finishr", "finishi", "ret", - "retr", "reti", + "retr_c", "reti_c", + "retr_uc", "reti_uc", + "retr_s", "reti_s", + "retr_us", "reti_us", + "retr_i", "reti_i", + "retr_ui", "reti_ui", + "retr_l", "reti_l", "retval_c", "retval_uc", "retval_s", "retval_us", "retval_i", "retval_ui", diff --git a/deps/lightning/lib/jit_note.c b/deps/lightning/lib/jit_note.c index c79b81863..f1c149fc1 100644 --- a/deps/lightning/lib/jit_note.c +++ b/deps/lightning/lib/jit_note.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_ppc-cpu.c b/deps/lightning/lib/jit_ppc-cpu.c index 9e99771fe..679da0386 100644 --- a/deps/lightning/lib/jit_ppc-cpu.c +++ b/deps/lightning/lib/jit_ppc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -260,7 +260,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define LHAU(d,a,s) FDs(43,d,a,s) # define LHAUX(d,a,b) FX(31,d,a,b,375) # define LHAX(d,a,b) FX(31,d,a,b,343) -# define LHRBX(d,a,b) FX(31,d,a,b,790) +# define LHBRX(d,a,b) FX(31,d,a,b,790) # define LHZ(d,a,s) FDs(40,d,a,s) # define LHZU(d,a,s) FDs(41,d,a,s) # define LHZUX(d,a,b) FX(31,d,a,b,311) @@ -271,6 +271,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define LSWI(d,a,n) FX(31,d,a,n,597) # define LSWX(d,a,b) FX(31,d,a,b,533) # define LWARX(d,a,b) FX(31,d,a,b,20) +# define LDARX(d,a,b) FX(31,d,a,b,84) # define LWBRX(d,a,b) FX(31,d,a,b,534) # define LWA(d,a,s) FDs(58,d,a,s|2) # define LWAUX(d,a,b) FX(31,d,a,b,373) @@ -446,6 +447,7 @@ static void _MCRXR(jit_state_t*, jit_int32_t); # define STW(s,a,d) FDs(36,s,a,d) # define STWBRX(s,a,b) FX(31,s,a,b,662) # define STWCX_(s,a,b) FX_(31,s,a,b,150) +# define STDCX_(s,a,b) FX_(31,s,a,b,214) # define STWU(s,a,d) FDs(37,s,a,d) # define STWUX(s,a,b) FX(31,s,a,b,183) # define STWX(s,a,b) FX(31,s,a,b,151) @@ -505,8 +507,17 @@ static void _nop(jit_state_t*,jit_int32_t); static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); # define movi(r0,i0) _movi(_jit,r0,i0) static void _movi(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); 
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEG(r0,r1) # define comr(r0,r1) NOT(r0,r1) # define extr_c(r0,r1) EXTSB(r0,r1) @@ -517,23 +528,14 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); # define extr_i(r0,r1) EXTSW(r0,r1) # define extr_ui(r0,r1) CLRLDI(r0,r1,32) # endif -# if __BYTE_ORDER == __BIG_ENDIAN -# define htonr_us(r0,r1) extr_us(r0,r1) -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif -# else -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 64 -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# endif +# define bswapr_us_lh(r0,r1,no_flag) _bswapr_us(_jit,r0,r1,no_flag) +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1,0) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +# define bswapr_ui_lw(r0,r1,no_flag) _bswapr_ui(_jit,r0,r1,no_flag) +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1,0) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +# if __WORDSIZE == 64 +# define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1) # endif # define addr(r0,r1,r2) ADD(r0,r1,r2) # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -856,14 +858,14 @@ static jit_word_t _jmpi_p(jit_state_t*,jit_word_t) maybe_unused; # define callr(r0,i0) _callr(_jit,r0,i0) static void _callr(jit_state_t*,jit_int32_t,jit_int32_t); # define calli(i0,i1) _calli(_jit,i0,i1) -static void 
_calli(jit_state_t*,jit_word_t,jit_int32_t); +static jit_word_t _calli(jit_state_t*,jit_word_t,jit_int32_t); # define calli_p(i0,i1) _calli_p(_jit,i0,i1) static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_int32_t); # else # define callr(r0) _callr(_jit,r0) static void _callr(jit_state_t*,jit_int32_t); # define calli(i0) _calli(_jit,i0) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(i0) _calli_p(_jit,i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); #endif @@ -1120,6 +1122,22 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPWI(r2, 0); + BEQ(8); + MR(r0, r1); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPWI(r2, 0); + BNE(8); + MR(r0, r1); +} + static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -1138,24 +1156,111 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (word); } -# if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) { - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + SYNC(); + /* retry: */ + retry = _jit->pc.w; +# if __WORDSIZE == 32 + LWARX(r0, _R0_REGNO, r1); +# else + LDARX(r0, _R0_REGNO, r1); +# endif + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ +# if __WORDSIZE == 32 + STWCX_(r3, _R0_REGNO, r1); +# else + STDCX_(r3, _R0_REGNO, r1); +# endif + jump1 = _jit->pc.w; + BNE(0); /* 
BNE retry */ + /* done: */ + done = _jit->pc.w; + ISYNC(); + MFCR(r0); + EXTRWI(r0, r0, 1, CR_EQ); + patch_at(jump0, done); + patch_at(jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag) { - jit_int32_t reg; + jit_int32_t reg, addr_reg; + + /* Convert load followed by bswap to a single instruction */ + /* FIXME r0 and r1 do not need to be the same, only must check if + * r1 was loaded in previous instruction */ + if (no_flag && r0 == r1) { + if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00022e | r0 << 21)) { + /* Convert LHZX to LHBRX */ + _jit->pc.ui--; + LHBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f); + return; + } + + if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0xa0000000 | r0 << 21)) { + /* Convert LHZ to LHBRX */ + _jit->pc.ui--; + addr_reg = (*_jit->pc.ui >> 16) & 0x1f; + + reg = jit_get_reg(jit_class_gpr); + LI(rn(reg), (short)*_jit->pc.ui); + LHBRX(r0, rn(reg), addr_reg); + jit_unget_reg(reg); + return; + } + } + + if (r0 == r1) { + RLWIMI(r0, r0, 16, 8, 15); + RLWINM(r0, r0, 24, 16, 31); + } else { + RLWINM(r0, r1, 8, 16, 23); + RLWIMI(r0, r1, 24, 24, 31); + } +} + +static void +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag) +{ + jit_int32_t reg, addr_reg; + + /* Convert load followed by bswap to a single instruction */ + /* FIXME r0 and r1 do not need to be the same, only must check if + * r1 was loaded in previous instruction */ + if (no_flag && r0 == r1) { + if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00002e | r0 << 21)) { + /* Convert LWZX to LWBRX */ + _jit->pc.ui--; + LWBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f); + return; + } + + if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0x80000000 | r0 << 21)) { + /* Convert LWZ to LWBRX */ + _jit->pc.ui--; + addr_reg = (*_jit->pc.ui >> 16) & 0x1f; + + reg = 
jit_get_reg(jit_class_gpr); + LI(rn(reg), (short)*_jit->pc.ui); + LWBRX(r0, rn(reg), addr_reg); + jit_unget_reg(reg); + return; + } + } + reg = jit_get_reg(jit_class_gpr); ROTLWI(rn(reg), r1, 8); RLWIMI(rn(reg), r1, 24, 0, 7); @@ -1168,22 +1273,6 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(reg); } -# if __WORDSIZE == 64 -static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - rshi_u(rn(reg), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(reg), rn(reg)); - lshi(r0, r0, 32); - orr(r0, r0, rn(reg)); - jit_unget_reg(reg); -} -# endif -# endif - static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1437,15 +1526,23 @@ _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +# define is_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) <= 1) : 0) + static void _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; + jit_int32_t reg, offt; if (can_zero_extend_short_p(i0)) ANDI_(r0, r1, i0); else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) ANDIS_(r0, r1, (jit_uword_t)i0 >> 16); - else { + else if (__WORDSIZE == 32 && is_mask(i0)) { + offt = __builtin_ctzl(i0); + RLWINM(r0, r1, 0, 32 - offt - __builtin_popcountl(i0), 31 - offt); + } else if (__WORDSIZE == 32 && is_mask(~i0)) { + offt = __builtin_ctzl(~i0); + RLWINM(r0, r1, 0, 32 - offt, 31 - offt - __builtin_popcountl(~i0)); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); AND(r0, r1, rn(reg)); @@ -3204,21 +3301,28 @@ _callr(jit_state_t *_jit, jit_int32_t r0 } /* assume fixed address or reachable address */ -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0 # if _CALL_SYSV , jit_int32_t varargs # endif ) { + jit_word_t w; # if _CALL_SYSV jit_word_t d; - d = (i0 - _jit->pc.w) & ~3; - if (can_sign_extend_jump_p(d)) + d = (i0 - _jit->pc.w - 
!!varargs * 4) & ~3; + if (can_sign_extend_jump_p(d)) { + /* Tell double arguments were passed in registers. */ + if (varargs) + CREQV(6, 6, 6); + w = _jit->pc.w; BL(d); + } else # endif { + w = _jit->pc.w; movi(_R12_REGNO, i0); callr(_R12_REGNO # if _CALL_SYSV @@ -3226,6 +3330,7 @@ _calli(jit_state_t *_jit, jit_word_t i0 # endif ); } + return (w); } /* absolute jump */ @@ -3549,7 +3654,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) if (!can_sign_extend_short_p(d)) { /* use absolute address */ assert(can_sign_extend_short_p(label)); - d |= 2; + d = label | 2; } u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe); break; @@ -3577,9 +3682,9 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) if (!can_sign_extend_jump_p(d)) { /* use absolute address */ assert(can_sign_extend_jump_p(label)); - d |= 2; + d = label | 2; } - u.i[0] = (u.i[0] & ~0x3fffffd) | (d & 0x3fffffe); + u.i[0] = (u.i[0] & ~0x3fffffc) | (d & 0x3fffffd); break; case 15: /* LI */ #if __WORDSIZE == 32 diff --git a/deps/lightning/lib/jit_ppc-fpu.c b/deps/lightning/lib/jit_ppc-fpu.c index 1e84f8e36..a2edbd89a 100644 --- a/deps/lightning/lib/jit_ppc-fpu.c +++ b/deps/lightning/lib/jit_ppc-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -143,8 +143,17 @@ static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t); # define absr_d(r0,r1) FABS(r0,r1) # define negr_f(r0,r1) negr_d(r0,r1) # define negr_d(r0,r1) FNEG(r0,r1) -# define sqrtr_f(r0,r1) FSQRTS(r0,r1) -# define sqrtr_d(r0,r1) FSQRT(r0,r1) +# ifdef _ARCH_PPCSQ +# define sqrtr_f(r0,r1) FSQRTS(r0,r1) +# define sqrtr_d(r0,r1) FSQRT(r0,r1) +# else +extern float sqrtf(float); +# define sqrtr_f(r0,r1) _sqrtr_f(_jit,r0,r1) +static void _sqrtr_f(jit_state_t*,jit_int32_t,jit_int32_t); +extern double sqrt(double); +# define sqrtr_d(r0,r1) _sqrtr_d(_jit,r0,r1) +static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t); +# endif # define addr_f(r0,r1,r2) FADDS(r0,r1,r2) # define addr_d(r0,r1,r2) FADD(r0,r1,r2) # define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) @@ -484,23 +493,40 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) ldi_d(r0, (jit_word_t)i0); } -/* should only work on newer ppc (fcfid is a ppc64 instruction) */ static void _extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { # if __WORDSIZE == 32 - jit_int32_t reg; + jit_int32_t reg, freg, off1, off2; + +# if __BYTE_ORDER == __BIG_ENDIAN + off1 = alloca_offset - 8; + off2 = alloca_offset - 4; +# else + off1 = alloca_offset - 4; + off2 = alloca_offset - 8; +# endif + reg = jit_get_reg(jit_class_gpr); - rshi(rn(reg), r1, 31); - /* use reserved 8 bytes area */ - stxi(alloca_offset - 4, _FP_REGNO, r1); - stxi(alloca_offset - 8, _FP_REGNO, rn(reg)); + freg = jit_get_reg(jit_class_fpr); + + movi(rn(reg), 0x43300000); + stxi_i(off1, _FP_REGNO, rn(reg)); + movi(rn(reg), 0x80000000); + stxi_i(off2, _FP_REGNO, rn(reg)); + ldxi_d(rn(freg), _FP_REGNO, alloca_offset - 8); + xorr(rn(reg), r1, rn(reg)); + stxi_i(off2, _FP_REGNO, rn(reg)); + ldxi_d(r0, _FP_REGNO, alloca_offset - 8); + subr_d(r0, r0, rn(freg)); + jit_unget_reg(reg); + jit_unget_reg(freg); # else stxi(alloca_offset - 8, _FP_REGNO, r1); -# endif ldxi_d(r0, _FP_REGNO, alloca_offset - 8); FCFID(r0, r0); +# endif } 
static void @@ -511,7 +537,11 @@ _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) FCTIWZ(rn(reg), r1); /* use reserved 8 bytes area */ stxi_d(alloca_offset - 8, _FP_REGNO, rn(reg)); +# if __BYTE_ORDER == __BIG_ENDIAN ldxi_i(r0, _FP_REGNO, alloca_offset - 4); +# else + ldxi_i(r0, _FP_REGNO, alloca_offset - 8); +# endif jit_unget_reg(reg); } @@ -529,6 +559,32 @@ _truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } # endif +# ifndef _ARCH_PPCSQ +static void +_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr_f(rn(JIT_FA0), r1); + calli((jit_word_t)sqrtf +# if _CALL_SYSV + , 0 +# endif + ); + movr_f(r0, rn(JIT_FRET)); +} + +static void +_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr_d(rn(JIT_FA0), r1); + calli((jit_word_t)sqrt +# if _CALL_SYSV + , 0 +# endif + ); + movr_d(r0, rn(JIT_FRET)); +} +# endif + # define fpr_opi(name, type, size) \ static void \ _##name##i_##type(jit_state_t *_jit, \ diff --git a/deps/lightning/lib/jit_ppc-sz.c b/deps/lightning/lib/jit_ppc-sz.c index 788ac45c8..8d37a30f7 100644 --- a/deps/lightning/lib/jit_ppc-sz.c +++ b/deps/lightning/lib/jit_ppc-sz.c @@ -1,22 +1,26 @@ #if __WORDSIZE == 32 #if defined(__powerpc__) #if __BYTE_ORDER == __BIG_ENDIAN -#if _CALL_SYSV -#define JIT_INSTR_MAX 124 +#if !_CALL_SYSV +#define JIT_INSTR_MAX 136 0, /* data */ 0, /* live */ - 0, /* align */ + 20, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 124, /* prolog */ + 136, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -24,11 +28,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 36, /* va_start */ - 52, /* va_arg */ - 64, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc 
*/ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 8, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 12, /* addi */ @@ -62,7 +78,7 @@ 12, /* remr_u */ 20, /* remi_u */ 4, /* andr */ - 12, /* andi */ + 4, /* andi */ 4, /* orr */ 12, /* ori */ 4, /* xorr */ @@ -97,12 +113,19 @@ 16, /* nei */ 4, /* movr */ 8, /* movi */ + 12, /* movnr */ + 12, /* movzr */ + 36, /* casr */ + 44, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 8, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -192,16 +215,40 @@ 16, /* bxsubi_u */ 8, /* jmpr */ 4, /* jmpi */ - 12, /* callr */ - 20, /* calli */ + 28, /* callr */ + 36, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -256,7 +303,7 @@ 24, /* unordi_f */ 12, /* truncr_f_i */ 0, /* truncr_f_l */ - 20, /* extr_f */ + 36, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ 12, /* movi_f */ @@ -347,10 +394,10 @@ 32, /* unordi_d */ 12, /* truncr_d_i */ 0, /* truncr_d_l */ - 20, /* extr_d */ + 36, /* extr_d 
*/ 4, /* extr_f_d */ 4, /* movr_d */ - 24, /* movi_d */ + 28, /* movi_d */ 4, /* ldr_d */ 8, /* ldi_d */ 4, /* ldxr_d */ @@ -401,7 +448,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ -#endif /* _CALL_SYV */ +#endif /* !_CALL_SYSV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -409,22 +456,26 @@ #if __WORDSIZE == 32 #if defined(__powerpc__) #if __BYTE_ORDER == __BIG_ENDIAN -#if !_CALL_SYSV -#define JIT_INSTR_MAX 136 +#if _CALL_SYSV +#define JIT_INSTR_MAX 124 0, /* data */ 0, /* live */ 0, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 136, /* prolog */ + 124, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -432,11 +483,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 4, /* va_start */ - 8, /* va_arg */ - 8, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 36, /* va_start */ + 52, /* va_arg */ + 64, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 12, /* addi */ @@ -470,7 +533,7 @@ 12, /* remr_u */ 20, /* remi_u */ 4, /* andr */ - 12, /* andi */ + 4, /* andi */ 4, /* orr */ 12, /* ori */ 4, /* xorr */ @@ -505,12 +568,19 @@ 16, /* nei */ 4, /* movr */ 8, /* movi */ + 12, /* movnr */ + 12, /* movzr */ + 36, /* casr */ + 44, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 8, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -600,16 +670,40 @@ 16, /* bxsubi_u 
*/ 8, /* jmpr */ 4, /* jmpi */ - 28, /* callr */ - 40, /* calli */ + 12, /* callr */ + 20, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -664,7 +758,7 @@ 24, /* unordi_f */ 12, /* truncr_f_i */ 0, /* truncr_f_l */ - 20, /* extr_f */ + 36, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ 12, /* movi_f */ @@ -755,10 +849,10 @@ 32, /* unordi_d */ 12, /* truncr_d_i */ 0, /* truncr_d_l */ - 20, /* extr_d */ + 36, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 24, /* movi_d */ + 28, /* movi_d */ 4, /* ldr_d */ 8, /* ldi_d */ 4, /* ldxr_d */ @@ -809,8 +903,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ -#endif /* _CALL_AIX */ -#endif /* __BYTEORDER */ +#endif /* _CALL_SYSV */ +#endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -823,6 +917,7 @@ 4, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -831,7 +926,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -839,8 +937,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* 
putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ @@ -912,12 +1022,19 @@ 16, /* nei */ 4, /* movr */ 36, /* movi */ + 12, /* movnr */ + 12, /* movzr */ + 36, /* casr */ + 44, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ + 8, /* bswapr_us */ + 16, /* bswapr_ui */ + 44, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 4, /* htonr_ul */ @@ -1008,15 +1125,39 @@ 8, /* jmpr */ 4, /* jmpi */ 28, /* callr */ - 56, /* calli */ + 52, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -1216,7 +1357,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ -#endif /* __BYTEORDER */ +#endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -1226,9 +1367,10 @@ #define JIT_INSTR_MAX 124 0, /* data */ 0, /* live */ - 4, /* align */ + 28, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -1237,7 +1379,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* 
arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -1245,8 +1390,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ @@ -1318,13 +1475,20 @@ 16, /* nei */ 4, /* movr */ 36, /* movi */ + 12, /* movnr */ + 12, /* movzr */ + 36, /* casr */ + 44, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ - 20, /* htonr_us */ + 8, /* bswapr_us */ + 16, /* bswapr_ui */ + 44, /* bswapr_ul */ + 8, /* htonr_us */ 16, /* htonr_ui */ 44, /* htonr_ul */ 8, /* ldr_c */ @@ -1414,15 +1578,39 @@ 8, /* jmpr */ 4, /* jmpi */ 12, /* callr */ - 36, /* calli */ + 32, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ diff --git a/deps/lightning/lib/jit_ppc.c b/deps/lightning/lib/jit_ppc.c index 73a936d5a..cd2e8904b 100644 --- a/deps/lightning/lib/jit_ppc.c +++ 
b/deps/lightning/lib/jit_ppc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -291,20 +291,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -364,7 +362,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -404,12 +402,16 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; jit_bool_t incr = 1; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) { offset = _jitc->function->self.argi++; #if _CALL_SYSV @@ -420,7 +422,7 @@ _jit_arg(jit_state_t *_jit) offset = _jitc->function->self.size; if (incr) _jitc->function->self.size += sizeof(jit_word_t); - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -498,7 +500,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t 
*_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, JIT_RA0 - v->u.w); @@ -510,7 +512,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, JIT_RA0 - v->u.w); @@ -522,7 +524,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, JIT_RA0 - v->u.w); @@ -534,7 +536,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, JIT_RA0 - v->u.w); @@ -546,7 +548,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) { #if __WORDSIZE == 32 @@ -564,7 +566,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, JIT_RA0 - v->u.w); @@ -576,7 +578,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - 
assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); @@ -587,10 +589,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) #endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(JIT_RA0 - v->u.w, u); else @@ -599,11 +601,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - jit_inc_synth_wp(putargi, u, v); - assert(v->code == jit_code_arg); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(JIT_RA0 - v->u.w, u); else { @@ -698,11 +700,11 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { jit_bool_t incr = 1; assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(JIT_RA0 - _jitc->function->call.argi, u); @@ -719,12 +721,12 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; jit_bool_t incr = 1; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(JIT_RA0 - _jitc->function->call.argi, u); @@ -1148,9 +1150,15 @@ 
_emit_code(jit_state_t *_jit) jit_word_t word; jit_int32_t value; jit_int32_t offset; + jit_bool_t no_flag = 0; /* Set if previous instruction is + * *not* a jump target. */ struct { jit_node_t *node; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_word_t patch_offset; #if _CALL_AIXDESC jit_word_t prolog_offset; @@ -1273,11 +1281,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1342,9 +1352,28 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); # if __WORDSIZE == 64 case_rr(hton, _ul); +# endif + case jit_code_bswapr_us: + bswapr_us_lh(rn(node->u.w), rn(node->v.w), no_flag); + break; + case jit_code_bswapr_ui: + bswapr_ui_lw(rn(node->u.w), rn(node->v.w), no_flag); + break; +# if __WORDSIZE == 64 + case_rr(bswap, _ul); # endif case_rr(neg,); case_rr(com,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1658,44 +1687,61 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (can_sign_extend_jump_p(word)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } 
else - (void)jmpi_p(node->u.w); + jmpi(node->u.w); break; case jit_code_callr: - callr(rn(node->u.w) #if _CALL_SYSV - , !!(node->flag & jit_flag_varargs) +# define xcallr(u, v) callr(u, v) +# define xcalli_p(u, v) calli_p(u, v) +# define xcalli(u, v) calli(u, v) +#else +# define xcallr(u, v) callr(u) +# define xcalli_p(u, v) calli_p(u) +# define xcalli(u, v) calli(u) #endif - ); + xcallr(rn(node->u.w), !!(node->flag & jit_flag_varargs)); break; case jit_code_calli: + value = !!(node->flag & jit_flag_varargs); if (node->flag & jit_flag_node) { temp = node->u.n; assert(temp->code == jit_code_label || temp->code == jit_code_epilog); - word = calli_p(temp->u.w + if (temp->flag & jit_flag_patch) + xcalli(temp->u.w, value); + else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); #if _CALL_SYSV - , !!(node->flag & jit_flag_varargs) + if (can_sign_extend_jump_p(word + value * 4)) + word = xcalli(_jit->pc.w, value); + else #endif - ); - if (!(temp->flag & jit_flag_patch)) + word = xcalli_p(_jit->pc.w, value); patch(word, node); + } } else - calli(node->u.w -#if _CALL_SYSV - , !!(node->flag & jit_flag_varargs) -#endif - ); + xcalli(node->u.w, value); break; case jit_code_prolog: _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; #if _CALL_AIXDESC undo.prolog_offset = _jitc->prolog.offset; @@ -1736,6 +1782,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; #if _CALL_AIXDESC _jitc->prolog.offset = undo.prolog_offset; @@ -1757,14 +1813,26 @@ _emit_code(jit_state_t *_jit) case jit_code_va_arg_d: vaarg_d(rn(node->u.w), rn(node->v.w)); break; - case jit_code_live: - case jit_code_arg: case jit_code_ellipsis: + case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1774,10 +1842,26 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case 
jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1797,6 +1881,8 @@ _emit_code(jit_state_t *_jit) assert(_jitc->regarg == 0 && _jitc->synth == 0); /* update register live state */ jit_reglive(node); + + no_flag = !(node->flag & jit_flag_patch); } #undef case_brf #undef case_brw diff --git a/deps/lightning/lib/jit_rewind.c b/deps/lightning/lib/jit_rewind.c index 5ef1be5e7..e92737d20 100644 --- a/deps/lightning/lib/jit_rewind.c +++ b/deps/lightning/lib/jit_rewind.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015-2019 Free Software Foundation, Inc. + * Copyright (C) 2015-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -71,9 +71,10 @@ _rewind_prolog(jit_state_t *_jit) for (; node; node = next) { next = node->next; switch (node->code) { - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: node->next = (jit_node_t *)0; - jit_make_arg(node); + jit_make_arg(node, node->code); break; case jit_code_arg_f: node->next = (jit_node_t *)0; diff --git a/deps/lightning/lib/jit_riscv-cpu.c b/deps/lightning/lib/jit_riscv-cpu.c index 388489fb2..a4cf7f466 100644 --- a/deps/lightning/lib/jit_riscv-cpu.c +++ b/deps/lightning/lib/jit_riscv-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Free Software Foundation, Inc. 
+ * Copyright (C) 2019-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -434,12 +434,9 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1) +# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1) +# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1) # define extr_c(r0, r1) _extr_c(_jit, r0, r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0, r1) andi(r0, r1, 0xff) @@ -455,6 +452,15 @@ static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0, im) _movi_p(_jit, r0, im) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define ltr(r0, r1, r2) SLT(r0, r1, r2) # define lti(r0, r1, im) _lti(_jit, r0, r1, im) static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -573,12 +579,12 @@ 
static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); # define jmpr(r0) JALR(_ZERO_REGNO, r0, 0) # define jmpi(im) _jmpi(_jit, im) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); # define jmpi_p(im) _jmpi_p(_jit, im) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define callr(r0) JALR(_RA_REGNO, r0, 0) # define calli(im) _calli(_jit, im) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(im) _calli_p(_jit, im) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(i0) _prolog(_jit,i0) @@ -1243,59 +1249,6 @@ DEFST(s, H) DEFST(i, W) DEFST(l, D) -static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - -static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - jit_int32_t t1; - jit_int32_t t2; - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 24); - rshi(rn(t1), r1, 16); - rshi(rn(t2), r1, 8); - andi(rn(t0), rn(t0), 0xff); - andi(rn(t1), rn(t1), 0xff); - andi(rn(t2), rn(t2), 0xff); - andi(r0, r1, 0xff); - lshi(r0, r0, 24); - lshi(rn(t1), rn(t1), 8); - orr(r0, r0, rn(t0)); - lshi(rn(t2), rn(t2), 16); - orr(r0, r0, rn(t1)); - orr(r0, r0, rn(t2)); - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} - -static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi_u(rn(t0), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(t0), rn(t0)); - lshi(r0, r0, 32); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - static void _extr_c(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1) { @@ -1327,7 +1280,9 @@ _extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { +# if __WORDSIZE == 64 if (simm32_p(i0)) { +# endif jit_int32_t lo = (jit_int32_t)i0 << 20 >> 20; jit_int32_t hi = i0 - lo; if (hi) { @@ -1337,40 +1292,81 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } else ADDIW(r0, _ZERO_REGNO, lo); +# if __WORDSIZE == 64 } - else { - jit_int32_t lo = i0 << 32 >> 32; - jit_word_t hi = i0 - lo; - jit_int32_t t0 = jit_get_reg(jit_class_gpr); - movi(rn(t0), (jit_int32_t)(hi >> 32)); - movi(r0, lo); - lshi(rn(t0), rn(t0), 32); - addr(r0, r0, rn(t0)); - jit_unget_reg(t0); - } + else + load_const(r0, i0); +# endif } static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_word_t w; - jit_int32_t t0; - jit_int32_t ww = i0 << 32 >> 32; - jit_int32_t lo = ww << 20 >> 20; - jit_int32_t hi = ww - lo; w = _jit->pc.w; +# if __WORDSIZE == 64 + AUIPC(r0, 0); + ADDI(r0, r0, 0); + LD(r0, r0, 0); +# else + LUI(r0, 0); + ADDIW(r0, r0, 0); +# endif + return (w); +} + +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi(_jit->pc.w, r2, 0); + movr(r0, r1); + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei(_jit->pc.w, r2, 0); + movr(r0, r1); + patch_at(w, _jit->pc.w); +} + +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t t0, r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } t0 = jit_get_reg(jit_class_gpr); - LUI(r0, hi >> 12); - ADDIW(r0, r0, lo); - ww = i0 >> 32; - lo = ww << 20 >> 20; - hi = ww - lo; - LUI(rn(t0), hi >> 12); - ADDIW(rn(t0), rn(t0), lo); - 
SLLI(rn(t0), rn(t0), 32); - ADD(r0, r0, rn(t0)); + retry = _jit->pc.w; +# if __WORDSIZE == 32 + LR_W(r0, r1); +# else + LR_D(r0, r1); +# endif + jump0 = _jit->pc.w; + BNE(r0, r2, 0); +# if __WORDSIZE == 32 + SC_W(rn(t0), r1, r3); +# else + SC_D(rn(t0), r1, r3); +# endif + jump1 = _jit->pc.w; + BNE(rn(t0), _ZERO_REGNO, 0); + done = _jit->pc.w; + eqr(r0, r0, r2); + patch_at(jump0, done); + patch_at(jump1, retry); jit_unget_reg(t0); - return (w); + if (iscasi) + jit_unget_reg(r1_reg); } static void @@ -2091,12 +2087,13 @@ _bmci(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0) return (w); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { jit_int32_t t0; - jit_word_t dsp; - dsp = i0 - _jit->pc.w; + jit_word_t dsp, w; + w = _jit->pc.w; + dsp = i0 - w; if (simm20_p(dsp)) JAL(_ZERO_REGNO, dsp); else { @@ -2105,6 +2102,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) jmpr(rn(t0)); jit_unget_reg(t0); } + return (w); } static jit_word_t @@ -2119,12 +2117,13 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0) return (w); } -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { jit_int32_t t0; - jit_word_t dsp; - dsp = i0 - _jit->pc.w; + jit_word_t dsp, w; + w = _jit->pc.w; + dsp = i0 - w; if (simm20_p(dsp)) JAL(_RA_REGNO, dsp); else { @@ -2133,6 +2132,7 @@ _calli(jit_state_t *_jit, jit_word_t i0) callr(rn(t0)); jit_unget_reg(t0); } + return (w); } static jit_word_t @@ -2318,42 +2318,43 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) u.w = instr; i.w = u.i[0]; /* movi_p? 
*/ +# if __WORDSIZE == 64 + if (i.U.opcode == 23) { /* AUIPC */ + jit_int32_t lo, hi; + jit_word_t address, relative; + address = get_const(label); + relative = address - instr; + assert(simm32_p(relative)); + lo = (jit_int32_t)relative << 20 >> 20; + hi = relative - lo; + i.U.imm12_31 = hi >> 12; + u.i[0] = i.w; + i.w = u.i[1]; + if (i.I.opcode == 19 && i.I.funct3 == 0) { /* ADDI */ + i.I.imm11_0 = lo; + u.i[1] = i.w; + i.w = u.i[2]; + } + else + abort(); + assert(i.I.opcode == 3 && i.I.funct3 == 3); /* LD */ + } +# else if (i.U.opcode == 55) { /* LUI */ - jit_int32_t ww = label << 32 >> 32; - jit_int32_t lo = ww << 20 >> 20; - jit_int32_t hi = ww - lo; + jit_int32_t lo = (jit_int32_t)label << 20 >> 20; + jit_int32_t hi = label - lo; i.U.imm12_31 = hi >> 12; u.i[0] = i.w; i.w = u.i[1]; if (i.I.opcode == 27 && i.I.funct3 == 0) { /* ADDIW */ - i.I.imm11_0 = lo & 0xfff; + i.I.imm11_0 = lo; u.i[1] = i.w; i.w = u.i[2]; - if (i.U.opcode == 55) { /* LUI */ - ww = label >> 32; - lo = ww << 20 >> 20; - hi = ww - lo; - i.U.imm12_31 = hi >> 12; - u.i[2] = i.w; - i.w = u.i[3]; - if (i.I.opcode == 27 && i.I.funct3 == 0) { /* ADDIW */ - i.I.imm11_0 = lo & 0xfff; - u.i[3] = i.w; - i.w = u.i[4]; - assert(i.IS.opcode == 19); /* SLLI */ - assert(i.IS.shamt == 32); - i.w = u.i[5]; - assert(i.R.opcode == 51); /* ADD */ - } - else - abort(); - } - else - abort(); } else abort(); } +# endif /* b{lt,le,eq,ge,gt,ne}{,_u}? */ else if (i.B.opcode == 99) { /* B{EQ,NE,LT,GE,LTU,GEU} */ jit_word_t jmp = label - instr; diff --git a/deps/lightning/lib/jit_riscv-fpu.c b/deps/lightning/lib/jit_riscv-fpu.c index 367975e87..e7884cb91 100644 --- a/deps/lightning/lib/jit_riscv-fpu.c +++ b/deps/lightning/lib/jit_riscv-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Free Software Foundation, Inc. + * Copyright (C) 2019-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_riscv-sz.c b/deps/lightning/lib/jit_riscv-sz.c index 2f1d72584..fd673fc4d 100644 --- a/deps/lightning/lib/jit_riscv-sz.c +++ b/deps/lightning/lib/jit_riscv-sz.c @@ -5,6 +5,7 @@ 4, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -13,7 +14,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -21,49 +25,61 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ - 20, /* addi */ + 16, /* addi */ 12, /* addcr */ - 28, /* addci */ + 24, /* addci */ 28, /* addxr */ 28, /* addxi */ 4, /* subr */ - 20, /* subi */ + 16, /* subi */ 12, /* subcr */ - 28, /* subci */ + 24, /* subci */ 28, /* subxr */ 28, /* subxi */ - 28, /* rsbi */ + 20, /* rsbi */ 4, /* mulr */ - 20, /* muli */ + 16, /* muli */ 12, /* qmulr */ - 24, /* qmuli */ + 20, /* qmuli */ 12, /* qmulr_u */ - 24, /* qmuli_u */ + 20, /* qmuli_u */ 4, /* divr */ - 20, /* divi */ + 16, /* divi */ 4, /* divr_u */ - 20, /* divi_u */ + 16, /* divi_u */ 20, /* qdivr */ 16, /* qdivi */ 20, /* qdivr_u */ 16, /* qdivi_u */ 4, /* remr */ - 20, /* remi */ + 16, /* remi */ 4, /* remr_u */ - 20, /* remi_u */ + 16, /* remi_u */ 4, /* andr */ - 20, /* andi */ + 16, /* andi */ 4, /* orr */ - 20, /* ori */ + 16, /* ori */ 4, /* xorr */ - 20, /* xori */ + 16, /* xori */ 4, /* lshr */ 4, /* lshi */ 4, /* rshr */ @@ -93,30 +109,37 @@ 8, /* ner */ 8, /* nei */ 4, /* movr */ - 24, /* movi */ + 
12, /* movi */ + 12, /* movnr */ + 12, /* movzr */ + 28, /* casr */ + 40, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 4, /* extr_i */ 8, /* extr_ui */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ 20, /* htonr_us */ 52, /* htonr_ui */ 116, /* htonr_ul */ 4, /* ldr_c */ - 12, /* ldi_c */ + 16, /* ldi_c */ 4, /* ldr_uc */ - 12, /* ldi_uc */ + 16, /* ldi_uc */ 4, /* ldr_s */ - 12, /* ldi_s */ + 16, /* ldi_s */ 4, /* ldr_us */ - 12, /* ldi_us */ + 16, /* ldi_us */ 4, /* ldr_i */ - 12, /* ldi_i */ + 16, /* ldi_i */ 4, /* ldr_ui */ - 12, /* ldi_ui */ + 16, /* ldi_ui */ 4, /* ldr_l */ - 12, /* ldi_l */ + 16, /* ldi_l */ 8, /* ldxr_c */ 16, /* ldxi_c */ 8, /* ldxr_uc */ @@ -132,13 +155,13 @@ 8, /* ldxr_l */ 16, /* ldxi_l */ 4, /* str_c */ - 12, /* sti_c */ + 16, /* sti_c */ 4, /* str_s */ - 12, /* sti_s */ + 16, /* sti_s */ 4, /* str_i */ - 12, /* sti_i */ + 16, /* sti_i */ 4, /* str_l */ - 12, /* sti_l */ + 16, /* sti_l */ 8, /* stxr_c */ 16, /* stxi_c */ 8, /* stxr_s */ @@ -156,7 +179,7 @@ 4, /* bler_u */ 8, /* blei_u */ 4, /* beqr */ - 28, /* beqi */ + 16, /* beqi */ 4, /* bger */ 8, /* bgei */ 4, /* bger_u */ @@ -166,7 +189,7 @@ 4, /* bgtr_u */ 8, /* bgti_u */ 4, /* bner */ - 20, /* bnei */ + 16, /* bnei */ 8, /* bmsr */ 12, /* bmsi */ 8, /* bmcr */ @@ -188,17 +211,41 @@ 16, /* bxsubr_u */ 20, /* bxsubi_u */ 4, /* jmpr */ - 28, /* jmpi */ + 4, /* jmpi */ 4, /* callr */ - 28, /* calli */ + 16, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc 
*/ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -258,11 +305,11 @@ 4, /* movr_f */ 8, /* movi_f */ 4, /* ldr_f */ - 12, /* ldi_f */ + 16, /* ldi_f */ 8, /* ldxr_f */ 16, /* ldxi_f */ 4, /* str_f */ - 12, /* sti_f */ + 16, /* sti_f */ 8, /* stxr_f */ 16, /* stxi_f */ 8, /* bltr_f */ @@ -303,87 +350,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 4, /* addr_d */ - 24, /* addi_d */ + 20, /* addi_d */ 4, /* subr_d */ - 24, /* subi_d */ - 24, /* rsbi_d */ + 20, /* subi_d */ + 20, /* rsbi_d */ 4, /* mulr_d */ - 24, /* muli_d */ + 20, /* muli_d */ 4, /* divr_d */ - 24, /* divi_d */ + 20, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 4, /* sqrtr_d */ 4, /* ltr_d */ - 24, /* lti_d */ + 20, /* lti_d */ 4, /* ler_d */ - 24, /* lei_d */ + 20, /* lei_d */ 4, /* eqr_d */ - 24, /* eqi_d */ + 20, /* eqi_d */ 4, /* ger_d */ - 24, /* gei_d */ + 20, /* gei_d */ 4, /* gtr_d */ - 24, /* gti_d */ + 20, /* gti_d */ 8, /* ner_d */ - 28, /* nei_d */ + 24, /* nei_d */ 28, /* unltr_d */ - 48, /* unlti_d */ + 44, /* unlti_d */ 28, /* unler_d */ - 48, /* unlei_d */ + 44, /* unlei_d */ 28, /* uneqr_d */ - 48, /* uneqi_d */ + 44, /* uneqi_d */ 28, /* unger_d */ - 48, /* ungei_d */ + 44, /* ungei_d */ 28, /* ungtr_d */ - 48, /* ungti_d */ + 44, /* ungti_d */ 40, /* ltgtr_d */ - 60, /* ltgti_d */ + 56, /* ltgti_d */ 28, /* ordr_d */ - 48, /* ordi_d */ + 44, /* ordi_d */ 20, /* unordr_d */ - 40, /* unordi_d */ + 36, /* unordi_d */ 4, /* truncr_d_i */ 4, /* truncr_d_l */ 4, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 20, /* movi_d */ + 16, /* movi_d */ 4, /* ldr_d */ - 12, /* ldi_d */ + 16, /* ldi_d */ 8, /* ldxr_d */ 16, /* ldxi_d */ 4, /* str_d */ - 12, /* sti_d */ + 16, /* sti_d */ 8, /* stxr_d */ 16, /* stxi_d */ 8, /* bltr_d */ - 28, /* blti_d */ + 24, /* blti_d */ 8, /* bler_d */ - 28, /* blei_d */ + 
24, /* blei_d */ 8, /* beqr_d */ - 28, /* beqi_d */ + 24, /* beqi_d */ 8, /* bger_d */ - 28, /* bgei_d */ + 24, /* bgei_d */ 8, /* bgtr_d */ - 28, /* bgti_d */ + 24, /* bgti_d */ 8, /* bner_d */ - 28, /* bnei_d */ + 24, /* bnei_d */ 32, /* bunltr_d */ - 52, /* bunlti_d */ + 48, /* bunlti_d */ 32, /* bunler_d */ - 52, /* bunlei_d */ + 48, /* bunlei_d */ 32, /* buneqr_d */ - 52, /* buneqi_d */ + 48, /* buneqi_d */ 32, /* bunger_d */ - 52, /* bungei_d */ + 48, /* bungei_d */ 32, /* bungtr_d */ - 52, /* bungti_d */ + 48, /* bungti_d */ 44, /* bltgtr_d */ - 64, /* bltgti_d */ + 60, /* bltgti_d */ 32, /* bordr_d */ - 52, /* bordi_d */ + 48, /* bordi_d */ 24, /* bunordr_d */ - 44, /* bunordi_d */ + 40, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -397,5 +444,5 @@ 0, /* movr_d_ww */ 0, /* movi_d_ww */ 4, /* movr_d_w */ - 16, /* movi_d_w */ + 12, /* movi_d_w */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_riscv.c b/deps/lightning/lib/jit_riscv.c index aae678204..58ed61bed 100644 --- a/deps/lightning/lib/jit_riscv.c +++ b/deps/lightning/lib/jit_riscv.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Free Software Foundation, Inc. + * Copyright (C) 2019-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -28,6 +28,15 @@ typedef jit_pointer_t jit_va_list_t; /* * Prototypes */ +#if __WORDSIZE == 64 +# define load_const(r0, i0) _load_const(_jit, r0, i0) +static void _load_const(jit_state_t*, jit_int32_t, jit_word_t); +static jit_word_t hash_const(jit_word_t); +# define put_const(i0) _put_const(_jit, i0) +static void _put_const(jit_state_t*, jit_word_t); +# define get_const(i0) _get_const(_jit, i0) +static jit_word_t _get_const(jit_state_t*, jit_word_t); +#endif #define patch(instr, node) _patch(_jit, instr, node) static void _patch(jit_state_t*,jit_word_t,jit_node_t*); @@ -219,20 +228,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -292,7 +299,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -325,19 +332,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; 
_jitc->function->self.size += sizeof(jit_word_t); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -392,7 +402,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, JIT_RA0 - v->u.w); @@ -404,7 +414,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, JIT_RA0 - v->u.w); @@ -416,7 +426,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, JIT_RA0 - v->u.w); @@ -428,7 +438,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, JIT_RA0 - v->u.w); @@ -440,7 +450,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_i(u, JIT_RA0 - v->u.w); @@ -452,7 +462,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + 
assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, JIT_RA0 - v->u.w); @@ -464,7 +474,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); @@ -474,10 +484,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(JIT_RA0 - v->u.w, u); else @@ -486,11 +496,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(JIT_RA0 - v->u.w, u); else { @@ -609,10 +619,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(JIT_RA0 - _jitc->function->call.argi, u); @@ -626,11 +636,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t 
regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(JIT_RA0 - _jitc->function->call.argi, u); @@ -883,10 +893,51 @@ _emit_code(jit_state_t *_jit) jit_node_t *node; jit_uint8_t *data; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_int32_t const_offset; jit_int32_t patch_offset; } undo; +#if __WORDSIZE == 64 + if (!_jitc->consts.hash.table) { + jit_alloc((jit_pointer_t *)&_jitc->consts.hash.table, + 16 * sizeof(jit_const_t *)); + _jitc->consts.hash.size = 16; + jit_alloc((jit_pointer_t *)&_jitc->consts.pool.ptr, + sizeof(jit_const_t *)); + jit_alloc((jit_pointer_t *)_jitc->consts.pool.ptr, + 1024 * sizeof(jit_const_t)); + _jitc->consts.pool.length = 1; + } + /* Reset table if starting over jit generation */ + else + memset(_jitc->consts.hash.table, 0, + _jitc->consts.hash.size * sizeof(jit_word_t)); + for (offset = 0; offset < _jitc->consts.pool.length; offset++) { + jit_int32_t i; + jit_const_t *list = _jitc->consts.pool.ptr[offset]; + for (i = 0; i < 1023; ++i, ++list) + list->next = list + 1; + if (offset + 1 < _jitc->consts.pool.length) + list->next = _jitc->consts.pool.ptr[offset + 1]; + else + list->next = NULL; + } + _jitc->consts.pool.list = _jitc->consts.pool.ptr[0]; + _jitc->consts.hash.count = 0; + if (!_jitc->consts.vector.instrs) { + jit_alloc((jit_pointer_t *)&_jitc->consts.vector.instrs, + 16 * sizeof(jit_word_t)); + jit_alloc((jit_pointer_t *)&_jitc->consts.vector.values, + 16 * sizeof(jit_word_t)); + _jitc->consts.vector.length = 16; + } + _jitc->consts.vector.offset = 0; +#endif + _jitc->function = NULL; jit_reglive_setup(); @@ -1002,11 +1053,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = 
_jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1112,12 +1165,25 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1373,6 +1439,11 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (simm20_p(word)) + word = jmpi(_jit->pc.w); + else word = jmpi_p(_jit->pc.w); patch(word, node); } @@ -1391,7 +1462,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) calli(temp->u.w); else { - word = calli_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (simm20_p(word)) + word = calli(_jit->pc.w); + else + word = calli_p(_jit->pc.w); patch(word, node); } } @@ -1402,6 +1478,10 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; restart_function: _jitc->again = 0; @@ -1419,6 +1499,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; 
_jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; goto restart_function; } @@ -1460,11 +1550,19 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: + case jit_code_arg_l: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1472,10 +1570,22 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: case jit_code_putargr_f: case 
jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1519,6 +1629,35 @@ _emit_code(jit_state_t *_jit) #undef case_rw #undef case_rr +#if __WORDSIZE == 64 + /* Record all constants to be patched */ + for (offset = 0; offset < _jitc->patches.offset; offset++) { + node = _jitc->patches.ptr[offset].node; + value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w; + put_const(value); + } + /* Record all direct constants */ + for (offset = 0; offset < _jitc->consts.vector.offset; offset++) + put_const(_jitc->consts.vector.values[offset]); + /* Now actually inject constants at the end of code buffer */ + if (_jitc->consts.hash.count) { + jit_const_t *entry; + /* Insert nop if aligned at 4 bytes */ + if (_jit->pc.w % sizeof(jit_word_t)) + nop(_jit->pc.w % sizeof(jit_word_t)); + for (offset = 0; offset < _jitc->consts.hash.size; offset++) { + entry = _jitc->consts.hash.table[offset]; + for (; entry; entry = entry->next) { + /* Make sure to not write out of bounds */ + if (_jit->pc.uc >= _jitc->code.end) + return (NULL); + entry->address = _jit->pc.w; + *_jit->pc.ul++ = entry->value; + } + } + } +#endif + for (offset = 0; offset < _jitc->patches.offset; offset++) { node = _jitc->patches.ptr[offset].node; word = _jitc->patches.ptr[offset].inst; @@ -1526,6 +1665,25 @@ _emit_code(jit_state_t *_jit) patch_at(word, value); } +#if __WORDSIZE == 64 + /* Patch direct 
complex constants */ + if (_jitc->consts.vector.instrs) { + for (offset = 0; offset < _jitc->consts.vector.offset; offset++) + patch_at(_jitc->consts.vector.instrs[offset], + _jitc->consts.vector.values[offset]); + jit_free((jit_pointer_t *)&_jitc->consts.vector.instrs); + jit_free((jit_pointer_t *)&_jitc->consts.vector.values); + } + + /* Hash table no longer need */ + if (_jitc->consts.hash.table) { + jit_free((jit_pointer_t *)&_jitc->consts.hash.table); + for (offset = 0; offset < _jitc->consts.pool.length; offset++) + jit_free((jit_pointer_t *)_jitc->consts.pool.ptr + offset); + jit_free((jit_pointer_t *)&_jitc->consts.pool.ptr); + } +#endif + jit_flush(_jit->code.ptr, _jit->pc.uc); return (_jit->code.ptr); @@ -1536,6 +1694,114 @@ _emit_code(jit_state_t *_jit) # include "jit_riscv-fpu.c" #undef CODE +static void +_load_const(jit_state_t *_jit, jit_int32_t reg, jit_word_t value) +{ + if (_jitc->consts.vector.offset >= _jitc->consts.vector.length) { + jit_word_t new_size = _jitc->consts.vector.length * + 2 * sizeof(jit_word_t); + jit_realloc((jit_pointer_t *)&_jitc->consts.vector.instrs, + _jitc->consts.vector.length * sizeof(jit_word_t), new_size); + jit_realloc((jit_pointer_t *)&_jitc->consts.vector.values, + _jitc->consts.vector.length * sizeof(jit_word_t), new_size); + _jitc->consts.vector.length *= 2; + } + _jitc->consts.vector.instrs[_jitc->consts.vector.offset] = _jit->pc.w; + _jitc->consts.vector.values[_jitc->consts.vector.offset] = value; + ++_jitc->consts.vector.offset; + /* Resolve later the pc relative address */ + put_const(value); + AUIPC(reg, 0); + ADDI(reg, reg, 0); + LD(reg, reg, 0); +} + +static jit_word_t +hash_const(jit_word_t value) +{ + const jit_uint8_t *ptr; + jit_word_t i, key; + for (i = key = 0, ptr = (jit_uint8_t *)&value; i < 4; ++i) + key = (key << (key & 1)) ^ ptr[i]; + return (key); + +} + +static void +_put_const(jit_state_t *_jit, jit_word_t value) +{ + jit_word_t key; + jit_const_t *entry; + + /* Check if already inserted in 
table */ + key = hash_const(value) % _jitc->consts.hash.size; + for (entry = _jitc->consts.hash.table[key]; entry; entry = entry->next) { + if (entry->value == value) + return; + } + + /* Check if need to increase pool size */ + if (_jitc->consts.pool.list->next == NULL) { + jit_const_t *list; + jit_word_t offset; + jit_word_t new_size = (_jitc->consts.pool.length + 1) * + sizeof(jit_const_t*); + jit_realloc((jit_pointer_t *)&_jitc->consts.pool.ptr, + _jitc->consts.pool.length * sizeof(jit_const_t*), new_size); + jit_alloc((jit_pointer_t *) + _jitc->consts.pool.ptr + _jitc->consts.pool.length, + 1024 * sizeof(jit_const_t)); + list = _jitc->consts.pool.ptr[_jitc->consts.pool.length]; + _jitc->consts.pool.list->next = list; + for (offset = 0; offset < 1023; ++offset, ++list) + list->next = list + 1; + list->next = NULL; + ++_jitc->consts.pool.length; + } + + /* Rehash if more than 75% used table */ + if (_jitc->consts.hash.count > (_jitc->consts.hash.size / 4) * 3) { + jit_word_t i, k; + jit_const_t *next; + jit_const_t **table; + jit_alloc((jit_pointer_t *)&table, + _jitc->consts.hash.size * 2 * sizeof(jit_const_t *)); + for (i = 0; i < _jitc->consts.hash.size; ++i) { + for (entry = _jitc->consts.hash.table[i]; entry; entry = next) { + next = entry->next; + k = hash_const(entry->value) % (_jitc->consts.hash.size * 2); + entry->next = table[k]; + table[k] = entry; + } + } + jit_free((jit_pointer_t *)&_jitc->consts.hash.table); + _jitc->consts.hash.size *= 2; + _jitc->consts.hash.table = table; + } + + /* Insert in hash */ + entry = _jitc->consts.pool.list; + _jitc->consts.pool.list = entry->next; + ++_jitc->consts.hash.count; + entry->value = value; + entry->next = _jitc->consts.hash.table[key]; + _jitc->consts.hash.table[key] = entry; +} + +static jit_word_t +_get_const(jit_state_t *_jit, jit_word_t value) +{ + jit_word_t key; + jit_const_t *entry; + key = hash_const(value) % _jitc->consts.hash.size; + for (entry = _jitc->consts.hash.table[key]; entry; entry = 
entry->next) { + if (entry->value == value) + return (entry->address); + } + /* Only the final patch should call get_const() */ + abort(); +} + void jit_flush(void *fptr, void *tptr) { diff --git a/deps/lightning/lib/jit_s390-cpu.c b/deps/lightning/lib/jit_s390-cpu.c index 02cac6047..38dcbdd1e 100644 --- a/deps/lightning/lib/jit_s390-cpu.c +++ b/deps/lightning/lib/jit_s390-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -966,6 +966,23 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1) +static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t); +#endif +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define addr(r0,r1,r2) _addr(_jit,r0,r1,r2) static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -1039,27 +1056,24 @@ static void _qdivi_u(jit_state_t*,jit_int32_t, # if __WORDSIZE == 32 # define lshr(r0,r1,r2) _lshr(_jit,r0,r1,r2) 
static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); -# else -# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2) -# endif -# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0) +# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0) static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# if __WORDSIZE == 32 # define rshr(r0,r1,r2) _rshr(_jit,r0,r1,r2) static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); -# else -# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2) -# endif -# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0) +# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0); static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# if __WORDSIZE == 32 # define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2) static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) +static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # else +# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2) +# define lshi(r0,r1,i0) SLLG(r0,r1,i0,0) +# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2) +# define rshi(r0,r1,i0) SRAG(r0,r1,i0,0) # define rshr_u(r0,r1,r2) SRLG(r0,r1,0,r2) +# define rshi_u(r0,r1,i0) SRLG(r0,r1,i0,0) # endif -# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) -static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # if __WORDSIZE == 32 # define negr(r0,r1) LCR(r0,r1) # else @@ -1079,13 +1093,6 @@ static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define xori(r0,r1,i0) _xori(_jit,r0,r1,i0) static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define htonr_us(r0,r1) extr_us(r0,r1) -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif # define extr_c(r0,r1) LGBR(r0,r1) # define extr_uc(r0,r1) LLGCR(r0,r1) # define extr_s(r0,r1) LGHR(r0,r1) @@ -1284,13 +1291,13 @@ static void 
_stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define bmci(i0,r0,i1) bmxi(CC_E,i0,r0,i1) # define bmci_p(i0,r0,i1) bmxi_p(CC_E,i0,r0,i1) # define jmpr(r0) BR(r0) -# define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*,jit_word_t); +# define jmpi(i0,i1) _jmpi(_jit,i0,i1) +static jit_word_t _jmpi(jit_state_t*,jit_word_t, jit_bool_t); # define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define callr(r0) BALR(_R14_REGNO,r0) -# define calli(i0) _calli(_jit,i0) -static void _calli(jit_state_t*,jit_word_t); +# define calli(i0,i1) _calli(_jit,i0,i1) +static jit_word_t _calli(jit_state_t*,jit_word_t, jit_bool_t); # define calli_p(i0) _calli_p(_jit,i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(i0) _prolog(_jit,i0) @@ -2442,6 +2449,90 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi_p(_jit->pc.w, r2, 0); +#if __WORDSIZE == 32 + LR(r0, r1); +#else + LGR(r0, r1); +#endif + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei_p(_jit->pc.w, r2, 0); +#if __WORDSIZE == 32 + LR(r0, r1); +#else + LGR(r0, r1); +#endif + patch_at(w, _jit->pc.w); +} + +static void +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + LRVR(r0, r1); + SRL(r0, 16, 0); + LLGHR(r0, r0); +} + +static void +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + LRVR(r0, r1); +# if __WORDSIZE == 64 + LLGFR(r0, r0); +# endif +} + +#if __WORDSIZE == 64 +static void +_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + LRVGR(r0, r1); +} +#endif + +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t iscasi, r1_reg; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = 
jit_get_reg_but_zero(0); + r1 = rn(r1_reg); + movi(r1, i0); + } + /* Do not clobber r2 */ + movr(r0, r2); + /* The CS and CSG instructions below effectively do atomically: + * if (*r1 == r0) + * *r1 = r3; + * else + * r0 = *r1 + * So, we do not need to check cpu flags to know if it did work, + * just compare if values are different. + * Obviously it is somewhat of undefined behavior if old_value (r2) + * and new_value (r3) have the same value, but should still work + * as expected as a noop. + */ +# if __WORDSIZE == 32 + CS(r0, r3, 0, r1); +# else + CSG(r0, r3, 0, r1); +# endif + eqr(r0, r0, r2); + if (iscasi) + jit_unget_reg(r1_reg); +} + static void _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -2833,19 +2924,14 @@ _lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) SLL(r0, 0, r2); } } -#endif static void _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; - reg = jit_get_reg_but_zero(0); - movi(rn(reg), i0); - lshr(r0, r1, rn(reg)); - jit_unget_reg_but_zero(reg); + movr(r0, r1); + SLL(r0, i0, 0); } -# if __WORDSIZE == 32 static void _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -2862,19 +2948,14 @@ _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) SRA(r0, 0, r2); } } -#endif static void _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; - reg = jit_get_reg_but_zero(0); - movi(rn(reg), i0); - rshr(r0, r1, rn(reg)); - jit_unget_reg_but_zero(reg); + movr(r0, r1); + SRA(r0, i0, 0); } -# if __WORDSIZE == 32 static void _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -2891,17 +2972,14 @@ _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) SRL(r0, 0, r2); } } -#endif static void _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; - reg = jit_get_reg_but_zero(0); - movi(rn(reg), i0); - 
rshr_u(r0, r1, rn(reg)); - jit_unget_reg_but_zero(reg); + movr(r0, r1); + SRL(r0, i0, 0); } +#endif static void _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) @@ -3433,13 +3511,14 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } #endif -static void -_jmpi(jit_state_t *_jit, jit_word_t i0) +static jit_word_t +_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1) { - jit_word_t d; jit_int32_t reg; - d = (i0 - _jit->pc.w) >> 1; - if (s16_p(d)) + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 1; + if (i1 && s16_p(d)) J(x16(d)); else if (s32_p(d)) BRL(d); @@ -3449,6 +3528,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) jmpr(rn(reg)); jit_unget_reg_but_zero(reg); } + return (w); } static jit_word_t @@ -3463,13 +3543,16 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0) return (w); } -static void -_calli(jit_state_t *_jit, jit_word_t i0) +static jit_word_t +_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1) { - jit_word_t d; jit_int32_t reg; - d = (i0 - _jit->pc.w) >> 1; - if (s32_p(d)) + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 1; + if (i1 && s16_p(d)) + BRAS(_R14_REGNO, x16(d)); + else if (s32_p(d)) BRASL(_R14_REGNO, d); else { reg = jit_get_reg_but_zero(0); @@ -3477,6 +3560,7 @@ _calli(jit_state_t *_jit, jit_word_t i0) callr(rn(reg)); jit_unget_reg_but_zero(reg); } + return (w); } static jit_word_t @@ -3825,17 +3909,17 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) u.s[7] = i1.s; #endif } - /* BRC */ + /* BRC or BRL */ else if (i0.b.op == 0xA7) { - assert(i0.b.r3 == 0x4); + assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5); d = (label - instr) >> 1; assert(s16_p(d)); i1.b.i2 = d; u.s[1] = i1.s; } - /* BRCL */ + /* BRCL or BRASL */ else if (i0.b.op == 0xC0) { - assert(i0.b.r3 == 0x4); + assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5); d = (label - instr) >> 1; assert(s32_p(d)); i12.i = d; diff --git a/deps/lightning/lib/jit_s390-fpu.c b/deps/lightning/lib/jit_s390-fpu.c index 6d6051352..edf9ddd2b 100644 --- 
a/deps/lightning/lib/jit_s390-fpu.c +++ b/deps/lightning/lib/jit_s390-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_s390-sz.c b/deps/lightning/lib/jit_s390-sz.c index bb8b2dc97..96fa6b422 100644 --- a/deps/lightning/lib/jit_s390-sz.c +++ b/deps/lightning/lib/jit_s390-sz.c @@ -1,20 +1,23 @@ - #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 104 +#define JIT_INSTR_MAX 82 0, /* data */ 0, /* live */ - 6, /* align */ + 8, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 2, /* label */ - 42, /* prolog */ + 38, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,184 +25,227 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 44, /* va_start */ - 104, /* va_arg */ - 100, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 40, /* va_start */ + 82, /* va_arg */ + 78, /* va_arg_d */ 0, /* va_end */ - 8, /* addr */ - 24, /* addi */ - 8, /* addcr */ - 20, /* addci */ - 8, /* addxr */ - 12, /* addxi */ - 12, /* subr */ - 24, /* subi */ - 12, /* subcr */ - 20, /* subci */ - 12, /* subxr */ - 12, /* subxi */ - 28, /* rsbi */ - 8, /* mulr */ - 24, /* muli */ - 60, /* qmulr */ - 68, /* qmuli */ - 16, /* qmulr_u */ - 32, /* qmuli_u */ - 12, /* divr */ - 28, /* divi */ + 4, /* addr */ + 12, /* addi */ + 4, /* addcr */ + 10, /* addci */ + 6, /* addxr */ + 10, /* addxi */ + 6, /* subr */ + 12, /* subi */ + 
6, /* subcr */ + 10, /* subci */ + 8, /* subxr */ + 10, /* subxi */ + 14, /* rsbi */ + 6, /* mulr */ + 14, /* muli */ + 38, /* qmulr */ + 42, /* qmuli */ + 10, /* qmulr_u */ + 18, /* qmuli_u */ + 10, /* divr */ + 18, /* divi */ 16, /* divr_u */ - 32, /* divi_u */ - 16, /* qdivr */ - 20, /* qdivi */ - 20, /* qdivr_u */ - 24, /* qdivi_u */ - 12, /* remr */ - 28, /* remi */ + 24, /* divi_u */ + 12, /* qdivr */ + 16, /* qdivi */ + 18, /* qdivr_u */ + 22, /* qdivi_u */ + 10, /* remr */ + 18, /* remi */ 16, /* remr_u */ - 32, /* remi_u */ - 8, /* andr */ - 20, /* andi */ - 8, /* orr */ - 20, /* ori */ - 8, /* xorr */ - 24, /* xori */ - 6, /* lshr */ - 10, /* lshi */ - 6, /* rshr */ - 10, /* rshi */ - 6, /* rshr_u */ - 10, /* rshi_u */ - 4, /* negr */ - 12, /* comr */ - 20, /* ltr */ - 24, /* lti */ - 20, /* ltr_u */ - 24, /* lti_u */ - 20, /* ler */ - 24, /* lei */ - 20, /* ler_u */ - 24, /* lei_u */ - 20, /* eqr */ - 24, /* eqi */ - 20, /* ger */ - 24, /* gei */ - 20, /* ger_u */ - 24, /* gei_u */ - 20, /* gtr */ - 24, /* gti */ - 20, /* gtr_u */ - 24, /* gti_u */ - 20, /* ner */ - 24, /* nei */ - 4, /* movr */ - 16, /* movi */ + 24, /* remi_u */ + 4, /* andr */ + 10, /* andi */ + 4, /* orr */ + 10, /* ori */ + 4, /* xorr */ + 12, /* xori */ + 8, /* lshr */ + 6, /* lshi */ + 8, /* rshr */ + 6, /* rshi */ + 8, /* rshr_u */ + 6, /* rshi_u */ + 2, /* negr */ + 8, /* comr */ + 16, /* ltr */ + 20, /* lti */ + 16, /* ltr_u */ + 20, /* lti_u */ + 16, /* ler */ + 20, /* lei */ + 16, /* ler_u */ + 20, /* lei_u */ + 16, /* eqr */ + 20, /* eqi */ + 16, /* ger */ + 20, /* gei */ + 16, /* ger_u */ + 20, /* gei_u */ + 16, /* gtr */ + 20, /* gti */ + 16, /* gtr_u */ + 20, /* gti_u */ + 16, /* ner */ + 20, /* nei */ + 2, /* movr */ + 8, /* movi */ + 14, /* movnr */ + 14, /* movzr */ + 22, /* casr */ + 28, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ - 4, /* extr_i */ - 4, /* extr_ui */ + 0, /* extr_i */ + 0, /* extr_ui */ + 12, /* bswapr_us */ + 4, /* 
bswapr_ui */ + 0, /* bswapr_ul */ 4, /* htonr_us */ - 4, /* htonr_ui */ - 4, /* htonr_ul */ + 2, /* htonr_ui */ + 0, /* htonr_ul */ 6, /* ldr_c */ - 18, /* ldi_c */ + 12, /* ldi_c */ 6, /* ldr_uc */ - 18, /* ldi_uc */ - 6, /* ldr_s */ - 18, /* ldi_s */ + 14, /* ldi_uc */ + 4, /* ldr_s */ + 10, /* ldi_s */ 6, /* ldr_us */ - 18, /* ldi_us */ + 12, /* ldi_us */ 6, /* ldr_i */ - 18, /* ldi_i */ - 6, /* ldr_ui */ - 18, /* ldi_ui */ - 6, /* ldr_l */ - 18, /* ldi_l */ - 14, /* ldxr_c */ - 26, /* ldxi_c */ - 14, /* ldxr_uc */ - 26, /* ldxi_uc */ - 14, /* ldxr_s */ - 26, /* ldxi_s */ - 14, /* ldxr_us */ - 26, /* ldxi_us */ - 14, /* ldxr_i */ - 26, /* ldxi_i */ - 14, /* ldxr_ui */ - 26, /* ldxi_ui */ - 14, /* ldxr_l */ - 26, /* ldxi_l */ + 12, /* ldi_i */ + 0, /* ldr_ui */ + 0, /* ldi_ui */ + 0, /* ldr_l */ + 0, /* ldi_l */ + 10, /* ldxr_c */ + 16, /* ldxi_c */ + 10, /* ldxr_uc */ + 16, /* ldxi_uc */ + 8, /* ldxr_s */ + 14, /* ldxi_s */ + 10, /* ldxr_us */ + 16, /* ldxi_us */ + 10, /* ldxr_i */ + 16, /* ldxi_i */ + 0, /* ldxr_ui */ + 0, /* ldxi_ui */ + 0, /* ldxr_l */ + 0, /* ldxi_l */ 4, /* str_c */ - 16, /* sti_c */ + 12, /* sti_c */ 4, /* str_s */ - 16, /* sti_s */ + 10, /* sti_s */ 4, /* str_i */ - 16, /* sti_i */ - 6, /* str_l */ - 18, /* sti_l */ - 12, /* stxr_c */ - 28, /* stxi_c */ - 12, /* stxr_s */ - 28, /* stxi_s */ - 12, /* stxr_i */ - 28, /* stxi_i */ - 14, /* stxr_l */ - 30, /* stxi_l */ - 10, /* bltr */ - 14, /* blti */ - 10, /* bltr_u */ - 14, /* blti_u */ - 10, /* bler */ - 14, /* blei */ - 10, /* bler_u */ - 14, /* blei_u */ - 10, /* beqr */ - 26, /* beqi */ - 10, /* bger */ - 14, /* bgei */ - 10, /* bger_u */ - 14, /* bgei_u */ - 10, /* bgtr */ - 14, /* bgti */ - 10, /* bgtr_u */ - 14, /* bgti_u */ - 10, /* bner */ - 26, /* bnei */ - 18, /* bmsr */ - 18, /* bmsi */ - 18, /* bmcr */ - 18, /* bmci */ - 10, /* boaddr */ - 14, /* boaddi */ - 10, /* boaddr_u */ - 14, /* boaddi_u */ - 10, /* bxaddr */ - 14, /* bxaddi */ - 10, /* bxaddr_u */ - 14, /* bxaddi_u */ 
- 10, /* bosubr */ - 14, /* bosubi */ - 10, /* bosubr_u */ - 14, /* bosubi_u */ - 10, /* bxsubr */ - 14, /* bxsubi */ - 10, /* bxsubr_u */ - 14, /* bxsubi_u */ + 10, /* sti_i */ + 0, /* str_l */ + 0, /* sti_l */ + 8, /* stxr_c */ + 16, /* stxi_c */ + 8, /* stxr_s */ + 16, /* stxi_s */ + 8, /* stxr_i */ + 16, /* stxi_i */ + 0, /* stxr_l */ + 0, /* stxi_l */ + 8, /* bltr */ + 12, /* blti */ + 8, /* bltr_u */ + 12, /* blti_u */ + 8, /* bler */ + 12, /* blei */ + 8, /* bler_u */ + 12, /* blei_u */ + 8, /* beqr */ + 16, /* beqi */ + 8, /* bger */ + 12, /* bgei */ + 8, /* bger_u */ + 12, /* bgei_u */ + 8, /* bgtr */ + 12, /* bgti */ + 8, /* bgtr_u */ + 12, /* bgti_u */ + 8, /* bner */ + 16, /* bnei */ + 12, /* bmsr */ + 14, /* bmsi */ + 12, /* bmcr */ + 14, /* bmci */ + 8, /* boaddr */ + 12, /* boaddi */ + 8, /* boaddr_u */ + 12, /* boaddi_u */ + 8, /* bxaddr */ + 12, /* bxaddi */ + 8, /* bxaddr_u */ + 12, /* bxaddi_u */ + 8, /* bosubr */ + 12, /* bosubi */ + 8, /* bosubr_u */ + 12, /* bosubi_u */ + 8, /* bxsubr */ + 12, /* bxsubi */ + 8, /* bxsubr_u */ + 12, /* bxsubi_u */ 2, /* jmpr */ - 18, /* jmpi */ + 6, /* jmpi */ 2, /* callr */ - 18, /* calli */ + 6, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -207,20 +253,20 @@ 0, /* retval_i */ 0, /* 
retval_ui */ 0, /* retval_l */ - 40, /* epilog */ + 36, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 6, /* addr_f */ - 26, /* addi_f */ + 24, /* addi_f */ 8, /* subr_f */ - 26, /* subi_f */ + 24, /* subi_f */ 28, /* rsbi_f */ 6, /* mulr_f */ - 26, /* muli_f */ + 24, /* muli_f */ 8, /* divr_f */ - 26, /* divi_f */ + 24, /* divi_f */ 4, /* negr_f */ 4, /* absr_f */ 4, /* sqrtr_f */ @@ -253,21 +299,21 @@ 16, /* unordr_f */ 36, /* unordi_f */ 4, /* truncr_f_i */ - 4, /* truncr_f_l */ + 0, /* truncr_f_l */ 4, /* extr_f */ 4, /* extr_d_f */ 2, /* movr_f */ 20, /* movi_f */ 4, /* ldr_f */ - 16, /* ldi_f */ - 12, /* ldxr_f */ - 24, /* ldxi_f */ + 10, /* ldi_f */ + 8, /* ldxr_f */ + 14, /* ldxi_f */ 4, /* str_f */ - 16, /* sti_f */ - 12, /* stxr_f */ - 24, /* stxi_f */ + 10, /* sti_f */ + 8, /* stxr_f */ + 14, /* stxi_f */ 10, /* bltr_f */ - 30, /* blti_f */ + 28, /* blti_f */ 10, /* bler_f */ 30, /* blei_f */ 10, /* beqr_f */ @@ -279,11 +325,11 @@ 10, /* bner_f */ 30, /* bnei_f */ 10, /* bunltr_f */ - 30, /* bunlti_f */ + 28, /* bunlti_f */ 10, /* bunler_f */ - 30, /* bunlei_f */ + 28, /* bunlei_f */ 18, /* buneqr_f */ - 38, /* buneqi_f */ + 36, /* buneqi_f */ 10, /* bunger_f */ 30, /* bungei_f */ 10, /* bungtr_f */ @@ -293,7 +339,7 @@ 10, /* bordr_f */ 30, /* bordi_f */ 10, /* bunordr_f */ - 30, /* bunordi_f */ + 28, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -304,87 +350,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 6, /* addr_d */ - 26, /* addi_d */ + 34, /* addi_d */ 8, /* subr_d */ - 26, /* subi_d */ - 28, /* rsbi_d */ + 34, /* subi_d */ + 38, /* rsbi_d */ 6, /* mulr_d */ - 26, /* muli_d */ + 34, /* muli_d */ 8, /* divr_d */ - 26, /* divi_d */ + 34, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 4, /* sqrtr_d */ 16, /* ltr_d */ - 36, /* lti_d */ + 46, /* lti_d */ 16, /* ler_d */ - 36, /* lei_d */ + 46, /* lei_d */ 16, /* eqr_d */ - 36, /* eqi_d */ + 46, /* eqi_d */ 16, /* ger_d */ - 36, /* gei_d */ + 
46, /* gei_d */ 16, /* gtr_d */ - 36, /* gti_d */ + 46, /* gti_d */ 16, /* ner_d */ - 36, /* nei_d */ + 46, /* nei_d */ 16, /* unltr_d */ - 36, /* unlti_d */ + 46, /* unlti_d */ 16, /* unler_d */ - 36, /* unlei_d */ + 46, /* unlei_d */ 20, /* uneqr_d */ - 40, /* uneqi_d */ + 50, /* uneqi_d */ 16, /* unger_d */ - 36, /* ungei_d */ + 46, /* ungei_d */ 16, /* ungtr_d */ - 36, /* ungti_d */ + 46, /* ungti_d */ 20, /* ltgtr_d */ - 40, /* ltgti_d */ + 50, /* ltgti_d */ 16, /* ordr_d */ - 36, /* ordi_d */ + 46, /* ordi_d */ 16, /* unordr_d */ - 36, /* unordi_d */ + 46, /* unordi_d */ 4, /* truncr_d_i */ - 4, /* truncr_d_l */ + 0, /* truncr_d_l */ 4, /* extr_d */ 4, /* extr_f_d */ 2, /* movr_d */ - 24, /* movi_d */ + 30, /* movi_d */ 4, /* ldr_d */ - 16, /* ldi_d */ - 12, /* ldxr_d */ - 24, /* ldxi_d */ + 10, /* ldi_d */ + 8, /* ldxr_d */ + 14, /* ldxi_d */ 4, /* str_d */ - 16, /* sti_d */ - 12, /* stxr_d */ - 24, /* stxi_d */ + 10, /* sti_d */ + 8, /* stxr_d */ + 14, /* stxi_d */ 10, /* bltr_d */ - 30, /* blti_d */ + 38, /* blti_d */ 10, /* bler_d */ - 30, /* blei_d */ + 38, /* blei_d */ 10, /* beqr_d */ - 34, /* beqi_d */ + 40, /* beqi_d */ 10, /* bger_d */ - 30, /* bgei_d */ + 40, /* bgei_d */ 10, /* bgtr_d */ - 30, /* bgti_d */ + 40, /* bgti_d */ 10, /* bner_d */ - 30, /* bnei_d */ + 40, /* bnei_d */ 10, /* bunltr_d */ - 30, /* bunlti_d */ + 38, /* bunlti_d */ 10, /* bunler_d */ - 30, /* bunlei_d */ + 38, /* bunlei_d */ 18, /* buneqr_d */ - 38, /* buneqi_d */ + 46, /* buneqi_d */ 10, /* bunger_d */ - 30, /* bungei_d */ + 40, /* bungei_d */ 10, /* bungtr_d */ - 30, /* bungti_d */ + 40, /* bungti_d */ 18, /* bltgtr_d */ - 38, /* bltgti_d */ + 48, /* bltgti_d */ 10, /* bordr_d */ - 30, /* bordi_d */ + 40, /* bordi_d */ 10, /* bunordr_d */ - 30, /* bunordi_d */ + 38, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -402,12 +448,13 @@ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 104 +#define JIT_INSTR_MAX 100 0, /* data */ 0, 
/* live */ - 6, /* align */ + 20, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 2, /* label */ @@ -416,7 +463,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -424,11 +474,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 44, /* va_start */ - 104, /* va_arg */ - 100, /* va_arg_d */ + 100, /* va_arg */ + 96, /* va_arg_d */ 0, /* va_end */ 8, /* addr */ 24, /* addi */ @@ -445,8 +507,8 @@ 28, /* rsbi */ 8, /* mulr */ 24, /* muli */ - 60, /* qmulr */ - 68, /* qmuli */ + 52, /* qmulr */ + 60, /* qmuli */ 16, /* qmulr_u */ 32, /* qmuli_u */ 12, /* divr */ @@ -468,11 +530,11 @@ 8, /* xorr */ 24, /* xori */ 6, /* lshr */ - 10, /* lshi */ + 6, /* lshi */ 6, /* rshr */ - 10, /* rshi */ + 6, /* rshi */ 6, /* rshr_u */ - 10, /* rshi_u */ + 6, /* rshi_u */ 4, /* negr */ 12, /* comr */ 20, /* ltr */ @@ -497,12 +559,19 @@ 24, /* nei */ 4, /* movr */ 16, /* movi */ + 18, /* movnr */ + 18, /* movzr */ + 30, /* casr */ + 42, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ + 12, /* bswapr_us */ + 8, /* bswapr_ui */ + 4, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 4, /* htonr_ul */ @@ -591,17 +660,41 @@ 10, /* bxsubr_u */ 14, /* bxsubi_u */ 2, /* jmpr */ - 18, /* jmpi */ + 6, /* jmpi */ 2, /* callr */ - 18, /* calli */ + 14, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* 
pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ diff --git a/deps/lightning/lib/jit_s390.c b/deps/lightning/lib/jit_s390.c index 577bb967d..fb04d3e40 100644 --- a/deps/lightning/lib/jit_s390.c +++ b/deps/lightning/lib/jit_s390.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -240,18 +240,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); + jit_code_inc_synth_w(code, u); jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -305,7 +305,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -352,18 +352,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t 
offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -408,7 +412,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _R2 - v->u.w); @@ -421,7 +425,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _R2 - v->u.w); @@ -434,7 +438,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _R2 - v->u.w); @@ -447,7 +451,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _R2 - v->u.w); @@ -460,7 +464,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, 
jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) { #if __WORDSIZE == 32 @@ -479,7 +483,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _R2 - v->u.w); @@ -492,7 +496,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _R2 - v->u.w); @@ -503,10 +507,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) #endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_R2 - v->u.w, u); else @@ -515,11 +519,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_R2 - v->u.w, u); else { @@ -627,10 +631,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if 
(jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(_R2 - _jitc->function->call.argi, u); @@ -644,11 +648,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(_R2 - _jitc->function->call.argi, u); @@ -890,6 +894,10 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_int32_t patch_offset; } undo; @@ -1014,11 +1022,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 1) & ~1); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1138,6 +1148,11 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); #if __WORDSIZE == 64 case_rr(hton, _ul); +#endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __WORDSIZE == 64 + case_rr(bswap, _ul); #endif case_rr(ext, _c); case_rr(ext, _uc); @@ -1147,6 +1162,16 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ 
-1400,14 +1425,21 @@ _emit_code(jit_state_t *_jit) assert(temp->code == jit_code_label || temp->code == jit_code_epilog); if (temp->flag & jit_flag_patch) - jmpi(temp->u.w); + jmpi(temp->u.w, 1); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s32_p(word)) { + offset = s16_p(word); + word = jmpi(_jit->pc.w, offset); + } + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } else - jmpi(node->u.w); + jmpi(node->u.w, 1); break; case jit_code_callr: callr(rn(node->u.w)); @@ -1418,19 +1450,30 @@ _emit_code(jit_state_t *_jit) assert(temp->code == jit_code_label || temp->code == jit_code_epilog); if (temp->flag & jit_flag_patch) - calli(temp->u.w); + calli(temp->u.w, 1); else { - word = calli_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s32_p(word)) { + offset =s16_p(word); + word = calli(_jit->pc.w, offset); + } + else + word = calli_p(_jit->pc.w); patch(word, node); } } else - calli(node->u.w); + calli(node->u.w, 1); break; case jit_code_prolog: _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; restart_function: _jitc->again = 0; @@ -1448,6 +1491,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; goto restart_function; } @@ -1471,11 +1524,23 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1485,10 +1550,26 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case 
jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: diff --git a/deps/lightning/lib/jit_size.c b/deps/lightning/lib/jit_size.c index 0edbdcdf6..f4caccb1d 100644 --- a/deps/lightning/lib/jit_size.c +++ b/deps/lightning/lib/jit_size.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -24,6 +24,9 @@ * Initialization */ static jit_int16_t _szs[jit_code_last_code] = { +#if GET_JIT_SIZE +# define JIT_INSTR_MAX 512 +#else # if defined(__i386__) || defined(__x86_64__) # include "jit_x86-sz.c" # elif defined(__mips__) @@ -46,7 +49,10 @@ static jit_int16_t _szs[jit_code_last_code] = { # include "jit_alpha-sz.c" # elif defined(__riscv) # include "jit_riscv-sz.c" +# elif defined(__loongarch__) +# include "jit_loongarch-sz.c" # endif +#endif }; /* @@ -57,11 +63,40 @@ _jit_get_size(jit_state_t *_jit) { jit_word_t size; jit_node_t *node; +# if __riscv && __WORDSIZE == 64 + jit_word_t extra = 0; +# endif - for (size = JIT_INSTR_MAX, node = _jitc->head; node; node = node->next) - size += _szs[node->code]; + for (size = JIT_INSTR_MAX, node = _jitc->head; node; node = node->next) { +# if __riscv && __WORDSIZE == 64 + /* Get estimative of extra memory for constants at end of code. 
*/ + switch (node->code) { + case jit_code_movi: + case jit_code_movi_f: + case jit_code_movi_d: + case jit_code_jmpi: + case jit_code_calli: + extra += sizeof(jit_word_t); + default: + break; + } +# endif + switch (node->code) { + /* The instructions are special because they can be arbitrarily long. */ + case jit_code_align: + case jit_code_skip: + size += node->u.w; + break; + default: + size += _szs[node->code]; + } + } +# if __riscv && __WORDSIZE == 64 + /* Heuristically only 20% of constants are unique. */ + size += extra / 5; +# endif - return ((size + 4095) & -4096); + return size; } jit_word_t diff --git a/deps/lightning/lib/jit_sparc-cpu.c b/deps/lightning/lib/jit_sparc-cpu.c index 051647a7a..0bf12bf5e 100644 --- a/deps/lightning/lib/jit_sparc-cpu.c +++ b/deps/lightning/lib/jit_sparc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -100,6 +100,9 @@ _f2bp(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t, static void _f3r(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # if __WORDSIZE == 64 +# define f3ri(op, rd, op3, rs1, rs2) _f3ri(_jit, op, rd, op3, rs1, rs2) +static void _f3ri(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define f3rx(op, rd, op3, rs1, rs2) _f3rx(_jit, op, rd, op3, rs1, rs2) static void _f3rx(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); @@ -113,7 +116,7 @@ static void _f3s(jit_state_t*, # define f3t(cond, rs1, i, ri) _f3t(_jit, cond, rs1, i, ri) static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused; -# define f3a(op, rd, op3, rs1, rs2) _f3a(_jit, op, rd, op3, rs1, asi, rs2) +# define f3a(op,rd,op3,rs1,asi,rs2) _f3a(_jit, op, rd, op3, rs1, asi, rs2) static void _f3a(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) 
maybe_unused; @@ -194,6 +197,11 @@ static void _f3a(jit_state_t*,jit_int32_t, # define SWAP(rs1, rs2, rd) f3r(3, rd, 15, rs1, rs2) # define SWAPI(rs1, imm, rd) f3r(3, rd, 15, rs1, imm) # define SWAPA(rs1, rs2, asi, rd) f3a(3, rd, 23, rs1, asi, rs2) +/* Sparc v9 deprecates SWAP* in favor of CAS*A */ +# define CASA(rs1, rs2, rd) f3a(3, rd, 60, rs1, 128, rs2) +# if __WORDSIZE == 64 +# define CASXA(rs1, rs2, rd) f3a(3, rd, 62, rs1, 128, rs2) +# endif # define NOP() SETHI(0, 0) # define HI(im) ((im) >> 10) # define LO(im) ((im) & 0x3ff) @@ -545,6 +553,18 @@ static void _movr(jit_state_t*, jit_int32_t, jit_int32_t); static void _movi(jit_state_t*, jit_int32_t, jit_word_t); # define movi_p(r0, i0) _movi_p(_jit, r0, i0) static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); +# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1) +# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1) +# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1) +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define comr(r0, r1) XNOR(r1, 0, r0) # define negr(r0, r1) NEG(r1, r0) # define addr(r0, r1, r2) ADD(r1, r2, r0) @@ -669,7 +689,6 @@ static void _xori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define rshr_u(r0, r1, r2) SRLX(r1, r2, r0) # define rshi_u(r0, r1, i0) SRLXI(r1, i0, r0) # endif -# define htonr_us(r0,r1) extr_us(r0,r1) # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0,r1) andi(r0, r1, 0xff) @@ -677,11 +696,7 @@ 
static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_us(r0,r1) _extr_us(_jit,r0,r1) static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# if __WORDSIZE == 64 # define extr_i(r0,r1) _extr_i(_jit,r0,r1) static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) @@ -926,13 +941,13 @@ _bm_w(jit_state_t*,jit_bool_t,jit_word_t,jit_int32_t,jit_word_t); # define jmpr(r0) _jmpr(_jit, r0) static void _jmpr(jit_state_t*,jit_int32_t); # define jmpi(i0) _jmpi(_jit, i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); # define jmpi_p(i0) _jmpi_p(_jit, i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define callr(r0) _callr(_jit, r0) static void _callr(jit_state_t*,jit_int32_t); # define calli(i0) _calli(_jit, i0) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(i0) _calli_p(_jit, i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(node) _prolog(_jit, node) @@ -1028,6 +1043,26 @@ _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, } # if __WORDSIZE == 64 +static void +_f3ri(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, + jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(rd & 0xffffffe0)); + assert(!(op3 & 0xffffffc0)); + assert(!(rs1 & 0xffffffe0)); + assert(!(rs2 & 0xffffffe0)); + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.i.b = 1; + v.asi.b = 0; + v.rs2.b = rs2; + ii(v.v); +} + static void _f3rx(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) @@ -1213,6 +1248,54 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, 
jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi(_jit->pc.w, r2, 0); + ORI(r1, 0, r0); + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei(_jit->pc.w, r2, 0); + ORI(r1, 0, r0); + patch_at(w, _jit->pc.w); +} + +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t iscasi, r1_reg; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + /* Do not clobber r2 */ + movr(r0, r3); + /* The CASXA instruction compares the value in register r[rs2] with + * the doubleword in memory pointed to by the doubleword address in + * r[rs1]. If the values are equal, the value in r[rd] is swapped + * with the doubleword pointed to by the doubleword address in r[rs1]. + * If the values are not equal, the contents of the doubleword pointed + * to by r[rs1] replaces the value in r[rd], but the memory location + * remains unchanged. 
+ */ +# if __WORDSIZE == 32 + CASA(r1, r2, r0); +# else + CASXA(r1, r2, r0); +# endif + eqr(r0, r0, r2); + if (iscasi) + jit_unget_reg(r1_reg); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -2224,11 +2307,7 @@ _bw(jit_state_t *_jit, jit_int32_t cc, if (s13_p(i1)) { CMPI(r0, i1); w = _jit->pc.w; -# if __WORDSIZE == 32 B(cc, (i0 - w) >> 2); -# else - B(cc, (i0 - w) >> 2); -# endif NOP(); } else { @@ -2347,14 +2426,15 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) NOP(); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; jit_int32_t reg; - w = (i0 - _jit->pc.w) >> 2; - if (s22_p(w)) { - BA(w); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s22_p(d)) { + BA(d); NOP(); } else { @@ -2363,6 +2443,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) jmpr(rn(reg)); jit_unget_reg(reg); } + return (w); } static jit_word_t @@ -2384,13 +2465,19 @@ _callr(jit_state_t *_jit, jit_int32_t r0) NOP(); } -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; - w = (i0 - _jit->pc.w) >> 2; - CALLI(w); - NOP(); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s30_p(d)) { + CALLI(d); + NOP(); + } + else + w = calli_p(i0); + return (w); } static jit_word_t @@ -2562,6 +2649,11 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) else abort(); } + else if (i.op.b == 1) { + assert(s30_p((label - instr) >> 2)); + i.disp30.b = (label - instr) >> 2; + u.i[0] = i.v; + } else abort(); } diff --git a/deps/lightning/lib/jit_sparc-fpu.c b/deps/lightning/lib/jit_sparc-fpu.c index ae2cbab35..95313477c 100644 --- a/deps/lightning/lib/jit_sparc-fpu.c +++ b/deps/lightning/lib/jit_sparc-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_sparc-sz.c b/deps/lightning/lib/jit_sparc-sz.c index ac683b660..b67f5cb87 100644 --- a/deps/lightning/lib/jit_sparc-sz.c +++ b/deps/lightning/lib/jit_sparc-sz.c @@ -1,10 +1,11 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 44 +#define JIT_INSTR_MAX 52 0, /* data */ 0, /* live */ - 0, /* align */ + 8, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -13,7 +14,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -21,8 +25,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ @@ -94,12 +110,19 @@ 16, /* nei */ 4, /* movr */ 8, /* movi */ + 16, /* movnr */ + 16, /* movzr */ + 24, /* casr */ + 32, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 0, /* bswapr_ul */ 8, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -188,17 +211,41 @@ 12, /* bxsubr_u */ 12, /* bxsubi_u */ 8, /* jmpr */ - 16, /* jmpi */ + 8, /* jmpi */ 8, /* callr */ - 16, /* calli */ + 8, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* 
finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -401,12 +448,13 @@ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 64 +#define JIT_INSTR_MAX 116 0, /* data */ 0, /* live */ 4, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 4, /* label */ @@ -415,7 +463,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -423,8 +474,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ @@ -496,29 +559,36 @@ 16, /* nei */ 4, /* movr */ 24, /* movi */ + 16, /* movnr */ + 16, /* movzr */ + 24, /* casr */ + 44, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 8, /* extr_i */ 8, /* extr_ui */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ 8, /* htonr_us */ 8, /* htonr_ui */ 4, /* htonr_ul */ 4, /* ldr_c */ - 28, /* ldi_c */ + 24, /* ldi_c */ 4, /* ldr_uc */ - 28, /* ldi_uc */ + 24, /* ldi_uc */ 4, /* ldr_s */ - 28, /* ldi_s */ + 24, /* ldi_s */ 4, /* ldr_us */ - 28, /* ldi_us */ + 24, /* ldi_us */ 4, /* ldr_i */ - 28, /* ldi_i */ + 24, /* ldi_i */ 4, /* ldr_ui */ - 28, /* ldi_ui */ + 24, /* ldi_ui */ 4, /* ldr_l */ 
- 28, /* ldi_l */ + 24, /* ldi_l */ 4, /* ldxr_c */ 24, /* ldxi_c */ 4, /* ldxr_uc */ @@ -534,13 +604,13 @@ 4, /* ldxr_l */ 24, /* ldxi_l */ 4, /* str_c */ - 28, /* sti_c */ + 24, /* sti_c */ 4, /* str_s */ - 28, /* sti_s */ + 24, /* sti_s */ 4, /* str_i */ - 28, /* sti_i */ + 24, /* sti_i */ 4, /* str_l */ - 28, /* sti_l */ + 24, /* sti_l */ 4, /* stxr_c */ 24, /* stxi_c */ 4, /* stxr_s */ @@ -590,17 +660,41 @@ 12, /* bxsubr_u */ 12, /* bxsubi_u */ 8, /* jmpr */ - 32, /* jmpi */ + 8, /* jmpi */ 8, /* callr */ - 32, /* calli */ + 40, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -660,11 +754,11 @@ 16, /* movr_f */ 32, /* movi_f */ 8, /* ldr_f */ - 32, /* ldi_f */ + 28, /* ldi_f */ 8, /* ldxr_f */ 28, /* ldxi_f */ 8, /* str_f */ - 32, /* sti_f */ + 28, /* sti_f */ 8, /* stxr_f */ 28, /* stxi_f */ 20, /* bltr_f */ @@ -751,11 +845,11 @@ 4, /* movr_d */ 32, /* movi_d */ 4, /* ldr_d */ - 28, /* ldi_d */ + 24, /* ldi_d */ 4, /* ldxr_d */ 24, /* ldxi_d */ 4, /* str_d */ - 28, /* sti_d */ + 24, /* sti_d */ 4, /* stxr_d */ 24, /* stxi_d */ 12, /* bltr_d */ diff --git a/deps/lightning/lib/jit_sparc.c b/deps/lightning/lib/jit_sparc.c index 6abf3f9cd..350318b97 100644 --- a/deps/lightning/lib/jit_sparc.c +++ b/deps/lightning/lib/jit_sparc.c @@ -1,5 +1,5 @@ /* 
- * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -184,7 +184,7 @@ _jit_prolog(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; _jitc->function->self.size = stack_framesize; _jitc->function->self.argi = _jitc->function->self.argf = - _jitc->function->self.aoff = _jitc->function->self.alen = 0; + _jitc->function->self.alen = 0; /* float conversion */ # if __WORDSIZE == 32 _jitc->function->self.aoff = -8; @@ -265,20 +265,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -339,12 +337,13 @@ jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { # if __WORDSIZE == 32 - if (u->code == jit_code_arg || u->code == jit_code_arg_f) + if ((u->code >= jit_code_arg_c && u->code <= jit_code_arg) || + u->code == jit_code_arg_f) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_d); return (jit_arg_d_reg_p(u->u.w)); # else - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f); return (jit_arg_d_reg_p(u->u.w)); @@ -379,11 +378,15 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & 
jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { @@ -394,7 +397,7 @@ _jit_arg(jit_state_t *_jit) offset = BIAS(_jitc->function->self.size); _jitc->function->self.size += sizeof(jit_word_t); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -471,7 +474,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _I0 + v->u.w); @@ -484,7 +487,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _I0 + v->u.w); @@ -497,7 +500,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _I0 + v->u.w); @@ -510,7 +513,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _I0 + v->u.w); @@ -523,7 +526,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); 
jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) { # if __WORDSIZE == 64 @@ -542,7 +545,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _I0 + v->u.w); @@ -555,7 +558,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _I0 + v->u.w); @@ -566,10 +569,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) # endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_I0 + v->u.w, u); else @@ -578,11 +581,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_I0 + v->u.w, u); else { @@ -795,9 +798,9 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(_O0 + 
_jitc->function->call.argi, u); @@ -816,10 +819,10 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(_O0 + _jitc->function->call.argi, u); @@ -1193,6 +1196,10 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_int32_t patch_offset; } undo; @@ -1308,11 +1315,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1450,6 +1459,11 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); #if __WORDSIZE == 64 case_rr(hton, _ul); +#endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __WORDSIZE == 64 + case_rr(bswap, _ul); #endif case_rr(ext, _c); case_rr(ext, _uc); @@ -1459,6 +1473,16 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1696,7 +1720,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) 
jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s22_p(word >> 2)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -1711,9 +1740,17 @@ _emit_code(jit_state_t *_jit) temp = node->u.n; assert(temp->code == jit_code_label || temp->code == jit_code_epilog); - word = calli_p(temp->u.w); - if (!(temp->flag & jit_flag_patch)) + if (temp->flag & jit_flag_patch) + calli(temp->u.w); + else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s30_p(word >> 2)) + word = calli(_jit->pc.w); + else + word = calli_p(_jit->pc.w); patch(word, node); + } } else calli(node->u.w); @@ -1722,6 +1759,10 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; restart_function: _jitc->again = 0; @@ -1739,6 +1780,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; goto restart_function; } @@ -1760,11 +1811,23 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +#if __WORDSIZE == 64 + case jit_code_arg_l: +#endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1774,10 +1837,26 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case 
jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: diff --git a/deps/lightning/lib/jit_x86-cpu.c b/deps/lightning/lib/jit_x86-cpu.c index 547f36c31..a9c121c48 100644 --- a/deps/lightning/lib/jit_x86-cpu.c +++ b/deps/lightning/lib/jit_x86-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -136,12 +136,6 @@ # else # define il(l) ii(l) # endif -# define patch_abs(instr, label) \ - *(jit_word_t *)(instr - sizeof(jit_word_t)) = label -# define patch_rel(instr, label) \ - *(jit_int32_t *)(instr - 4) = label - instr -# define patch_rel_char(instr, label) \ - *(jit_int8_t *)(instr - 1) = label - instr # define rex(l, w, r, x, b) _rex(_jit, l, w, r, x, b) static void _rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); @@ -369,19 +363,28 @@ static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t); static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t); # define movsr_u(r0, r1) _movsr_u(_jit, r0, r1) static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) +#define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2) 
+static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2) +static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # if __X64 && !__X64_32 # define movir(r0, r1) _movir(_jit, r0, r1) static void _movir(jit_state_t*,jit_int32_t,jit_int32_t); # define movir_u(r0, r1) _movir_u(_jit, r0, r1) static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t); # endif -# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # if __X64 && !__X64_32 -#define htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); +#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1) +static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t); #endif # define extr_c(r0, r1) _extr_c(_jit, r0, r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); @@ -538,7 +541,7 @@ static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); # define jng(i0) jcc(X86_CC_NG, i0) # define jg(i0) jcc(X86_CC_G, i0) # define jnle(i0) jcc(X86_CC_NLE, i0) -static void _jcc(jit_state_t*, jit_int32_t, jit_word_t); +static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t); # define jccs(code, i0) _jccs(_jit, code, i0) # define jos(i0) jccs(X86_CC_O, i0) # define jnos(i0) jccs(X86_CC_NO, i0) @@ -570,13 +573,15 @@ static void _jcc(jit_state_t*, jit_int32_t, jit_word_t); # define jngs(i0) jccs(X86_CC_NG, i0) # define jgs(i0) jccs(X86_CC_G, i0) # define jnles(i0) jccs(X86_CC_NLE, i0) -static void _jccs(jit_state_t*, jit_int32_t, jit_word_t); +static jit_word_t _jccs(jit_state_t*, 
jit_int32_t, jit_word_t); # define jcr(code, i0, r0, r1) _jcr(_jit, code, i0, r0, r1) -static void _jcr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); +static jit_word_t _jcr(jit_state_t*, + jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); # define jci(code, i0, r0, i1) _jci(_jit, code, i0, r0, i1) -static void _jci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); +static jit_word_t _jci(jit_state_t*, + jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); # define jci0(code, i0, r0) _jci0(_jit, code, i0, r0) -static void _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t); +static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t); # define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1) static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); # define blti(i0, r0, i1) _blti(_jit, i0, r0, i1) @@ -678,7 +683,7 @@ static jit_word_t _jmpi_p(jit_state_t*, jit_word_t); # define jmpi_p(i0) jmpi(i0) # endif # define jmpsi(i0) _jmpsi(_jit, i0) -static void _jmpsi(jit_state_t*, jit_uint8_t); +static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t); # define prolog(node) _prolog(_jit, node) static void _prolog(jit_state_t*, jit_node_t*); # define epilog(node) _epilog(_jit, node) @@ -689,8 +694,8 @@ static void _vastart(jit_state_t*, jit_int32_t); static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); # define vaarg_d(r0, r1, i0) _vaarg_d(_jit, r0, r1, i0) static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t); -# define patch_at(node, instr, label) _patch_at(_jit, node, instr, label) -static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t); +# define patch_at(instr, label) _patch_at(_jit, instr, label) +static void _patch_at(jit_state_t*, jit_word_t, jit_word_t); # if !defined(HAVE_FFSL) # if __X32 # define ffsl(i) __builtin_ffs(i) @@ -698,6 +703,7 @@ static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t); # define ffsl(l) __builtin_ffsl(l) # endif # endif +# 
define jit_cmov_p() jit_cpu.cmov #endif #if CODE @@ -792,44 +798,49 @@ _rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md, static void _nop(jit_state_t *_jit, jit_int32_t count) { - switch (count) { - case 0: - break; - case 1: /* NOP */ - ic(0x90); break; - case 2: /* 66 NOP */ - ic(0x66); ic(0x90); - break; - case 3: /* NOP DWORD ptr [EAX] */ - ic(0x0f); ic(0x1f); ic(0x00); - break; - case 4: /* NOP DWORD ptr [EAX + 00H] */ - ic(0x0f); ic(0x1f); ic(0x40); ic(0x00); - break; - case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */ - ic(0x0f); ic(0x1f); ic(0x44); ic(0x00); - ic(0x00); - break; - case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */ - ic(0x66); ic(0x0f); ic(0x1f); ic(0x44); - ic(0x00); ic(0x00); - break; - case 7: /* NOP DWORD ptr [EAX + 00000000H] */ - ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000); - break; - case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ - ic(0x0f); ic(0x1f); ic(0x84); ic(0x00); - ii(0x0000); - break; - case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ - ic(0x66); ic(0x0f); ic(0x1f); ic(0x84); - ic(0x00); ii(0x0000); - break; - default: - abort(); + jit_int32_t i; + while (count) { + if (count > 9) + i = 9; + else + i = count; + switch (i) { + case 0: + break; + case 1: /* NOP */ + ic(0x90); break; + case 2: /* 66 NOP */ + ic(0x66); ic(0x90); + break; + case 3: /* NOP DWORD ptr [EAX] */ + ic(0x0f); ic(0x1f); ic(0x00); + break; + case 4: /* NOP DWORD ptr [EAX + 00H] */ + ic(0x0f); ic(0x1f); ic(0x40); ic(0x00); + break; + case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */ + ic(0x0f); ic(0x1f); ic(0x44); ic(0x00); + ic(0x00); + break; + case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */ + ic(0x66); ic(0x0f); ic(0x1f); ic(0x44); + ic(0x00); ic(0x00); + break; + case 7: /* NOP DWORD ptr [EAX + 00000000H] */ + ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000); + break; + case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ + ic(0x0f); ic(0x1f); ic(0x84); ic(0x00); + ii(0x0000); + break; + case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ 
+ ic(0x66); ic(0x0f); ic(0x1f); ic(0x84); + ic(0x00); ii(0x0000); + break; + } + count -= i; } } - static void _lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd) @@ -2171,10 +2182,12 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { + jit_word_t w; rex(0, WIDE, _NOREG, _NOREG, r0); + w = _jit->pc.w; ic(0xb8 | r7(r0)); il(i0); - return (_jit->pc.w); + return (w); } static void @@ -2213,6 +2226,92 @@ _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) mrm(0x03, r7(r0), r7(r1)); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t save_rax, restore_rax; + jit_int32_t ascasr_reg, ascasr_use; + if (r0 != _RAX_REGNO) { /* result not in %rax */ + if (r2 != _RAX_REGNO) { /* old value not in %rax */ + save_rax = jit_get_reg(jit_class_gpr); + movr(rn(save_rax), _RAX_REGNO); + restore_rax = 1; + } + else + restore_rax = 0; + } + else + restore_rax = 0; + if (r2 != _RAX_REGNO) + movr(_RAX_REGNO, r2); + if (r1 == _NOREG) { /* using immediate address */ + if (!can_sign_extend_int_p(i0)) { + ascasr_reg = jit_get_reg(jit_class_gpr); + if (ascasr_reg == _RAX) { + ascasr_reg = jit_get_reg(jit_class_gpr); + jit_unget_reg(_RAX); + } + ascasr_use = 1; + movi(rn(ascasr_reg), i0); + } + else + ascasr_use = 0; + } + else + ascasr_use = 0; + ic(0xf0); /* lock */ + if (ascasr_use) + rex(0, WIDE, r3, _NOREG, rn(ascasr_reg)); + else + rex(0, WIDE, r3, _NOREG, r1); + ic(0x0f); + ic(0xb1); + if (r1 != _NOREG) /* casr */ + rx(r3, 0, r1, _NOREG, _SCL1); + else { /* casi */ + if (ascasr_use) + rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1); /* address in reg */ + else + rx(r3, i0, _NOREG, _NOREG, _SCL1); /* address in offset */ + } + cc(X86_CC_E, r0); + if (r0 != _RAX_REGNO) + movr(r0, _RAX_REGNO); + if (restore_rax) { + movr(_RAX_REGNO, rn(save_rax)); + 
jit_unget_reg(save_rax); + } + if (ascasr_use) + jit_unget_reg(ascasr_reg); +} + +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(jit_cmov_p()); + + testr(r2, r2); + + rex(0, WIDE, r0, _NOREG, r1); + ic(0x0f); + ic(0x45); + mrm(0x03, r7(r0), r7(r1)); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(jit_cmov_p()); + + testr(r2, r2); + + rex(0, WIDE, r0, _NOREG, r1); + ic(0x0f); + ic(0x44); + mrm(0x03, r7(r0), r7(r1)); +} + #if __X64 static void _movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) @@ -2232,7 +2331,7 @@ _movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #endif static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { extr_us(r0, r1); ic(0x66); @@ -2243,7 +2342,7 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { movr(r0, r1); rex(0, 0, _NOREG, _NOREG, r0); @@ -2253,7 +2352,7 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #if __X64 && !__X64_32 static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { movr(r0, r1); rex(0, 1, _NOREG, _NOREG, r0); @@ -2454,7 +2553,11 @@ _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); +# if __X64_32 + ldr_i(r0, rn(reg)); +# else ldr_ui(r0, rn(reg)); +# endif jit_unget_reg(reg); } } @@ -2677,7 +2780,11 @@ _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); +# if __X64_32 + ldxr_i(r0, r1, rn(reg)); +# else ldxr_ui(r0, r1, rn(reg)); +# endif jit_unget_reg(reg); } } @@ -2983,208 +3090,221 @@ _stxi_l(jit_state_t *_jit, jit_word_t 
i0, jit_int32_t r0, jit_int32_t r1) } #endif -static void +static jit_word_t _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0) { + jit_word_t d; jit_word_t w; + w = _jit->pc.w; + d = i0 - (w + 1); ic(0x70 | code); - w = i0 - (_jit->pc.w + 1); - ic(w); + ic(d); + return (w); } -static void +static jit_word_t _jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0) { + jit_word_t d; jit_word_t w; ic(0x0f); + w = _jit->pc.w; + d = i0 - (w + 5); ic(0x80 | code); - w = i0 - (_jit->pc.w + 4); - ii(w); + ii(d); + return (w); } -static void +static jit_word_t _jcr(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { alur(X86_CMP, r0, r1); - jcc(code, i0); + return (jcc(code, i0)); } -static void +static jit_word_t _jci(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { alui(X86_CMP, r0, i1); - jcc(code, i0); + return (jcc(code, i0)); } -static void +static jit_word_t _jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0) { testr(r0, r0); - jcc(code, i0); + return (jcc(code, i0)); } static jit_word_t _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_L, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_L, i0, r0, r1)); } static jit_word_t _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_L, i0, r0, i1); - else jci0(X86_CC_S, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_L, i0, r0, i1); + else w = jci0(X86_CC_S, i0, r0); + return (w); } static jit_word_t _bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_B, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_B, i0, r0, r1)); } static jit_word_t _blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_B, i0, r0, i1); - else jci0(X86_CC_B, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_B, i0, r0, i1); + else w = 
jci0(X86_CC_B, i0, r0); + return (w); } static jit_word_t _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_LE, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = jcr (X86_CC_LE, i0, r0, r1); + return (w); } static jit_word_t _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_LE, i0, r0, i1); - else jci0(X86_CC_LE, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_LE, i0, r0, i1); + else w = jci0(X86_CC_LE, i0, r0); + return (w); } static jit_word_t _bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_BE, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = jcr (X86_CC_BE, i0, r0, r1); + return (w); } static jit_word_t _blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_BE, i0, r0, i1); - else jci0(X86_CC_BE, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_BE, i0, r0, i1); + else w = jci0(X86_CC_BE, i0, r0); + return (w); } static jit_word_t _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_E, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = jcr (X86_CC_E, i0, r0, r1); + return (w); } static jit_word_t _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_E, i0, r0, i1); - else jci0(X86_CC_E, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_E, i0, r0, i1); + else w = jci0(X86_CC_E, i0, r0); + return (w); } static jit_word_t _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_GE, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = 
jcr (X86_CC_GE, i0, r0, r1); + return (w); } static jit_word_t _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_GE, i0, r0, i1); - else jci0(X86_CC_NS, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_GE, i0, r0, i1); + else w = jci0(X86_CC_NS, i0, r0); + return (w); } static jit_word_t _bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_AE, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = jcr (X86_CC_AE, i0, r0, r1); + return (w); } static jit_word_t _bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_AE, i0, r0, i1); - else jmpi(i0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_AE, i0, r0, i1); + else w = jmpi(i0); + return (w); } static jit_word_t _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_G, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_G, i0, r0, r1)); } static jit_word_t _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - jci(X86_CC_G, i0, r0, i1); - return (_jit->pc.w); + return (jci(X86_CC_G, i0, r0, i1)); } static jit_word_t _bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_A, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_A, i0, r0, r1)); } static jit_word_t _bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_A, i0, r0, i1); - else jci0(X86_CC_NE, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_A, i0, r0, i1); + else w = jci0(X86_CC_NE, i0, r0); + return (w); } static jit_word_t _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_NE, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_NE, i0, r0, r1)); } static jit_word_t _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, 
jit_word_t i1) { - if (i1) jci (X86_CC_NE, i0, r0, i1); - else jci0(X86_CC_NE, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_NE, i0, r0, i1); + else w = jci0(X86_CC_NE, i0, r0); + return (w); } static jit_word_t _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { testr(r0, r1); - jnz(i0); - return (_jit->pc.w); + return (jnz(i0)); } static jit_word_t @@ -3199,16 +3319,14 @@ _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) testr(r0, rn(reg)); jit_unget_reg(reg); } - jnz(i0); - return (_jit->pc.w); + return (jnz(i0)); } static jit_word_t _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { testr(r0, r1); - jz(i0); - return (_jit->pc.w); + return (jz(i0)); } static jit_word_t @@ -3223,16 +3341,14 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) testr(r0, rn(reg)); jit_unget_reg(reg); } - jz(i0); - return (_jit->pc.w); + return (jz(i0)); } static jit_word_t _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { iaddr(r0, r1); - jo(i0); - return (_jit->pc.w); + return (jo(i0)); } static jit_word_t @@ -3241,8 +3357,7 @@ _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { iaddi(r0, i1); - jo(i0); - return (_jit->pc.w); + return (jo(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3254,8 +3369,7 @@ static jit_word_t _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { iaddr(r0, r1); - jc(i0); - return (_jit->pc.w); + return (jc(i0)); } static jit_word_t @@ -3264,8 +3378,7 @@ _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { iaddi(r0, i1); - jc(i0); - return (_jit->pc.w); + return (jc(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3277,8 +3390,7 @@ static jit_word_t _bxaddr(jit_state_t *_jit, 
jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { iaddr(r0, r1); - jno(i0); - return (_jit->pc.w); + return (jno(i0)); } static jit_word_t @@ -3287,8 +3399,7 @@ _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { iaddi(r0, i1); - jno(i0); - return (_jit->pc.w); + return (jno(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3300,8 +3411,7 @@ static jit_word_t _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { iaddr(r0, r1); - jnc(i0); - return (_jit->pc.w); + return (jnc(i0)); } static jit_word_t @@ -3310,8 +3420,7 @@ _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { iaddi(r0, i1); - jnc(i0); - return (_jit->pc.w); + return (jnc(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3323,8 +3432,7 @@ static jit_word_t _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { isubr(r0, r1); - jo(i0); - return (_jit->pc.w); + return (jo(i0)); } static jit_word_t @@ -3333,8 +3441,7 @@ _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { isubi(r0, i1); - jo(i0); - return (_jit->pc.w); + return (jo(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3346,8 +3453,7 @@ static jit_word_t _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { isubr(r0, r1); - jc(i0); - return (_jit->pc.w); + return (jc(i0)); } static jit_word_t @@ -3356,8 +3462,7 @@ _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { isubi(r0, i1); - jc(i0); - return (_jit->pc.w); + return (jc(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3369,8 +3474,7 @@ static jit_word_t _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, 
jit_int32_t r1) { isubr(r0, r1); - jno(i0); - return (_jit->pc.w); + return (jno(i0)); } static jit_word_t @@ -3379,8 +3483,7 @@ _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { isubi(r0, i1); - jno(i0); - return (_jit->pc.w); + return (jno(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3392,8 +3495,7 @@ static jit_word_t _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { isubr(r0, r1); - jnc(i0); - return (_jit->pc.w); + return (jnc(i0)); } static jit_word_t @@ -3402,8 +3504,7 @@ _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { isubi(r0, i1); - jnc(i0); - return (_jit->pc.w); + return (jnc(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3422,35 +3523,39 @@ _callr(jit_state_t *_jit, jit_int32_t r0) static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { - jit_word_t word; jit_word_t w; + jit_word_t d; + jit_word_t l = _jit->pc.w + 5; + d = i0 - l; #if __X64 - w = i0 - (_jit->pc.w + 5); - if ((jit_int32_t)w == w) { + if ( +# if __X64_32 + !((d < 0) ^ (l < 0)) && +# endif + (jit_int32_t)d == d) { #endif + w = _jit->pc.w; ic(0xe8); - w = i0 - (_jit->pc.w + 4); - ii(w); - word = _jit->pc.w; + ii(d); #if __X64 } else - word = calli_p(i0); + w = calli_p(i0); #endif - return (word); + return (w); } #if __X64 static jit_word_t _calli_p(jit_state_t *_jit, jit_word_t i0) { - jit_word_t word; + jit_word_t w; jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); - word = movi_p(rn(reg), i0); + w = movi_p(rn(reg), i0); callr(rn(reg)); jit_unget_reg(reg); - return (word); + return (w); } #endif @@ -3465,43 +3570,49 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t word; jit_word_t w; + jit_word_t d; + jit_word_t l = _jit->pc.w + 5; + d = i0 - l; #if __X64 - w = i0 - 
(_jit->pc.w + 5); - if ((jit_int32_t)w == w) { + if ( +# if __X64_32 + !((d < 0) ^ (l < 0)) && +# endif + (jit_int32_t)d == d) { #endif + w = _jit->pc.w; ic(0xe9); - w = i0 - (_jit->pc.w + 4); - ii(w); - word = _jit->pc.w; + ii(d); #if __X64 } else - word = jmpi_p(i0); + w = jmpi_p(i0); #endif - return (word); + return (w); } #if __X64 static jit_word_t _jmpi_p(jit_state_t *_jit, jit_word_t i0) { - jit_word_t word; + jit_word_t w; jit_int32_t reg; reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - word = movi_p(rn(reg), i0); + w = movi_p(rn(reg), i0); jmpr(rn(reg)); jit_unget_reg(reg); - return (word); + return (w); } #endif -static void +static jit_word_t _jmpsi(jit_state_t *_jit, jit_uint8_t i0) { + jit_word_t w = _jit->pc.w; ic(0xeb); ic(i0); + return (w); } static void @@ -3615,8 +3726,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* test %al, %al */ ic(0x84); ic(0xc0); - jes(0); - nofp_code = _jit->pc.w; + nofp_code = jes(0); /* Save fp registers in the save area, if any is a vararg */ /* Note that the full 16 byte xmm is not saved, because @@ -3627,7 +3737,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) sse_stxi_d(_jitc->function->vaoff + first_fp_offset + reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg)); - patch_rel_char(nofp_code, _jit->pc.w); + patch_at(nofp_code, _jit->pc.w); } } #endif @@ -3760,8 +3870,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) /* Jump over if there are no remaining arguments in the save area. */ icmpi(rn(rg0), va_gp_max_offset); - jaes(0); - ge_code = _jit->pc.w; + ge_code = jaes(0); /* Load the save area pointer in the second temporary. */ ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save)); @@ -3777,11 +3886,10 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(rg1); /* Jump over overflow code. */ - jmpsi(0); - lt_code = _jit->pc.w; + lt_code = jmpsi(0); /* Where to land if argument is in overflow area. 
*/ - patch_rel_char(ge_code, _jit->pc.w); + patch_at(ge_code, _jit->pc.w); /* Load overflow pointer. */ ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over)); @@ -3794,7 +3902,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) stxi(offsetof(jit_va_list_t, over), r1, rn(rg0)); /* Where to land if argument is in save area. */ - patch_rel_char(lt_code, _jit->pc.w); + patch_at(lt_code, _jit->pc.w); jit_unget_reg(rg0); #endif @@ -3828,8 +3936,7 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87) /* Jump over if there are no remaining arguments in the save area. */ icmpi(rn(rg0), va_fp_max_offset); - jaes(0); - ge_code = _jit->pc.w; + ge_code = jaes(0); /* Load the save area pointer in the second temporary. */ ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save)); @@ -3848,11 +3955,10 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87) jit_unget_reg(rg1); /* Jump over overflow code. */ - jmpsi(0); - lt_code = _jit->pc.w; + lt_code = jmpsi(0); /* Where to land if argument is in overflow area. */ - patch_rel_char(ge_code, _jit->pc.w); + patch_at(ge_code, _jit->pc.w); /* Load overflow pointer. */ ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over)); @@ -3868,27 +3974,43 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87) stxi(offsetof(jit_va_list_t, over), r1, rn(rg0)); /* Where to land if argument is in save area. */ - patch_rel_char(lt_code, _jit->pc.w); + patch_at(lt_code, _jit->pc.w); jit_unget_reg(rg0); #endif } static void -_patch_at(jit_state_t *_jit, jit_node_t *node, - jit_word_t instr, jit_word_t label) +_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) { - switch (node->code) { -# if __X64 - case jit_code_calli: - case jit_code_jmpi: -# endif - case jit_code_movi: - patch_abs(instr, label); + jit_word_t disp; + jit_uint8_t *code = (jit_uint8_t *)instr; + ++instr; + switch (code[0]) { + /* movi */ + case 0xb8 ... 
0xbf: + *(jit_word_t *)instr = label; break; - default: - patch_rel(instr, label); + /* calli */ + case 0xe8: + /* jmpi */ + case 0xe9: + /* jcc */ + case 0x80 ... 0x8f: + disp = label - (instr + 4); + assert((jit_int32_t)disp == disp); + *(jit_int32_t *)instr = disp; break; + /* jccs */ + case 0x70 ... 0x7f: + /* jmpsi */ + case 0xeb: + disp = label - (instr + 1); + assert((jit_int8_t)disp == disp); + *(jit_int8_t *)instr = disp; + break; + default: + abort(); } } #endif diff --git a/deps/lightning/lib/jit_x86-sse.c b/deps/lightning/lib/jit_x86-sse.c index d09bda9ba..bba62bc81 100644 --- a/deps/lightning/lib/jit_x86-sse.c +++ b/deps/lightning/lib/jit_x86-sse.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -470,14 +470,14 @@ _sse_b##name##i_##type(jit_state_t *_jit, \ jit_word_t i0, jit_int32_t r0, \ jit_float##size##_t *i1) \ { \ - jit_word_t word; \ + jit_word_t w; \ jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \ jit_class_nospill); \ assert(jit_sse_reg_p(reg)); \ sse_movi_##type(rn(reg), i1); \ - word = sse_b##name##r_##type(i0, r0, rn(reg)); \ + w = sse_b##name##r_##type(i0, r0, rn(reg)); \ jit_unget_reg(reg); \ - return (word); \ + return (w); \ } # define fopi(name) fpr_opi(name, f, 32) # define fbopi(name) fpr_bopi(name, f, 32) @@ -840,10 +840,9 @@ _sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } ixorr(reg, reg); ucomissr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_E, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -866,10 +865,9 @@ _sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } imovi(reg, 1); ucomissr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_NE, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if 
(!rc) xchgr(r0, reg); } @@ -1022,8 +1020,7 @@ static jit_word_t _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r1, r0); - ja(i0); - return (_jit->pc.w); + return (ja(i0)); } fbopi(lt) @@ -1031,21 +1028,20 @@ static jit_word_t _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r1, r0); - jae(i0); - return (_jit->pc.w); + return (jae(i0)); } fbopi(le) static jit_word_t _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_word_t jp_code; ucomissr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - je(i0); - patch_rel_char(jp_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jps(0); + w = je(i0); + patch_at(jp_code, _jit->pc.w); + return (w); } fbopi(eq) @@ -1053,8 +1049,7 @@ static jit_word_t _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - jae(i0); - return (_jit->pc.w); + return (jae(i0)); } fbopi(ge) @@ -1062,25 +1057,23 @@ static jit_word_t _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - ja(i0); - return (_jit->pc.w); + return (ja(i0)); } fbopi(gt) static jit_word_t _sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_word_t jp_code; jit_word_t jz_code; ucomissr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - jzs(0); - jz_code = _jit->pc.w; - patch_rel_char(jp_code, _jit->pc.w); - jmpi(i0); - patch_rel_char(jz_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jps(0); + jz_code = jzs(0); + patch_at(jp_code, _jit->pc.w); + w = jmpi(i0); + patch_at(jz_code, _jit->pc.w); + return (w); } fbopi(ne) @@ -1088,47 +1081,49 @@ static jit_word_t _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - jnae(i0); - return (_jit->pc.w); + return (jnae(i0)); } fbopi(unlt) static jit_word_t _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t 
r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomissr(r0, r1); - jna(i0); + w = jna(i0); } - return (_jit->pc.w); + return (w); } fbopi(unle) static jit_word_t _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomissr(r0, r1); - je(i0); + w = je(i0); } - return (_jit->pc.w); + return (w); } fbopi(uneq) static jit_word_t _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomissr(r1, r0); - jna(i0); + w = jna(i0); } - return (_jit->pc.w); + return (w); } fbopi(unge) @@ -1136,8 +1131,7 @@ static jit_word_t _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r1, r0); - jnae(i0); - return (_jit->pc.w); + return (jnae(i0)); } fbopi(ungt) @@ -1145,8 +1139,7 @@ static jit_word_t _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - jne(i0); - return (_jit->pc.w); + return (jne(i0)); } fbopi(ltgt) @@ -1154,8 +1147,7 @@ static jit_word_t _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - jnp(i0); - return (_jit->pc.w); + return (jnp(i0)); } fbopi(ord) @@ -1163,8 +1155,7 @@ static jit_word_t _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - jp(i0); - return (_jit->pc.w); + return (jp(i0)); } fbopi(unord) @@ -1185,10 +1176,9 @@ _sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } ixorr(reg, reg); ucomisdr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_E, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -1211,10 +1201,9 @@ _sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } imovi(reg, 1); ucomisdr(r2, r1); - jpes(0); - jp_code = 
_jit->pc.w; + jp_code = jpes(0); cc(X86_CC_NE, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -1415,8 +1404,7 @@ static jit_word_t _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r1, r0); - ja(i0); - return (_jit->pc.w); + return (ja(i0)); } dbopi(lt) @@ -1424,21 +1412,20 @@ static jit_word_t _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r1, r0); - jae(i0); - return (_jit->pc.w); + return (jae(i0)); } dbopi(le) static jit_word_t _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_word_t jp_code; ucomisdr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - je(i0); - patch_rel_char(jp_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jps(0); + w = je(i0); + patch_at(jp_code, _jit->pc.w); + return (w); } dbopi(eq) @@ -1446,8 +1433,7 @@ static jit_word_t _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jae(i0); - return (_jit->pc.w); + return (jae(i0)); } dbopi(ge) @@ -1455,25 +1441,23 @@ static jit_word_t _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - ja(i0); - return (_jit->pc.w); + return (ja(i0)); } dbopi(gt) static jit_word_t _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_word_t jp_code; jit_word_t jz_code; ucomisdr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - jzs(0); - jz_code = _jit->pc.w; - patch_rel_char(jp_code, _jit->pc.w); - jmpi(i0); - patch_rel_char(jz_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jps(0); + jz_code = jzs(0); + patch_at(jp_code, _jit->pc.w); + w = jmpi(i0); + patch_at(jz_code, _jit->pc.w); + return (w); } dbopi(ne) @@ -1481,47 +1465,49 @@ static jit_word_t _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jnae(i0); - return (_jit->pc.w); + 
return (jnae(i0)); } dbopi(unlt) static jit_word_t _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomisdr(r0, r1); - jna(i0); + w = jna(i0); } - return (_jit->pc.w); + return (w); } dbopi(unle) static jit_word_t _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomisdr(r0, r1); - je(i0); + w = je(i0); } - return (_jit->pc.w); + return (w); } dbopi(uneq) static jit_word_t _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomisdr(r1, r0); - jna(i0); + w = jna(i0); } - return (_jit->pc.w); + return (w); } dbopi(unge) @@ -1529,8 +1515,7 @@ static jit_word_t _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r1, r0); - jnae(i0); - return (_jit->pc.w); + return (jnae(i0)); } dbopi(ungt) @@ -1538,8 +1523,7 @@ static jit_word_t _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jne(i0); - return (_jit->pc.w); + return (jne(i0)); } dbopi(ltgt) @@ -1547,8 +1531,7 @@ static jit_word_t _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jnp(i0); - return (_jit->pc.w); + return (jnp(i0)); } dbopi(ord) @@ -1556,8 +1539,7 @@ static jit_word_t _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jp(i0); - return (_jit->pc.w); + return (jp(i0)); } dbopi(unord) # undef fopi diff --git a/deps/lightning/lib/jit_x86-sz.c b/deps/lightning/lib/jit_x86-sz.c index 663b840f8..efc3dba4f 100644 --- a/deps/lightning/lib/jit_x86-sz.c +++ b/deps/lightning/lib/jit_x86-sz.c @@ -3,9 +3,10 @@ #define JIT_INSTR_MAX 42 0, /* data */ 0, /* live */ - 3, /* align */ + 11, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 
0, /* #name */ 0, /* #note */ 3, /* label */ @@ -14,7 +15,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,8 +26,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 3, /* va_start */ 5, /* va_arg */ 7, /* va_arg_d */ @@ -36,9 +52,9 @@ 5, /* addxi */ 4, /* subr */ 6, /* subi */ - 6, /* subcr */ + 12, /* subcr */ 6, /* subci */ - 6, /* subxr */ + 12, /* subxr */ 5, /* subxi */ 8, /* rsbi */ 5, /* mulr */ @@ -52,9 +68,9 @@ 22, /* divr_u */ 25, /* divi_u */ 23, /* qdivr */ - 26, /* qdivi */ + 28, /* qdivi */ 24, /* qdivr_u */ - 27, /* qdivi_u */ + 29, /* qdivi_u */ 21, /* remr */ 24, /* remi */ 22, /* remr_u */ @@ -95,12 +111,19 @@ 16, /* nei */ 2, /* movr */ 5, /* movi */ + 5, /* movnr */ + 5, /* movzr */ + 9, /* casr */ + 13, /* casi */ 11, /* extr_c */ 11, /* extr_uc */ 3, /* extr_s */ 3, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 7, /* bswapr_us */ + 4, /* bswapr_ui */ + 0, /* bswapr_ul */ 7, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -193,13 +216,37 @@ 2, /* callr */ 5, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c 
*/ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -207,93 +254,93 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 24, /* epilog */ + 23, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 8, /* addr_f */ - 19, /* addi_f */ + 17, /* addi_f */ 12, /* subr_f */ - 19, /* subi_f */ + 17, /* subi_f */ 21, /* rsbi_f */ 8, /* mulr_f */ - 19, /* muli_f */ + 17, /* muli_f */ 12, /* divr_f */ - 19, /* divi_f */ + 17, /* divi_f */ 12, /* negr_f */ 12, /* absr_f */ - 6, /* sqrtr_f */ - 13, /* ltr_f */ + 4, /* sqrtr_f */ + 12, /* ltr_f */ 27, /* lti_f */ - 13, /* ler_f */ + 12, /* ler_f */ 27, /* lei_f */ - 15, /* eqr_f */ + 14, /* eqr_f */ 29, /* eqi_f */ - 13, /* ger_f */ + 12, /* ger_f */ 27, /* gei_f */ - 13, /* gtr_f */ + 12, /* gtr_f */ 27, /* gti_f */ - 18, /* ner_f */ + 17, /* ner_f */ 32, /* nei_f */ - 13, /* unltr_f */ + 12, /* unltr_f */ 27, /* unlti_f */ - 13, /* unler_f */ + 12, /* unler_f */ 27, /* unlei_f */ - 13, /* uneqr_f */ + 12, /* uneqr_f */ 27, /* uneqi_f */ - 13, /* unger_f */ + 12, /* unger_f */ 27, /* ungei_f */ - 13, /* ungtr_f */ + 12, /* ungtr_f */ 27, /* ungti_f */ - 13, /* ltgtr_f */ + 12, /* ltgtr_f */ 27, /* ltgti_f */ - 13, /* ordr_f */ + 12, /* ordr_f */ 27, /* ordi_f */ - 13, /* unordr_f */ + 12, /* unordr_f */ 27, /* unordi_f */ - 8, /* truncr_f_i */ + 4, /* truncr_f_i */ 0, /* truncr_f_l */ - 8, /* extr_f */ + 4, /* extr_f */ 4, /* extr_d_f */ 10, /* movr_f */ - 19, /* movi_f */ + 15, /* movi_f */ 4, /* ldr_f */ 8, /* ldi_f */ 5, /* ldxr_f */ 8, /* ldxi_f */ - 6, /* str_f */ - 10, /* sti_f */ - 7, /* stxr_f */ + 4, /* str_f */ + 8, /* sti_f */ + 5, /* stxr_f */ 8, /* stxi_f */ - 10, /* bltr_f */ - 23, /* blti_f */ - 10, /* bler_f */ - 23, /* blei_f */ - 12, /* beqr_f */ 
- 25, /* beqi_f */ - 10, /* bger_f */ - 23, /* bgei_f */ - 10, /* bgtr_f */ - 23, /* bgti_f */ - 13, /* bner_f */ - 26, /* bnei_f */ - 10, /* bunltr_f */ - 23, /* bunlti_f */ - 10, /* bunler_f */ - 23, /* bunlei_f */ - 10, /* buneqr_f */ - 23, /* buneqi_f */ - 10, /* bunger_f */ - 23, /* bungei_f */ - 10, /* bungtr_f */ - 23, /* bungti_f */ - 10, /* bltgtr_f */ - 23, /* bltgti_f */ - 10, /* bordr_f */ - 23, /* bordi_f */ - 10, /* bunordr_f */ - 23, /* bunordi_f */ + 9, /* bltr_f */ + 18, /* blti_f */ + 9, /* bler_f */ + 18, /* blei_f */ + 11, /* beqr_f */ + 20, /* beqi_f */ + 9, /* bger_f */ + 18, /* bgei_f */ + 9, /* bgtr_f */ + 18, /* bgti_f */ + 12, /* bner_f */ + 21, /* bnei_f */ + 9, /* bunltr_f */ + 18, /* bunlti_f */ + 9, /* bunler_f */ + 18, /* bunlei_f */ + 9, /* buneqr_f */ + 18, /* buneqi_f */ + 9, /* bunger_f */ + 18, /* bungei_f */ + 9, /* bungtr_f */ + 18, /* bungti_f */ + 9, /* bltgtr_f */ + 18, /* bltgti_f */ + 9, /* bordr_f */ + 18, /* bordi_f */ + 9, /* bunordr_f */ + 18, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -314,7 +361,7 @@ 26, /* divi_d */ 18, /* negr_d */ 13, /* absr_d */ - 6, /* sqrtr_d */ + 4, /* sqrtr_d */ 13, /* ltr_d */ 37, /* lti_d */ 13, /* ler_d */ @@ -343,19 +390,19 @@ 37, /* ordi_d */ 13, /* unordr_d */ 37, /* unordi_d */ - 8, /* truncr_d_i */ + 4, /* truncr_d_i */ 0, /* truncr_d_l */ - 8, /* extr_d */ + 4, /* extr_d */ 4, /* extr_f_d */ 10, /* movr_d */ - 24, /* movi_d */ + 30, /* movi_d */ 4, /* ldr_d */ 8, /* ldi_d */ 5, /* ldxr_d */ 8, /* ldxi_d */ - 6, /* str_d */ - 10, /* sti_d */ - 7, /* stxr_d */ + 4, /* str_d */ + 8, /* sti_d */ + 5, /* stxr_d */ 8, /* stxi_d */ 10, /* bltr_d */ 28, /* blti_d */ @@ -399,16 +446,17 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ -#endif +#endif /* __X32 */ #if __X64 #if __CYGWIN__ || _WIN32 #define JIT_INSTR_MAX 130 0, /* data */ 0, /* live */ - 6, /* align */ + 27, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* 
#note */ 7, /* label */ @@ -417,7 +465,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -425,8 +476,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 7, /* va_start */ 7, /* va_arg */ 9, /* va_arg_d */ @@ -498,29 +561,36 @@ 14, /* nei */ 3, /* movr */ 10, /* movi */ + 7, /* movnr */ + 7, /* movzr */ + 11, /* casr */ + 21, /* casi */ 7, /* extr_c */ 7, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 3, /* extr_i */ 3, /* extr_ui */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 6, /* bswapr_ul */ 9, /* htonr_us */ 6, /* htonr_ui */ 6, /* htonr_ul */ 4, /* ldr_c */ - 15, /* ldi_c */ + 14, /* ldi_c */ 4, /* ldr_uc */ - 15, /* ldi_uc */ + 14, /* ldi_uc */ 4, /* ldr_s */ - 15, /* ldi_s */ + 14, /* ldi_s */ 4, /* ldr_us */ - 15, /* ldi_us */ + 14, /* ldi_us */ 3, /* ldr_i */ - 14, /* ldi_i */ + 13, /* ldi_i */ 3, /* ldr_ui */ - 14, /* ldi_ui */ + 13, /* ldi_ui */ 3, /* ldr_l */ - 14, /* ldi_l */ + 13, /* ldi_l */ 5, /* ldxr_c */ 8, /* ldxi_c */ 5, /* ldxr_uc */ @@ -536,13 +606,13 @@ 4, /* ldxr_l */ 7, /* ldxi_l */ 6, /* str_c */ - 17, /* sti_c */ + 16, /* sti_c */ 4, /* str_s */ - 15, /* sti_s */ + 14, /* sti_s */ 3, /* str_i */ - 14, /* sti_i */ + 13, /* sti_i */ 3, /* str_l */ - 14, /* sti_l */ + 13, /* sti_l */ 7, /* stxr_c */ 7, /* stxi_c */ 5, /* stxr_s */ @@ -591,18 +661,42 @@ 10, /* bxsubi */ 9, /* bxsubr_u */ 10, /* bxsubi_u */ - 3, /* jmpr */ + 2, /* jmpr */ 5, /* jmpi */ - 3, /* callr */ - 13, /* calli */ + 2, /* callr */ + 20, /* calli */ 0, /* prepare */ - 0, /* pushargr */ 
- 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -616,87 +710,87 @@ 0, /* putargr_f */ 0, /* putargi_f */ 10, /* addr_f */ - 21, /* addi_f */ + 19, /* addi_f */ 15, /* subr_f */ - 21, /* subi_f */ - 27, /* rsbi_f */ + 19, /* subi_f */ + 26, /* rsbi_f */ 10, /* mulr_f */ - 21, /* muli_f */ + 19, /* muli_f */ 15, /* divr_f */ - 21, /* divi_f */ - 15, /* negr_f */ + 19, /* divi_f */ + 14, /* negr_f */ 15, /* absr_f */ 5, /* sqrtr_f */ 16, /* ltr_f */ - 31, /* lti_f */ + 30, /* lti_f */ 16, /* ler_f */ - 31, /* lei_f */ + 30, /* lei_f */ 18, /* eqr_f */ - 33, /* eqi_f */ + 32, /* eqi_f */ 16, /* ger_f */ - 31, /* gei_f */ + 30, /* gei_f */ 16, /* gtr_f */ - 31, /* gti_f */ + 30, /* gti_f */ 20, /* ner_f */ - 35, /* nei_f */ + 34, /* nei_f */ 16, /* unltr_f */ - 31, /* unlti_f */ + 30, /* unlti_f */ 16, /* unler_f */ - 31, /* unlei_f */ + 30, /* unlei_f */ 16, /* uneqr_f */ - 31, /* uneqi_f */ + 30, /* uneqi_f */ 16, /* unger_f */ - 31, /* ungei_f */ + 30, /* ungei_f */ 16, /* ungtr_f */ - 31, /* ungti_f */ + 30, /* ungti_f */ 16, /* ltgtr_f */ - 31, /* ltgti_f */ + 30, /* ltgti_f */ 16, /* ordr_f */ - 31, /* ordi_f */ + 30, /* ordi_f */ 16, /* unordr_f */ - 31, /* unordi_f */ + 30, /* unordi_f */ 5, /* truncr_f_i */ 5, /* truncr_f_l */ 5, /* extr_f */ 5, /* extr_d_f */ 5, /* movr_f */ - 15, /* movi_f 
*/ + 18, /* movi_f */ 5, /* ldr_f */ - 16, /* ldi_f */ + 15, /* ldi_f */ 6, /* ldxr_f */ 8, /* ldxi_f */ 5, /* str_f */ - 16, /* sti_f */ + 15, /* sti_f */ 6, /* stxr_f */ 9, /* stxi_f */ 10, /* bltr_f */ - 21, /* blti_f */ + 19, /* blti_f */ 10, /* bler_f */ - 24, /* blei_f */ + 23, /* blei_f */ 12, /* beqr_f */ 27, /* beqi_f */ 10, /* bger_f */ - 25, /* bgei_f */ + 24, /* bgei_f */ 10, /* bgtr_f */ - 25, /* bgti_f */ + 24, /* bgti_f */ 13, /* bner_f */ - 28, /* bnei_f */ + 27, /* bnei_f */ 10, /* bunltr_f */ - 25, /* bunlti_f */ + 24, /* bunlti_f */ 10, /* bunler_f */ - 25, /* bunlei_f */ + 24, /* bunlei_f */ 10, /* buneqr_f */ - 25, /* buneqi_f */ + 24, /* buneqi_f */ 10, /* bunger_f */ - 25, /* bungei_f */ + 24, /* bungei_f */ 10, /* bungtr_f */ - 25, /* bungti_f */ + 24, /* bungti_f */ 10, /* bltgtr_f */ - 25, /* bltgti_f */ + 24, /* bltgti_f */ 10, /* bordr_f */ - 25, /* bordi_f */ + 24, /* bordi_f */ 10, /* bunordr_f */ - 25, /* bunordi_f */ + 24, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -715,7 +809,7 @@ 25, /* muli_d */ 15, /* divr_d */ 25, /* divi_d */ - 22, /* negr_d */ + 21, /* negr_d */ 16, /* absr_d */ 5, /* sqrtr_d */ 17, /* ltr_d */ @@ -751,13 +845,13 @@ 5, /* extr_d */ 5, /* extr_f_d */ 5, /* movr_d */ - 15, /* movi_d */ + 29, /* movi_d */ 5, /* ldr_d */ - 16, /* ldi_d */ + 15, /* ldi_d */ 6, /* ldxr_d */ 8, /* ldxi_d */ 5, /* str_d */ - 16, /* sti_d */ + 15, /* sti_d */ 6, /* stxr_d */ 9, /* stxi_d */ 11, /* bltr_d */ @@ -808,9 +902,10 @@ #define JIT_INSTR_MAX 108 0, /* data */ 0, /* live */ - 3, /* align */ + 7, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 3, /* label */ @@ -819,7 +914,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -827,11 +925,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 
0, /* putargi */ - 41, /* va_start */ - 45, /* va_arg */ - 54, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 33, /* va_start */ + 43, /* va_arg */ + 45, /* va_arg_d */ 0, /* va_end */ 5, /* addr */ 7, /* addi */ @@ -900,12 +1010,19 @@ 14, /* nei */ 3, /* movr */ 6, /* movi */ + 7, /* movnr */ + 7, /* movzr */ + 11, /* casr */ + 16, /* casi */ 7, /* extr_c */ 7, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 0, /* bswapr_ul */ 9, /* htonr_us */ 6, /* htonr_ui */ 0, /* htonr_ul */ @@ -945,11 +1062,11 @@ 8, /* sti_i */ 0, /* str_l */ 0, /* sti_l */ - 12, /* stxr_c */ + 11, /* stxr_c */ 7, /* stxi_c */ - 10, /* stxr_s */ + 9, /* stxr_s */ 7, /* stxi_s */ - 9, /* stxr_i */ + 8, /* stxr_i */ 6, /* stxi_i */ 0, /* stxr_l */ 0, /* stxi_l */ @@ -995,16 +1112,40 @@ 10, /* bxsubi_u */ 2, /* jmpr */ 5, /* jmpi */ - 3, /* callr */ + 2, /* callr */ 9, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -1018,14 +1159,14 @@ 0, 
/* putargr_f */ 0, /* putargi_f */ 10, /* addr_f */ - 21, /* addi_f */ + 20, /* addi_f */ 15, /* subr_f */ - 21, /* subi_f */ - 26, /* rsbi_f */ + 20, /* subi_f */ + 25, /* rsbi_f */ 10, /* mulr_f */ - 21, /* muli_f */ + 20, /* muli_f */ 15, /* divr_f */ - 21, /* divi_f */ + 20, /* divi_f */ 15, /* negr_f */ 15, /* absr_f */ 5, /* sqrtr_f */ @@ -1065,40 +1206,40 @@ 11, /* movi_f */ 6, /* ldr_f */ 10, /* ldi_f */ - 11, /* ldxr_f */ + 10, /* ldxr_f */ 9, /* ldxi_f */ 6, /* str_f */ 10, /* sti_f */ - 11, /* stxr_f */ + 10, /* stxr_f */ 9, /* stxi_f */ 10, /* bltr_f */ - 21, /* blti_f */ + 20, /* blti_f */ 10, /* bler_f */ - 21, /* blei_f */ + 20, /* blei_f */ 12, /* beqr_f */ 23, /* beqi_f */ 10, /* bger_f */ - 21, /* bgei_f */ + 20, /* bgei_f */ 10, /* bgtr_f */ - 21, /* bgti_f */ + 20, /* bgti_f */ 13, /* bner_f */ - 24, /* bnei_f */ + 23, /* bnei_f */ 10, /* bunltr_f */ - 21, /* bunlti_f */ + 20, /* bunlti_f */ 10, /* bunler_f */ - 21, /* bunlei_f */ + 20, /* bunlei_f */ 10, /* buneqr_f */ - 21, /* buneqi_f */ + 20, /* buneqi_f */ 10, /* bunger_f */ - 21, /* bungei_f */ + 20, /* bungei_f */ 10, /* bungtr_f */ - 21, /* bungti_f */ + 20, /* bungti_f */ 10, /* bltgtr_f */ - 21, /* bltgti_f */ + 20, /* bltgti_f */ 10, /* bordr_f */ - 21, /* bordi_f */ + 20, /* bordi_f */ 10, /* bunordr_f */ - 21, /* bunordi_f */ + 20, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -1109,14 +1250,14 @@ 0, /* putargr_d */ 0, /* putargi_d */ 10, /* addr_d */ - 33, /* addi_d */ + 29, /* addi_d */ 15, /* subr_d */ - 33, /* subi_d */ - 38, /* rsbi_d */ + 29, /* subi_d */ + 34, /* rsbi_d */ 10, /* mulr_d */ - 33, /* muli_d */ + 29, /* muli_d */ 15, /* divr_d */ - 33, /* divi_d */ + 29, /* divi_d */ 22, /* negr_d */ 16, /* absr_d */ 5, /* sqrtr_d */ @@ -1156,40 +1297,40 @@ 23, /* movi_d */ 6, /* ldr_d */ 10, /* ldi_d */ - 11, /* ldxr_d */ + 10, /* ldxr_d */ 9, /* ldxi_d */ 6, /* str_d */ 10, /* sti_d */ - 11, /* stxr_d */ + 10, /* stxr_d */ 9, /* stxi_d */ 11, /* 
bltr_d */ - 34, /* blti_d */ + 30, /* blti_d */ 11, /* bler_d */ - 34, /* blei_d */ + 30, /* blei_d */ 13, /* beqr_d */ 36, /* beqi_d */ 11, /* bger_d */ - 34, /* bgei_d */ + 30, /* bgei_d */ 11, /* bgtr_d */ - 34, /* bgti_d */ + 30, /* bgti_d */ 14, /* bner_d */ - 37, /* bnei_d */ + 33, /* bnei_d */ 11, /* bunltr_d */ - 34, /* bunlti_d */ + 30, /* bunlti_d */ 11, /* bunler_d */ - 34, /* bunlei_d */ + 30, /* bunlei_d */ 11, /* buneqr_d */ - 34, /* buneqi_d */ + 30, /* buneqi_d */ 11, /* bunger_d */ - 34, /* bungei_d */ + 30, /* bungei_d */ 11, /* bungtr_d */ - 34, /* bungti_d */ + 30, /* bungti_d */ 11, /* bltgtr_d */ - 34, /* bltgti_d */ + 30, /* bltgti_d */ 11, /* bordr_d */ - 34, /* bordi_d */ + 30, /* bordi_d */ 11, /* bunordr_d */ - 34, /* bunordi_d */ + 30, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -1204,14 +1345,15 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ +#else -# else #define JIT_INSTR_MAX 115 0, /* data */ 0, /* live */ - 6, /* align */ + 27, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 7, /* label */ @@ -1220,7 +1362,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -1228,11 +1373,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 42, /* va_start */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 38, /* va_start */ 41, /* va_arg */ - 50, /* va_arg_d */ + 48, /* va_arg_d */ 0, /* va_end */ 5, /* addr */ 13, /* addi */ @@ -1301,12 +1458,19 @@ 14, /* nei */ 3, /* movr */ 10, /* movi */ + 7, /* movnr */ + 7, /* movzr */ + 11, 
/* casr */ + 16, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 3, /* extr_i */ 3, /* extr_ui */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 6, /* bswapr_ul */ 9, /* htonr_us */ 6, /* htonr_ui */ 6, /* htonr_ul */ @@ -1394,18 +1558,42 @@ 10, /* bxsubi */ 9, /* bxsubr_u */ 10, /* bxsubi_u */ - 3, /* jmpr */ + 2, /* jmpr */ 5, /* jmpi */ - 3, /* callr */ + 2, /* callr */ 13, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -1419,14 +1607,14 @@ 0, /* putargr_f */ 0, /* putargi_f */ 10, /* addr_f */ - 21, /* addi_f */ + 20, /* addi_f */ 15, /* subr_f */ - 21, /* subi_f */ + 20, /* subi_f */ 30, /* rsbi_f */ 10, /* mulr_f */ - 21, /* muli_f */ + 20, /* muli_f */ 15, /* divr_f */ - 21, /* divi_f */ + 20, /* divi_f */ 15, /* negr_f */ 15, /* absr_f */ 5, /* sqrtr_f */ @@ -1473,7 +1661,7 @@ 7, /* stxr_f */ 9, /* stxi_f */ 10, /* bltr_f */ - 21, /* blti_f */ + 20, /* blti_f */ 10, /* bler_f */ 25, /* blei_f */ 12, /* beqr_f */ diff --git a/deps/lightning/lib/jit_x86-x87.c b/deps/lightning/lib/jit_x86-x87.c index 4453bf300..e1038836d 100644 --- a/deps/lightning/lib/jit_x86-x87.c +++ b/deps/lightning/lib/jit_x86-x87.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. 
+ * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -408,14 +408,14 @@ _x87_b##name##i_##type(jit_state_t *_jit, \ jit_word_t i0, jit_int32_t r0, \ jit_float##size##_t *i1) \ { \ - jit_word_t word; \ + jit_word_t w; \ jit_int32_t reg = jit_get_reg(jit_class_fpr| \ jit_class_nospill); \ assert(jit_x87_reg_p(reg)); \ x87_movi_##type(rn(reg), i1); \ - word = x87_b##name##r_##type(i0, r0, rn(reg)); \ + w = x87_b##name##r_##type(i0, r0, rn(reg)); \ jit_unget_reg(reg); \ - return (word); \ + return (w); \ } # define fopi(name) fpr_opi(name, f, 32) # define fbopi(name) fpr_bopi(name, f, 32) @@ -771,8 +771,7 @@ _x87jcc(jit_state_t *_jit, jit_int32_t code, fldr(r0); fucomipr(r1 + 1); } - jcc(code, i0); - return (_jit->pc.w); + return (jcc(code, i0)); } static jit_word_t @@ -788,8 +787,7 @@ _x87jcc2(jit_state_t *_jit, jit_int32_t code, fldr(f0); fucomipr(f1 + 1); } - jcc(code, i0); - return (_jit->pc.w); + return (jcc(code, i0)); } fopi(lt) @@ -1082,10 +1080,9 @@ _x87_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) fldr(f1); fucomipr(f2 + 1); } - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_E, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -1115,10 +1112,9 @@ _x87_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) fldr(f1); fucomipr(f2 + 1); } - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_NE, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -1283,6 +1279,7 @@ dbopi(le) static jit_word_t _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_int32_t f0, f1; jit_word_t jp_code; if (r1 == _ST0_REGNO) f0 = r1, f1 = r0; @@ -1293,11 +1290,10 @@ _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) fldr(f0); fucomipr(f1 + 1); } - jpes(0); - jp_code = 
_jit->pc.w; - jcc(X86_CC_E, i0); - patch_rel_char(jp_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jpes(0); + w = jcc(X86_CC_E, i0); + patch_at(jp_code, _jit->pc.w); + return (w); } dbopi(eq) dbopi(ge) @@ -1306,6 +1302,7 @@ dbopi(gt) static jit_word_t _x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_int32_t f0, f1; jit_word_t jp_code; jit_word_t jz_code; @@ -1317,14 +1314,12 @@ _x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) fldr(f0); fucomipr(f1 + 1); } - jpes(0); - jp_code = _jit->pc.w; - jzs(0); - jz_code = _jit->pc.w; - patch_rel_char(jp_code, _jit->pc.w); - jmpi(i0); - patch_rel_char(jz_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jpes(0); + jz_code = jzs(0); + patch_at(jp_code, _jit->pc.w); + w = jmpi(i0); + patch_at(jz_code, _jit->pc.w); + return (w); } dbopi(ne) dbopi(unlt) diff --git a/deps/lightning/lib/jit_x86.c b/deps/lightning/lib/jit_x86.c index 0386dab44..f46c8615e 100644 --- a/deps/lightning/lib/jit_x86.c +++ b/deps/lightning/lib/jit_x86.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -500,22 +500,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - /* movr(%ret, %ret) would be optimized out */ - if (JIT_RET != u) - jit_movr(JIT_RET, u); - /* explicitly tell it is live */ - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -575,7 +571,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -629,12 +625,15 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif #if __X64 if (jit_arg_reg_p(_jitc->function->self.argi)) { offset = _jitc->function->self.argi++; @@ -648,7 +647,7 @@ _jit_arg(jit_state_t *_jit) offset = _jitc->function->self.size; _jitc->function->self.size += REAL_WORDSIZE; } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -715,7 +714,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); #if __X64 if 
(jit_arg_reg_p(v->u.w)) @@ -729,7 +728,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) @@ -743,7 +742,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) @@ -757,7 +756,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) @@ -771,7 +770,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) { @@ -791,7 +790,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, JIT_RA0 - v->u.w); @@ -803,7 +802,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); @@ -814,10 +813,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) #endif void 
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) jit_movr(JIT_RA0 - v->u.w, u); @@ -828,11 +827,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) jit_movi(JIT_RA0 - v->u.w, u); @@ -944,10 +943,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); #if __X64 if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -969,11 +968,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); #if __X64 if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -1212,23 +1211,19 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; #if __X64 - /* FIXME preventing %rax allocation is good enough, but for consistency - * it should automatically detect %rax is dead, in case it has run out - * registers, and not save/restore it, what would be wrong if using 
the - * the return value, otherwise, just a needless noop */ - /* >> prevent %rax from being allocated as the function pointer */ - jit_regset_setbit(&_jitc->regarg, _RAX); - reg = jit_get_reg(jit_class_gpr); - node = jit_movi(reg, (jit_word_t)i0); - jit_finishr(reg); - jit_unget_reg(reg); - /* << prevent %rax from being allocated as the function pointer */ - jit_regset_clrbit(&_jitc->regarg, _RAX); -#else +# if !(__CYGWIN__ || _WIN32) + if (_jitc->function->call.call & jit_call_varargs) { + if (_jitc->function->call.argf) + jit_movi(_RAX, _jitc->function->call.argf); + else + jit_movi(_RAX, 0); + jit_live(_RAX); + } +# endif +#endif node = jit_calli(i0); node->v.w = _jitc->function->call.argi; node->w.w = _jitc->function->call.argf; -#endif _jitc->function->call.argi = _jitc->function->call.argf = _jitc->function->call.size = 0; _jitc->prepare = 0; @@ -1333,6 +1328,10 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif jit_int32_t patch_offset; } undo; @@ -1580,12 +1579,15 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; - case jit_code_note: case jit_code_name: + case jit_code_skip: + nop(node->u.w); + break; + case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; break; case jit_code_label: @@ -1661,6 +1663,16 @@ _emit_code(jit_state_t *_jit) case_rrw(gt, _u); case_rrr(ne,); case_rrw(ne,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if 
(node->flag & jit_flag_node) { @@ -1683,6 +1695,11 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); #if __X64 && !__X64_32 case_rr(hton, _ul); +#endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __X64 && !__X64_32 + case_rr(bswap, _ul); #endif case_rr(ext, _c); case_rr(ext, _uc); @@ -1999,7 +2016,14 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); +#if __X64 + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if ((jit_int32_t)word == word) + word = jmpi(_jit->pc.w); + else +#endif + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -2017,7 +2041,14 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) calli(temp->u.w); else { - word = calli_p(_jit->pc.w); +#if __X64 + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if ((jit_int32_t)word == word) + word = calli(_jit->pc.w); + else +#endif + word = calli_p(_jit->pc.w); patch(word, node); } } @@ -2028,6 +2059,10 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif undo.patch_offset = _jitc->patches.offset; restart_function: _jitc->again = 0; @@ -2045,6 +2080,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif _jitc->patches.offset = undo.patch_offset; goto restart_function; } @@ -2069,11 +2114,23 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -2083,10 +2140,26 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case 
jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -2148,7 +2221,7 @@ _emit_code(jit_state_t *_jit) for (offset = 0; offset < _jitc->patches.offset; offset++) { node = _jitc->patches.ptr[offset].node; word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w; - patch_at(node, _jitc->patches.ptr[offset].inst, word); + patch_at(_jitc->patches.ptr[offset].inst, word); } jit_flush(_jit->code.ptr, _jit->pc.uc); diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c index c18e6c0ab..32de98780 100644 --- a/deps/lightning/lib/lightning.c +++ b/deps/lightning/lib/lightning.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -19,7 +19,9 @@ #include #include -#include +#if HAVE_MMAP +# include +#endif #if defined(__sgi) # include #endif @@ -62,13 +64,28 @@ static void _del_label(jit_state_t*, jit_node_t*, jit_node_t*); static void _jit_dataset(jit_state_t *_jit); +#define block_update_set(block, target) _block_update_set(_jit, block, target) +static jit_bool_t _block_update_set(jit_state_t*, jit_block_t*, jit_block_t*); + +#define propagate_backward(block) _propagate_backward(_jit, block) +static void _propagate_backward(jit_state_t*, jit_block_t*); + +#define check_block_again() _check_block_again(_jit) +static jit_bool_t _check_block_again(jit_state_t*); + +#define do_setup() _do_setup(_jit) +static void _do_setup(jit_state_t*); + #define jit_setup(block) _jit_setup(_jit, block) static void _jit_setup(jit_state_t *_jit, jit_block_t *block); -#define jit_follow(block, todo) _jit_follow(_jit, block, todo) +#define do_follow(always) _do_follow(_jit, always) +static void _do_follow(jit_state_t*, jit_bool_t); + +#define jit_follow(block) _jit_follow(_jit, block) static void -_jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo); +_jit_follow(jit_state_t *_jit, jit_block_t *block); #define jit_update(node, live, mask) _jit_update(_jit, node, live, mask) static void @@ -103,7 +120,7 @@ static jit_bool_t _reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node); #define redundant_store(node, jump) _redundant_store(_jit, node, jump) -static void +static jit_bool_t _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump); #define simplify_movr(p, n, k, s) _simplify_movr(_jit, p, n, k, s) @@ -129,7 +146,7 @@ static void _simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno); #define simplify() _simplify(_jit) -static void +static jit_bool_t _simplify(jit_state_t *_jit); #define jit_reg_undef -1 @@ -206,8 +223,25 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec) for (regno = 0; regno < _jitc->reglen; regno++) { if 
((jit_class(_rvs[regno].spec) & spec) == spec && !jit_regset_tstbit(&_jitc->regarg, regno) && - !jit_regset_tstbit(&_jitc->reglive, regno)) + !jit_regset_tstbit(&_jitc->reglive, regno)) { + if (jit_regset_tstbit(&_jitc->regmask, regno)) { + /* search further, attempting to find a truly known + * free register, not just one in unknown state. */ + jit_int32_t regfree; + + for (regfree = regno + 1; + regfree < _jitc->reglen; regfree++) { + if ((jit_class(_rvs[regfree].spec) & spec) == spec && + !jit_regset_tstbit(&_jitc->regarg, regfree) && + !jit_regset_tstbit(&_jitc->reglive, regfree) && + !jit_regset_tstbit(&_jitc->regmask, regfree)) { + regno = regfree; + break; + } + } + } goto regarg; + } } /* search for a register matching spec that is not an argument @@ -853,6 +887,7 @@ jit_new_state(void) jit_regset_new(&_jitc->regsav); jit_regset_new(&_jitc->reglive); jit_regset_new(&_jitc->regmask); + jit_regset_new(&_jitc->explive); jit_init(); @@ -935,10 +970,15 @@ _jit_clear_state(jit_state_t *_jit) void _jit_destroy_state(jit_state_t *_jit) { +#if DEVEL_DISASSEMBLER + jit_really_clear_state(); +#endif +#if HAVE_MMAP if (!_jit->user_code) munmap(_jit->code.ptr, _jit->code.length); if (!_jit->user_data) munmap(_jit->data.ptr, _jit->data.length); +#endif jit_free((jit_pointer_t *)&_jit); } @@ -1113,6 +1153,20 @@ _jit_new_node_qww(jit_state_t *_jit, jit_code_t code, return (link_node(node)); } +jit_node_t * +_jit_new_node_wwq(jit_state_t *_jit, jit_code_t code, + jit_word_t u, jit_word_t v, + jit_int32_t l, jit_int32_t h) +{ + jit_node_t *node = new_node(code); + assert(!_jitc->realize); + node->u.w = u; + node->v.w = v; + node->w.q.l = l; + node->w.q.h = h; + return (link_node(node)); +} + jit_node_t * _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code, jit_word_t u, jit_word_t v, jit_float32_t w) @@ -1281,14 +1335,36 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) mask = 0; break; case jit_code_live: case jit_code_va_end: - case jit_code_retr: case jit_code_retr_f: 
case jit_code_retr_d: - case jit_code_pushargr: case jit_code_pushargr_f: + case jit_code_retr_c: case jit_code_retr_uc: + case jit_code_retr_s: case jit_code_retr_us: + case jit_code_retr_i: case jit_code_retr_ui: + case jit_code_retr_l: + case jit_code_retr_f: case jit_code_retr_d: + case jit_code_pushargr_c: + case jit_code_pushargr_uc: + case jit_code_pushargr_s: + case jit_code_pushargr_us: + case jit_code_pushargr_i: + case jit_code_pushargr_ui: + case jit_code_pushargr_l: + case jit_code_pushargr_f: case jit_code_pushargr_d: case jit_code_finishr: /* synthesized will set jit_cc_a0_jmp */ mask = jit_cc_a0_reg; break; - case jit_code_align: case jit_code_reti: case jit_code_pushargi: - case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */ + case jit_code_align: case jit_code_skip: + case jit_code_reti_c: case jit_code_reti_uc: + case jit_code_reti_s: case jit_code_reti_us: + case jit_code_reti_i: case jit_code_reti_ui: + case jit_code_reti_l: + case jit_code_pushargi_c: + case jit_code_pushargi_uc: + case jit_code_pushargi_s: + case jit_code_pushargi_us: + case jit_code_pushargi_i: + case jit_code_pushargi_ui: + case jit_code_pushargi_l: + case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */ mask = jit_cc_a0_int; break; case jit_code_reti_f: case jit_code_pushargi_f: @@ -1300,7 +1376,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_allocai: mask = jit_cc_a0_int|jit_cc_a1_int; break; - case jit_code_arg: case jit_code_arg_f: case jit_code_arg_d: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: + case jit_code_arg_f: case jit_code_arg_d: mask = jit_cc_a0_int|jit_cc_a0_arg; break; case jit_code_calli: case jit_code_jmpi: @@ -1324,11 +1402,17 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_getarg_f: case jit_code_getarg_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_arg; break; - case jit_code_putargr: case jit_code_putargr_f: - case jit_code_putargr_d: + case 
jit_code_putargr_c:case jit_code_putargr_uc: + case jit_code_putargr_s:case jit_code_putargr_us: + case jit_code_putargr_i:case jit_code_putargr_ui: + case jit_code_putargr_l: + case jit_code_putargr_f:case jit_code_putargr_d: mask = jit_cc_a0_reg|jit_cc_a1_arg; break; - case jit_code_putargi: + case jit_code_putargi_c:case jit_code_putargi_uc: + case jit_code_putargi_s:case jit_code_putargi_us: + case jit_code_putargi_i:case jit_code_putargi_ui: + case jit_code_putargi_l: mask = jit_cc_a0_int|jit_cc_a1_arg; break; case jit_code_putargi_f: @@ -1359,6 +1443,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_truncr_f_i: case jit_code_truncr_f_l: case jit_code_truncr_d_i: case jit_code_truncr_d_l: case jit_code_htonr_us: case jit_code_htonr_ui: case jit_code_htonr_ul: + case jit_code_bswapr_us: case jit_code_bswapr_ui: case jit_code_bswapr_ul: case jit_code_ldr_c: case jit_code_ldr_uc: case jit_code_ldr_s: case jit_code_ldr_us: case jit_code_ldr_i: case jit_code_ldr_ui: case jit_code_ldr_l: case jit_code_negr_f: @@ -1510,6 +1595,17 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_bxsubr: case jit_code_bxsubr_u: mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a1_chg|jit_cc_a2_reg; break; + case jit_code_movnr: case jit_code_movzr: + mask = jit_cc_a0_reg|jit_cc_a0_cnd|jit_cc_a1_reg|jit_cc_a2_reg; + break; + case jit_code_casr: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg| + jit_cc_a2_reg|jit_cc_a2_rlh; + break; + case jit_code_casi: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int| + jit_cc_a2_reg|jit_cc_a2_rlh; + break; default: abort(); } @@ -1579,57 +1675,195 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label) label->link = instr; } -void -_jit_optimize(jit_state_t *_jit) +static void +_do_setup(jit_state_t *_jit) { - jit_bool_t jump; - jit_bool_t todo; - jit_int32_t mask; - jit_node_t *node; jit_block_t *block; jit_word_t offset; - _jitc->function = NULL; - - thread_jumps(); - sequential_labels(); - 
split_branches(); - /* create initial mapping of live register values * at the start of a basic block */ for (offset = 0; offset < _jitc->blocks.offset; offset++) { block = _jitc->blocks.ptr + offset; if (!block->label) continue; - if (block->label->code != jit_code_epilog) - jit_setup(block); + if (block->label->code == jit_code_epilog) { + jit_regset_setbit(&block->reglive, JIT_RET); + jit_regset_setbit(&block->reglive, JIT_FRET); + jit_regset_com(&block->regmask, &block->reglive); + continue; + } + jit_setup(block); } +} + +static jit_bool_t +_block_update_set(jit_state_t *_jit, + jit_block_t *block, jit_block_t *target) +{ + jit_regset_t regmask; + + jit_regset_ior(®mask, &block->reglive, &target->reglive); + jit_regset_and(®mask, ®mask, &block->regmask); + if (jit_regset_set_p(®mask)) { + jit_regset_ior(&block->reglive, &block->reglive, ®mask); + jit_regset_and(®mask, &block->reglive, &block->regmask); + jit_regset_com(®mask, ®mask); + jit_regset_and(&block->regmask, &block->regmask, ®mask); + block->again = 1; + return (1); + } + return (0); +} + +static void +_propagate_backward(jit_state_t *_jit, jit_block_t *block) +{ + jit_block_t *prev; + jit_word_t offset; + + for (offset = block->label->v.w - 1; + offset >= 0; --offset) { + prev = _jitc->blocks.ptr + offset; + if (!block_update_set(prev, block) || + !(prev->label->flag & jit_flag_head)) + break; + } +} + +static jit_bool_t +_check_block_again(jit_state_t *_jit) +{ + jit_int32_t todo; + jit_word_t offset; + jit_node_t *node, *label; + jit_block_t *block, *target; + + todo = 0; + for (offset = 0; offset < _jitc->blocks.offset; offset++) { + block = _jitc->blocks.ptr + offset; + if (block->again) { + todo = 1; + break; + } + } + /* If no block changed state */ + if (!todo) + return (0); - /* set live state of registers not referenced in a block, but - * referenced in a jump target or normal flow */ do { todo = 0; - for (offset = 0; offset < _jitc->blocks.offset; offset++) { - block = _jitc->blocks.ptr + 
offset; - if (!block->label) + block = NULL; + for (node = _jitc->head; node; node = node->next) { + /* Special jumps that match jit_cc_a0_jmp */ + if (node->code == jit_code_calli || node->code == jit_code_callr) continue; - if (block->label->code != jit_code_epilog) - jit_follow(block, &todo); + + /* Remember current label */ + if (node->code == jit_code_label || + node->code == jit_code_prolog || + node->code == jit_code_epilog) { + + /* If previous block does not pass through */ + if (!(node->flag & jit_flag_head)) + block = NULL; + + target = _jitc->blocks.ptr + node->v.w; + if (block && target->again && block_update_set(block, target)) { + propagate_backward(block); + todo = 1; + } + block = target; + } + /* If not the first jmpi */ + else if (block) { + /* If a jump to dynamic address or if a jump to raw address */ + if (!(jit_classify(node->code) & jit_cc_a0_jmp) || + !(node->flag & jit_flag_node)) + continue; + label = node->u.n; + /* Mark predecessor needs updating due to target change */ + target = _jitc->blocks.ptr + label->v.w; + if (target->again && block_update_set(block, target)) { + propagate_backward(block); + todo = 1; + } + } } - } while (todo); + } + while (todo); + + return (todo); +} + +static void +_do_follow(jit_state_t *_jit, jit_bool_t always) +{ + jit_block_t *block; + jit_word_t offset; + + /* set live state of registers not referenced in a block, but + * referenced in a jump target or normal flow */ + for (offset = 0; offset < _jitc->blocks.offset; offset++) { + block = _jitc->blocks.ptr + offset; + if (!block->label || block->label->code == jit_code_epilog) + continue; + if (always || block->again) { + block->again = 0; + jit_follow(block); + } + } +} + +void +_jit_optimize(jit_state_t *_jit) +{ + jit_bool_t jump; + jit_bool_t todo; + jit_int32_t mask; + jit_node_t *node; + jit_block_t *block; + jit_word_t offset; + jit_regset_t regmask; + + todo = 0; + _jitc->function = NULL; + + thread_jumps(); + sequential_labels(); + 
split_branches(); + do_setup(); + do_follow(1); patch_registers(); - simplify(); + if (simplify()) + todo = 1; - /* figure out labels that are only reached with a jump - * and is required to do a simple redundant_store removal - * on jit_beqi below */ + jit_regset_set_ui(®mask, 0); + for (offset = 0; offset < _jitc->reglen; offset++) { + if ((jit_class(_rvs[offset].spec) & (jit_class_gpr|jit_class_fpr)) && + (jit_class(_rvs[offset].spec) & jit_class_sav) == jit_class_sav) + jit_regset_setbit(®mask, offset); + } + + /* Figure out labels that are only reached with a jump */ jump = 1; for (node = _jitc->head; node; node = node->next) { switch (node->code) { case jit_code_label: - if (!jump) + if (!jump) { node->flag |= jit_flag_head; + if (!node->link) { + /* Block is dead code or only reachable with an + * indirect jumps. In such condition, must assume + * all callee save registers are live. */ + block = _jitc->blocks.ptr + node->v.w; + jit_regset_ior(&block->reglive, + &block->reglive, ®mask); + /* Cleanup regmask */ + block_update_set(block, block); + } + } break; case jit_code_jmpi: case jit_code_jmpr: case jit_code_epilog: @@ -1651,69 +1885,98 @@ _jit_optimize(jit_state_t *_jit) node->v.w &= ~jit_regno_patch; if (mask & jit_cc_a2_reg) node->w.w &= ~jit_regno_patch; - switch (node->code) { - case jit_code_prolog: - _jitc->function = _jitc->functions.ptr + node->w.w; - break; - case jit_code_epilog: - _jitc->function = NULL; - break; - case jit_code_beqi: - redundant_store(node, 1); - break; - case jit_code_bnei: - redundant_store(node, 0); - break; - default: + if (node->code == jit_code_beqi) { + if (redundant_store(node, 1)) { + block = _jitc->blocks.ptr + ((jit_node_t *)node->u.n)->v.w; + block->again = 1; + todo = 1; + } + } + else if (node->code == jit_code_bnei) { + if (redundant_store(node, 0)) { + block = _jitc->blocks.ptr + ((jit_node_t *)node->u.n)->v.w; + block->again = 1; + todo = 1; + } + } + } + + if (!todo) + todo = check_block_again(); + + /* If 
instructions were removed or first pass did modify the entry + * state of any block */ + if (todo) { + do_setup(); + todo = 0; + do { + do_follow(0); + /* If any block again has the entry state modified. */ + todo = check_block_again(); + } while (todo); + } + + for (node = _jitc->head; node; node = node->next) { + mask = jit_classify(node->code); + if (mask & jit_cc_a0_reg) + node->u.w &= ~jit_regno_patch; + if (mask & jit_cc_a1_reg) + node->v.w &= ~jit_regno_patch; + if (mask & jit_cc_a2_reg) + node->w.w &= ~jit_regno_patch; + if (node->code == jit_code_prolog) + _jitc->function = _jitc->functions.ptr + node->w.w; + else if(node->code == jit_code_epilog) + _jitc->function = NULL; + else { #if JIT_HASH_CONSTS - if (mask & jit_cc_a0_flt) { - node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask & jit_cc_a0_dbl) { - node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask & jit_cc_a1_flt) { - node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask & jit_cc_a1_dbl) { - node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask & jit_cc_a2_flt) { - node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask & jit_cc_a2_dbl) { - node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8); - node->flag |= jit_flag_node | jit_flag_data; - } + if (mask & jit_cc_a0_flt) { + node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4); + node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a0_dbl) { + node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8); + node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a1_flt) { + node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4); + 
node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a1_dbl) { + node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8); + node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a2_flt) { + node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4); + node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a2_dbl) { + node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8); + node->flag |= jit_flag_node | jit_flag_data; + } #endif - if (_jitc->function) { - if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) == - (jit_cc_a0_reg|jit_cc_a0_chg)) { - if (mask & jit_cc_a0_rlh) { - jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->u.q.l)); - jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->u.q.h)); - } - else - jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->u.w)); - } - if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) == - (jit_cc_a1_reg|jit_cc_a1_chg)) + if (_jitc->function) { + if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) == + (jit_cc_a0_reg|jit_cc_a0_chg)) { + if (mask & jit_cc_a0_rlh) { + jit_regset_setbit(&_jitc->function->regset, + jit_regno(node->u.q.l)); jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->v.w)); - if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) == - (jit_cc_a2_reg|jit_cc_a2_chg)) + jit_regno(node->u.q.h)); + } + else jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->w.w)); + jit_regno(node->u.w)); } - break; + if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) == + (jit_cc_a1_reg|jit_cc_a1_chg)) + jit_regset_setbit(&_jitc->function->regset, + jit_regno(node->v.w)); + if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) == + (jit_cc_a2_reg|jit_cc_a2_chg)) + jit_regset_setbit(&_jitc->function->regset, + jit_regno(node->w.w)); + } } } } @@ -1729,6 +1992,10 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) case jit_code_label: case jit_code_prolog: case jit_code_epilog: block = _jitc->blocks.ptr + node->v.w; jit_regset_set(&_jitc->reglive, &block->reglive); + 
jit_regset_set_ui(&_jitc->explive, 0); + break; + case jit_code_live: + jit_regset_setbit(&_jitc->explive, node->u.w); break; case jit_code_callr: value = jit_regno(node->u.w); @@ -1784,13 +2051,24 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) else jit_regset_setbit(&_jitc->reglive, node->v.w); } - if ((value & jit_cc_a2_reg) && !(node->w.w & jit_regno_patch)) { - if (value & jit_cc_a2_chg) { - jit_regset_clrbit(&_jitc->reglive, node->w.w); - jit_regset_setbit(&_jitc->regmask, node->w.w); + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + /* Assume registers are not changed */ + if (!(node->w.q.l & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->w.q.l); + if (!(node->w.q.h & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->w.q.h); + } + else { + if (!(node->w.w & jit_regno_patch)) { + if (value & jit_cc_a2_chg) { + jit_regset_clrbit(&_jitc->reglive, node->w.w); + jit_regset_setbit(&_jitc->regmask, node->w.w); + } + else + jit_regset_setbit(&_jitc->reglive, node->w.w); + } } - else - jit_regset_setbit(&_jitc->reglive, node->w.w); } if (jit_regset_set_p(&_jitc->regmask)) { jit_update(node->next, &_jitc->reglive, &_jitc->regmask); @@ -1818,8 +2096,27 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) } if (value & jit_cc_a1_reg) jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w)); - if (value & jit_cc_a2_reg) - jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w)); + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.l)); + jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.h)); + } + else + jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w)); + } + /* Prevent incorrect detection of running out of registers + * if will need to patch jump, and all registers have been + * used in the current block. 
*/ + if (node->code == jit_code_jmpi && (node->flag & jit_flag_node)) { + jit_node_t *label = node->u.n; + jit_block_t *block = _jitc->blocks.ptr + label->v.w; + jit_regset_set(&_jitc->reglive, &block->reglive); + jit_regset_set(&_jitc->regmask, &block->regmask); + if (jit_regset_set_p(&_jitc->explive)) { + jit_regset_ior(&_jitc->reglive, &block->reglive, &_jitc->explive); + jit_regset_xor(&_jitc->regmask, &_jitc->regmask, &_jitc->explive); + } + } } void @@ -1835,8 +2132,14 @@ _jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) } if (value & jit_cc_a1_reg) jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w)); - if (value & jit_cc_a2_reg) - jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w)); + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.l)); + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.h)); + } + else + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w)); + } } void @@ -1865,6 +2168,9 @@ _jit_dataset(jit_state_t *_jit) #endif assert(!_jitc->dataset); +#if !HAVE_MMAP + assert(_jit->user_data); +#else if (!_jit->user_data) { /* create read only data buffer */ @@ -1882,6 +2188,7 @@ _jit_dataset(jit_state_t *_jit) close(mmap_fd); #endif } +#endif /* !HAVE_MMAP */ if (!_jitc->no_data) jit_memcpy(_jit->data.ptr, _jitc->data.ptr, _jitc->data.offset); @@ -1989,6 +2296,7 @@ _jit_emit(jit_state_t *_jit) #if defined(__sgi) int mmap_fd; #endif + int mmap_prot, mmap_flags; if (!_jitc->realize) jit_realize(); @@ -1998,20 +2306,39 @@ _jit_emit(jit_state_t *_jit) _jitc->emit = 1; +#if !HAVE_MMAP + assert(_jit->user_code); +#else if (!_jit->user_code) { + mmap_prot = PROT_READ | PROT_WRITE; +#if !(__OpenBSD__ || __APPLE__) + mmap_prot |= PROT_EXEC; +#endif +#if __NetBSD__ + mmap_prot = PROT_MPROTECT(mmap_prot); + mmap_flags = 0; +#else + mmap_flags = MAP_PRIVATE; +#endif + mmap_flags |= MAP_ANON; #if defined(__sgi) mmap_fd = open("/dev/zero", O_RDWR); #endif 
_jit->code.ptr = mmap(NULL, _jit->code.length, - PROT_EXEC | PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + mmap_prot, mmap_flags, mmap_fd, 0); assert(_jit->code.ptr != MAP_FAILED); } +#endif /* !HAVE_MMAP */ _jitc->code.end = _jit->code.ptr + _jit->code.length - jit_get_max_instr(); _jit->pc.uc = _jit->code.ptr; for (;;) { +#if __NetBSD__ + result = mprotect(_jit->code.ptr, _jit->code.length, + PROT_READ | PROT_WRITE); + assert(result == 0); +#endif if ((code = emit_code()) == NULL) { _jitc->patches.offset = 0; for (node = _jitc->head; node; node = node->next) { @@ -2020,6 +2347,9 @@ _jit_emit(jit_state_t *_jit) node->code == jit_code_epilog)) node->flag &= ~jit_flag_patch; } +#if !HAVE_MMAP + assert(_jit->user_code); +#else if (_jit->user_code) goto fail; /* Should only happen on very special cases */ @@ -2039,8 +2369,7 @@ _jit_emit(jit_state_t *_jit) # endif #else _jit->code.ptr = mmap(NULL, length, - PROT_EXEC | PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + mmap_prot, mmap_flags, mmap_fd, 0); #endif assert(_jit->code.ptr != MAP_FAILED); @@ -2048,6 +2377,7 @@ _jit_emit(jit_state_t *_jit) _jitc->code.end = _jit->code.ptr + _jit->code.length - jit_get_max_instr(); _jit->pc.uc = _jit->code.ptr; +#endif /* !HAVE_MMAP */ } else break; @@ -2064,6 +2394,7 @@ _jit_emit(jit_state_t *_jit) if (_jit->user_data) jit_free((jit_pointer_t *)&_jitc->data.ptr); +#if HAVE_MMAP #ifdef NDEBUG else mprotect(_jit->data.ptr, _jit->data.length, PROT_READ); @@ -2072,21 +2403,53 @@ _jit_emit(jit_state_t *_jit) PROT_READ | PROT_EXEC); #else else { - result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ); + result = mprotect(_jit->data.ptr, + _jit->data.length, PROT_READ); assert(result == 0); } if (!_jit->user_code) { - result = mprotect(_jit->code.ptr, _jit->code.length, - PROT_READ | PROT_EXEC); + _jit->code.protected = _jit->pc.uc - _jit->code.ptr; +# if __riscv && __WORDSIZE == 64 + /* FIXME should start adding consts at a page boundary */ 
+ _jit->code.protected -= _jitc->consts.hash.count * sizeof(jit_word_t); +# endif + result = mprotect(_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC); assert(result == 0); } #endif +#endif /* HAVE_MMAP */ return (_jit->code.ptr); fail: return (NULL); } +void +_jit_protect(jit_state_t *_jit) +{ +#if !HAVE_MMAP + assert (_jit->user_code); +#else + int result; + if (_jit->user_code) return; + result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC); + assert (result == 0); +#endif +} + +void +_jit_unprotect(jit_state_t *_jit) +{ +#if !HAVE_MMAP + assert (_jit->user_code); +#else + int result; + if (_jit->user_code) return; + result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_WRITE); + assert (result == 0); +#endif +} + void _jit_frame(jit_state_t *_jit, jit_int32_t frame) { @@ -2202,7 +2565,7 @@ _jit_setup(jit_state_t *_jit, jit_block_t *block) * or normal flow that have a live register not used in this block. */ static void -_jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) +_jit_follow(jit_state_t *_jit, jit_block_t *block) { jit_node_t *node; jit_block_t *next; @@ -2231,7 +2594,7 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) /* Remove from unknown state bitmask. 
*/ jit_regset_com(®temp, ®temp); jit_regset_and(&block->regmask, &block->regmask, ®temp); - *todo = 1; + block->again = 1; } case jit_code_prolog: case jit_code_epilog: @@ -2259,11 +2622,26 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) default: value = jit_classify(node->code); if (value & jit_cc_a2_reg) { - if (!(node->w.w & jit_regno_patch)) { - if (jit_regset_tstbit(®mask, node->w.w)) { - jit_regset_clrbit(®mask, node->w.w); - if (!(value & jit_cc_a2_chg)) - jit_regset_setbit(®live, node->w.w); + if (value & jit_cc_a2_rlh) { + if (!(node->w.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(®mask, node->w.q.l)) + jit_regset_clrbit(®mask, node->w.q.l); + } + if (!(node->w.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(®mask, node->w.q.h)) + jit_regset_clrbit(®mask, node->w.q.h); + } + } + else { + if (value & jit_cc_a2_reg) { + if (!(node->w.w & jit_regno_patch)) { + if (jit_regset_tstbit(®mask, node->w.w)) { + jit_regset_clrbit(®mask, node->w.w); + if (!(value & jit_cc_a2_chg)) + jit_regset_setbit(®live, node->w.w); + } + } } } } @@ -2318,7 +2696,7 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) jit_regset_com(®temp, ®temp); jit_regset_and(&block->regmask, &block->regmask, ®temp); - *todo = 1; + block->again = 1; } } else { @@ -2331,19 +2709,19 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) * means that only JIT_Vn registers can be trusted on * arrival of jmpr. */ + jit_regset_set_ui(®mask, 0); for (regno = 0; regno < _jitc->reglen; regno++) { spec = jit_class(_rvs[regno].spec); - if (jit_regset_tstbit(®mask, regno) && - (spec & (jit_class_gpr|jit_class_fpr)) && - !(spec & jit_class_sav)) - jit_regset_clrbit(®mask, regno); + if ((spec & (jit_class_gpr|jit_class_fpr)) && + (spec & jit_class_sav)) + jit_regset_setbit(®mask, regno); } /* Assume non callee save registers are live due * to jump to unknown location. */ /* Treat all callee save as live. 
*/ - jit_regset_ior(®live, ®live, ®mask); + jit_regset_ior(&block->reglive, ®live, ®mask); /* Treat anything else as dead. */ - jit_regset_set_ui(®mask, 0); + return; } } break; @@ -2410,11 +2788,24 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, default: value = jit_classify(node->code); if (value & jit_cc_a2_reg) { - if (!(node->w.w & jit_regno_patch)) { - if (jit_regset_tstbit(mask, node->w.w)) { - jit_regset_clrbit(mask, node->w.w); - if (!(value & jit_cc_a2_chg)) - jit_regset_setbit(live, node->w.w); + if (value & jit_cc_a2_rlh) { + if (!(node->w.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(mask, node->w.q.l)) + jit_regset_clrbit(mask, node->w.q.l); + } + if (!(node->w.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->w.q.h)) + jit_regset_clrbit(mask, node->w.q.h); + } + } + else { + if (!(node->w.w & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->w.w)) { + jit_regset_clrbit(mask, node->w.w); + if (!(value & jit_cc_a2_chg)) + jit_regset_setbit(live, node->w.w); + } } } } @@ -2479,19 +2870,22 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, * means that only JIT_Vn registers can be trusted on * arrival of jmpr. */ + jit_regset_set_ui(mask, 0); for (regno = 0; regno < _jitc->reglen; regno++) { spec = jit_class(_rvs[regno].spec); - if (jit_regset_tstbit(mask, regno) && - (spec & (jit_class_gpr|jit_class_fpr)) && - !(spec & jit_class_sav)) - jit_regset_clrbit(mask, regno); + if ((spec & (jit_class_gpr|jit_class_fpr)) && + (spec & jit_class_sav)) + jit_regset_setbit(mask, regno); } /* Assume non callee save registers are live due * to jump to unknown location. */ /* Treat all callee save as live. */ jit_regset_ior(live, live, mask); + /* Prevent explicitly set as live registers to + * be used as a temporary for the jmpi. */ + jit_regset_ior(live, live, &_jitc->explive); /* Treat anything else as dead. 
*/ - jit_regset_set_ui(mask, 0); + return; } } break; @@ -2593,36 +2987,59 @@ _split_branches(jit_state_t *_jit) jit_node_t *next; jit_node_t *label; jit_block_t *block; + jit_block_t *blocks; + jit_word_t offset; + jit_word_t length; + length = _jitc->blocks.length; + jit_alloc((jit_pointer_t *)&blocks, length * sizeof(jit_block_t)); + if ((node = _jitc->head) && + (node->code == jit_code_label || node->code == jit_code_prolog)) { + block = _jitc->blocks.ptr + node->v.w; + memcpy(blocks, block, sizeof(jit_block_t)); + node->v.w = 0; + offset = 1; + } + else + offset = 0; for (node = _jitc->head; node; node = next) { if ((next = node->next)) { if (next->code == jit_code_label || next->code == jit_code_prolog || - next->code == jit_code_epilog) - continue; + next->code == jit_code_epilog) { + if (offset >= length) { + jit_realloc((jit_pointer_t *)&blocks, + length * sizeof(jit_block_t), + (length + 16) * sizeof(jit_block_t)); + length += 16; + } + block = _jitc->blocks.ptr + next->v.w; + memcpy(blocks + offset, block, sizeof(jit_block_t)); + next->v.w = offset++; + } /* split block on branches */ - if (jit_classify(node->code) & jit_cc_a0_jmp) { + else if (jit_classify(node->code) & jit_cc_a0_jmp) { label = new_node(jit_code_label); label->next = next; node->next = label; - if (_jitc->blocks.offset >= _jitc->blocks.length) { - jit_word_t length; - - length = _jitc->blocks.length + 16; - jit_realloc((jit_pointer_t *)&_jitc->blocks.ptr, - _jitc->blocks.length * sizeof(jit_block_t), - length * sizeof(jit_block_t)); - _jitc->blocks.length = length; + if (offset >= length) { + jit_realloc((jit_pointer_t *)&blocks, + length * sizeof(jit_block_t), + (length + 16) * sizeof(jit_block_t)); + length += 16; } - block = _jitc->blocks.ptr + _jitc->blocks.offset; + block = blocks + offset; block->label = label; - label->v.w = _jitc->blocks.offset; + label->v.w = offset++; jit_regset_new(&block->reglive); jit_regset_new(&block->regmask); - ++_jitc->blocks.offset; } } } + 
jit_free((jit_pointer_t *)&_jitc->blocks.ptr); + _jitc->blocks.ptr = blocks; + _jitc->blocks.offset = offset; + _jitc->blocks.length = length; } static jit_bool_t @@ -2887,7 +3304,7 @@ _reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node) return (0); } -static void +static jit_bool_t _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump) { jit_node_t *iter; @@ -2895,30 +3312,33 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump) jit_word_t word; jit_int32_t spec; jit_int32_t regno; + jit_bool_t result; if (jump) { prev = node->u.n; if (prev->code == jit_code_epilog) - return; + return (0); assert(prev->code == jit_code_label); if ((prev->flag & jit_flag_head) || node->link || prev->link != node) /* multiple sources */ - return; + return (0); /* if there are sequential labels it will return below */ } else prev = node; + result = 0; word = node->w.w; regno = jit_regno(node->v.w); for (iter = prev->next; iter; prev = iter, iter = iter->next) { switch (iter->code) { case jit_code_label: case jit_code_prolog: case jit_code_epilog: - return; + return (result); case jit_code_movi: if (regno == jit_regno(iter->u.w)) { if (iter->flag || iter->v.w != word) - return; + return (result); + result = 1; del_node(prev, iter); iter = prev; } @@ -2926,32 +3346,34 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump) default: spec = jit_classify(iter->code); if (spec & jit_cc_a0_jmp) - return; + return (result); if ((spec & (jit_cc_a0_reg|jit_cc_a0_chg)) == (jit_cc_a0_reg|jit_cc_a0_chg)) { if (spec & jit_cc_a0_rlh) { if (regno == jit_regno(iter->u.q.l) || regno == jit_regno(iter->u.q.h)) - return; + return (result); } else { if (regno == jit_regno(iter->u.w)) - return; + return (result); } } if ((spec & (jit_cc_a1_reg|jit_cc_a1_chg)) == (jit_cc_a1_reg|jit_cc_a1_chg)) { if (regno == jit_regno(iter->v.w)) - return; + return (result); } if ((spec & (jit_cc_a2_reg|jit_cc_a2_chg)) == (jit_cc_a2_reg|jit_cc_a2_chg)) { 
if (regno == jit_regno(iter->w.w)) - return; + return (result); } break; } } + + return (result); } static jit_bool_t @@ -3098,7 +3520,6 @@ _simplify_stxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node) /* no multiple information, so, if set to a constant, * prefer to keep that information */ if (value->kind == 0) { - value->kind = jit_kind_code; switch (node->code) { /* no information about signed/unsigned either */ case jit_code_stxi_c: value->code = jit_code_ldxi_c; break; @@ -3139,7 +3560,7 @@ _simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) * once to the same value, and is a common pattern of calls * to jit_pushargi and jit_pushargr */ -static void +static jit_bool_t _simplify(jit_state_t *_jit) { jit_node_t *prev; @@ -3147,7 +3568,9 @@ _simplify(jit_state_t *_jit) jit_node_t *next; jit_int32_t info; jit_int32_t regno; + jit_bool_t result; + result = 0; for (prev = NULL, node = _jitc->head; node; prev = node, node = next) { next = node->next; switch (node->code) { @@ -3170,6 +3593,7 @@ _simplify(jit_state_t *_jit) * already holding */ patch_register(node->link->next, node, jit_regno_patch|regno, regno); + result = 1; del_node(_jitc->spill[regno], node->link); del_node(prev, node); node = prev; @@ -3179,38 +3603,50 @@ _simplify(jit_state_t *_jit) case jit_code_movr: regno = jit_regno(node->u.w); if (simplify_movr(prev, node, - jit_kind_word, sizeof(jit_word_t))) + jit_kind_word, sizeof(jit_word_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_movi: regno = jit_regno(node->u.w); if (simplify_movi(prev, node, - jit_kind_word, sizeof(jit_word_t))) + jit_kind_word, sizeof(jit_word_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_movr_f: regno = jit_regno(node->u.w); if (simplify_movr(prev, node, - jit_kind_float32, sizeof(jit_float32_t))) + jit_kind_float32, sizeof(jit_float32_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_movi_f: regno 
= jit_regno(node->u.w); if (simplify_movi(prev, node, - jit_kind_float32, sizeof(jit_float32_t))) + jit_kind_float32, sizeof(jit_float32_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_movr_d: regno = jit_regno(node->u.w); if (simplify_movr(prev, node, - jit_kind_float64, sizeof(jit_float64_t))) + jit_kind_float64, sizeof(jit_float64_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_movi_d: regno = jit_regno(node->u.w); if (simplify_movi(prev, node, - jit_kind_float64, sizeof(jit_float64_t))) + jit_kind_float64, sizeof(jit_float64_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_ldxi_c: case jit_code_ldxi_uc: case jit_code_ldxi_s: case jit_code_ldxi_us: @@ -3218,15 +3654,19 @@ _simplify(jit_state_t *_jit) case jit_code_ldxi_l: case jit_code_ldxi_f: case jit_code_ldxi_d: regno = jit_regno(node->u.w); - if (simplify_ldxi(prev, node)) + if (simplify_ldxi(prev, node)) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_stxi_c: case jit_code_stxi_s: case jit_code_stxi_i: case jit_code_stxi_l: case jit_code_stxi_f: case jit_code_stxi_d: regno = jit_regno(node->u.w); - if (simplify_stxi(prev, node)) + if (simplify_stxi(prev, node)) { + result = 1; simplify_spill(node = prev, regno); + } break; default: info = jit_classify(node->code); @@ -3255,13 +3695,29 @@ _simplify(jit_state_t *_jit) ++_jitc->gen[regno]; } if (info & jit_cc_a2_chg) { - regno = jit_regno(node->w.w); - _jitc->values[regno].kind = 0; - ++_jitc->gen[regno]; +#if 0 + /* Assume registers are not changed */ + if (info & jit_cc_a2_rlh) { + regno = jit_regno(node->w.q.l); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + regno = jit_regno(node->w.q.h); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + } + else { +#endif + regno = jit_regno(node->w.w); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; +#if 0 + } +#endif } break; } } + return (result); } static 
jit_int32_t @@ -3282,7 +3738,7 @@ _register_change_p(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, default: value = jit_classify(node->code); /* lack of extra information */ - if (value & jit_cc_a0_jmp) + if (value & (jit_cc_a0_jmp|jit_cc_a0_cnd)) return (jit_reg_change); else if ((value & (jit_cc_a0_reg|jit_cc_a0_chg)) == (jit_cc_a0_reg|jit_cc_a0_chg) && @@ -3462,11 +3918,46 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, } if ((value & jit_cc_a1_reg) && node->v.w == regno) node->v.w = patch; - if ((value & jit_cc_a2_reg) && node->w.w == regno) - node->w.w = patch; + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + if (node->w.q.l == regno) + node->w.q.l = patch; + if (node->w.q.h == regno) + node->w.q.h = patch; + } + else { + if (node->w.w == regno) + node->w.w = patch; + } + } } } +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define htonr_us(r0,r1) bswapr_us(r0,r1) +# define htonr_ui(r0,r1) bswapr_ui(r0,r1) +# if __WORDSIZE == 64 +# define htonr_ul(r0,r1) bswapr_ul(r0,r1) +# endif +#else +# define htonr_us(r0,r1) extr_us(r0,r1) +# if __WORDSIZE == 32 +# define htonr_ui(r0,r1) movr(r0,r1) +# else +# define htonr_ui(r0,r1) extr_ui(r0,r1) +# define htonr_ul(r0,r1) movr(r0,r1) +# endif +#endif + +static maybe_unused void +generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); +static maybe_unused void +generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); +#if __WORDSIZE == 64 +static maybe_unused void +generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); +#endif + #if defined(__i386__) || defined(__x86_64__) # include "jit_x86.c" #elif defined(__mips__) @@ -3489,4 +3980,50 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, # include "jit_alpha.c" #elif defined(__riscv) # include "jit_riscv.c" +#elif defined(__loongarch__) +# include "jit_loongarch.c" +#endif + +static maybe_unused void +generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1) +{ + jit_int32_t reg = jit_get_reg(jit_class_gpr); + + rshi(rn(reg), r1, 8); + andi(r0, r1, 0xff); + andi(rn(reg), rn(reg), 0xff); + lshi(r0, r0, 8); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} + +static maybe_unused void +generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg = jit_get_reg(jit_class_gpr); + + rshi(rn(reg), r1, 16); + bswapr_us(r0, r1); + bswapr_us(rn(reg), rn(reg)); + lshi(r0, r0, 16); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} + +#if __WORDSIZE == 64 +static maybe_unused void +generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg = jit_get_reg(jit_class_gpr); + + rshi_u(rn(reg), r1, 32); + bswapr_ui(r0, r1); + bswapr_ui(rn(reg), rn(reg)); + lshi(r0, r0, 32); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} #endif diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index 9898d9461..0fef3f2b7 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = 0df4ec86ba664dad3b4cc24fd3199131e8e3219f - parent = 364a705dc70b57a734b4e362226a386b34a008fb + commit = d640c6b484ac4936db16d865e4dc8850c1b5e122 + parent = ffa840032d55d2fd54f8546f332f91e6b8bbe495 method = merge cmdver = 0.4.3 diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt index 809d0b776..8407c5893 100644 --- a/deps/lightrec/CMakeLists.txt +++ b/deps/lightrec/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.0) -project(lightrec LANGUAGES C VERSION 0.4) +project(lightrec LANGUAGES C VERSION 0.6) set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries") if (NOT BUILD_SHARED_LIBS) @@ -68,6 +68,7 @@ option(OPT_TRANSFORM_OPS "(optimization) Transform opcodes" ON) option(OPT_LOCAL_BRANCHES "(optimization) Detect local branches" ON) option(OPT_SWITCH_DELAY_SLOTS "(optimization) Switch delay slots" ON) option(OPT_FLAG_STORES "(optimization) Flag stores that 
don't require invalidation" ON) +option(OPT_FLAG_IO "(optimization) Flag I/O opcodes whose target is known" ON) option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/LO" ON) option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON) @@ -90,15 +91,6 @@ if (CMAKE_C_COMPILER_ID STREQUAL "Clang") target_compile_options(${PROJECT_NAME} PRIVATE -Wno-initializer-overrides) endif() -option(ENABLE_TINYMM "Enable optional libtinymm dependency" OFF) -if (ENABLE_TINYMM) - find_library(TINYMM_LIBRARIES tinymm REQUIRED) - find_path(TINYMM_INCLUDE_DIR tinymm.h REQUIRED) - - include_directories(${TINYMM_INCLUDE_DIR}) - target_link_libraries(${PROJECT_NAME} PRIVATE ${TINYMM_LIBRARIES}) -endif (ENABLE_TINYMM) - if (ENABLE_THREADED_COMPILER) find_library(PTHREAD_LIBRARIES pthread REQUIRED) find_path(PTHREAD_INCLUDE_DIR pthread.h REQUIRED) @@ -107,6 +99,12 @@ if (ENABLE_THREADED_COMPILER) target_link_libraries(${PROJECT_NAME} PRIVATE ${PTHREAD_LIBRARIES}) endif (ENABLE_THREADED_COMPILER) +option(ENABLE_CODE_BUFFER "Enable external code buffer" OFF) +if (ENABLE_CODE_BUFFER) + target_sources(${PROJECT_NAME} PRIVATE tlsf/tlsf.c) + target_include_directories(${PROJECT_NAME} PRIVATE tlsf) +endif (ENABLE_CODE_BUFFER) + find_library(LIBLIGHTNING lightning REQUIRED) find_path(LIBLIGHTNING_INCLUDE_DIR lightning.h REQUIRED) @@ -118,7 +116,7 @@ if (LOG_LEVEL STREQUAL Debug) target_sources(${PROJECT_NAME} PRIVATE disassembler.c) endif() -configure_file(config.h.cmakein config.h @ONLY) +configure_file(lightrec-config.h.cmakein lightrec-config.h @ONLY) include(GNUInstallDirs) install(TARGETS ${PROJECT_NAME} diff --git a/deps/lightrec/README.md b/deps/lightrec/README.md index 40ecc8f6a..ab2c13b5f 100644 --- a/deps/lightrec/README.md +++ b/deps/lightrec/README.md @@ -50,4 +50,6 @@ Lightrec has been ported to the following emulators: * [__pcsx4all__ (my own fork)](https://github.com/pcercuei/pcsx4all) -* [__Beetle__ 
(libretro)](https://github.com/libretro/beetle-psx-libretro/) \ No newline at end of file +* [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/) + +[![Star History Chart](https://api.star-history.com/svg?repos=pcercuei/lightrec&type=Date)](https://star-history.com/#pcercuei/lightrec&Date) diff --git a/deps/lightrec/blockcache.c b/deps/lightrec/blockcache.c index 4512392dc..bb58cdb16 100644 --- a/deps/lightrec/blockcache.c +++ b/deps/lightrec/blockcache.c @@ -7,6 +7,8 @@ #include "debug.h" #include "lightrec-private.h" #include "memmanager.h" +#include "reaper.h" +#include "recompiler.h" #include #include @@ -63,8 +65,8 @@ void remove_from_code_lut(struct blockcache *cache, struct block *block) u32 offset = lut_offset(block->pc); if (block->function) { - memset(&state->code_lut[offset], 0, - block->nb_ops * sizeof(*state->code_lut)); + memset(lut_address(state, offset), 0, + block->nb_ops * lut_elm_size(state)); } } @@ -102,18 +104,64 @@ void lightrec_unregister_block(struct blockcache *cache, struct block *block) pr_err("Block at PC 0x%x is not in cache\n", block->pc); } -void lightrec_free_block_cache(struct blockcache *cache) +static bool lightrec_block_is_old(const struct lightrec_state *state, + const struct block *block) +{ + u32 diff = state->current_cycle - block->precompile_date; + + return diff > (1 << 27); /* About 4 seconds */ +} + +static void lightrec_free_blocks(struct blockcache *cache, + const struct block *except, bool all) { + struct lightrec_state *state = cache->state; struct block *block, *next; + bool outdated = all; unsigned int i; + u8 old_flags; for (i = 0; i < LUT_SIZE; i++) { for (block = cache->lut[i]; block; block = next) { next = block->next; - lightrec_free_block(cache->state, block); + + if (except && block == except) + continue; + + if (!all) { + outdated = lightrec_block_is_old(state, block) || + lightrec_block_is_outdated(state, block); + } + + if (!outdated) + continue; + + old_flags = block_set_flags(block, 
BLOCK_IS_DEAD); + + if (!(old_flags & BLOCK_IS_DEAD)) { + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_remove(state->rec, block); + + pr_debug("Freeing outdated block at PC 0x%08x\n", block->pc); + remove_from_code_lut(cache, block); + lightrec_unregister_block(cache, block); + lightrec_free_block(state, block); + } } } +} + +void lightrec_remove_outdated_blocks(struct blockcache *cache, + const struct block *except) +{ + pr_info("Running out of code space. Cleaning block cache...\n"); + lightrec_free_blocks(cache, except, false); +} + +void lightrec_free_block_cache(struct blockcache *cache) +{ + lightrec_free_blocks(cache, NULL, true); lightrec_free(cache->state, MEM_FOR_LIGHTREC, sizeof(*cache), cache); } @@ -150,22 +198,53 @@ u32 lightrec_calculate_block_hash(const struct block *block) return hash; } +static void lightrec_reset_lut_offset(struct lightrec_state *state, void *d) +{ + u32 pc = (u32)(uintptr_t) d; + struct block *block; + void *addr; + + block = lightrec_find_block(state->block_cache, pc); + if (!block) + return; + + if (block_has_flag(block, BLOCK_IS_DEAD)) + return; + + addr = block->function ?: state->get_next_block; + lut_write(state, lut_offset(pc), addr); +} + bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block) { - void **lut_entry = &state->code_lut[lut_offset(block->pc)]; + u32 offset = lut_offset(block->pc); bool outdated; - if (*lut_entry) + if (lut_read(state, offset)) return false; outdated = block->hash != lightrec_calculate_block_hash(block); if (likely(!outdated)) { /* The block was marked as outdated, but the content is still * the same */ - if (block->function) - *lut_entry = block->function; - else - *lut_entry = state->get_next_block; + + if (ENABLE_THREADED_COMPILER) { + /* + * When compiling a block that covers ours, the threaded + * compiler will set the LUT entries of the various + * entry points. 
Therefore we cannot write the LUT here, + * as we would risk overwriting the new entry points. + * Leave it to the reaper to re-install the LUT entries. + */ + + lightrec_reaper_add(state->reaper, + lightrec_reset_lut_offset, + (void *)(uintptr_t) block->pc); + } else if (block->function) { + lut_write(state, offset, block->function); + } else { + lut_write(state, offset, state->get_next_block); + } } return outdated; diff --git a/deps/lightrec/blockcache.h b/deps/lightrec/blockcache.h index 3b782f479..2e55ff657 100644 --- a/deps/lightrec/blockcache.h +++ b/deps/lightrec/blockcache.h @@ -24,4 +24,7 @@ void lightrec_free_block_cache(struct blockcache *cache); u32 lightrec_calculate_block_hash(const struct block *block); _Bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block); +void lightrec_remove_outdated_blocks(struct blockcache *cache, + const struct block *except); + #endif /* __BLOCKCACHE_H__ */ diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c index 0c193da1e..bef95948f 100644 --- a/deps/lightrec/disassembler.c +++ b/deps/lightrec/disassembler.c @@ -11,7 +11,7 @@ #include "lightrec-private.h" #include "regcache.h" -static const char *std_opcodes[] = { +static const char * const std_opcodes[] = { [OP_J] = "j ", [OP_JAL] = "jal ", [OP_BEQ] = "beq ", @@ -40,11 +40,9 @@ static const char *std_opcodes[] = { [OP_SWR] = "swr ", [OP_LWC2] = "lwc2 ", [OP_SWC2] = "swc2 ", - [OP_META_BEQZ] = "beqz ", - [OP_META_BNEZ] = "bnez ", }; -static const char *special_opcodes[] = { +static const char * const special_opcodes[] = { [OP_SPECIAL_SLL] = "sll ", [OP_SPECIAL_SRL] = "srl ", [OP_SPECIAL_SRA] = "sra ", @@ -75,14 +73,14 @@ static const char *special_opcodes[] = { [OP_SPECIAL_SLTU] = "sltu ", }; -static const char *regimm_opcodes[] = { +static const char * const regimm_opcodes[] = { [OP_REGIMM_BLTZ] = "bltz ", [OP_REGIMM_BGEZ] = "bgez ", [OP_REGIMM_BLTZAL] = "bltzal ", [OP_REGIMM_BGEZAL] = "bgezal ", }; -static const char 
*cp0_opcodes[] = { +static const char * const cp0_opcodes[] = { [OP_CP0_MFC0] = "mfc0 ", [OP_CP0_CFC0] = "cfc0 ", [OP_CP0_MTC0] = "mtc0 ", @@ -90,46 +88,102 @@ static const char *cp0_opcodes[] = { [OP_CP0_RFE] = "rfe", }; -static const char *cp2_opcodes[] = { +static const char * const cp2_basic_opcodes[] = { [OP_CP2_BASIC_MFC2] = "mfc2 ", [OP_CP2_BASIC_CFC2] = "cfc2 ", [OP_CP2_BASIC_MTC2] = "mtc2 ", [OP_CP2_BASIC_CTC2] = "ctc2 ", }; -static const char *opcode_flags[] = { +static const char * const cp2_opcodes[] = { + [OP_CP2_RTPS] = "rtps ", + [OP_CP2_NCLIP] = "nclip ", + [OP_CP2_OP] = "op ", + [OP_CP2_DPCS] = "dpcs ", + [OP_CP2_INTPL] = "intpl ", + [OP_CP2_MVMVA] = "mvmva ", + [OP_CP2_NCDS] = "ncds ", + [OP_CP2_CDP] = "cdp ", + [OP_CP2_NCDT] = "ncdt ", + [OP_CP2_NCCS] = "nccs ", + [OP_CP2_CC] = "cc ", + [OP_CP2_NCS] = "ncs ", + [OP_CP2_NCT] = "nct ", + [OP_CP2_SQR] = "sqr ", + [OP_CP2_DCPL] = "dcpl ", + [OP_CP2_DPCT] = "dpct ", + [OP_CP2_AVSZ3] = "avsz3 ", + [OP_CP2_AVSZ4] = "avsz4 ", + [OP_CP2_RTPT] = "rtpt ", + [OP_CP2_GPF] = "gpf ", + [OP_CP2_GPL] = "gpl ", + [OP_CP2_NCCT] = "ncct ", +}; + +static const char * const mult2_opcodes[] = { + "mult2 ", "multu2 ", +}; + +static const char * const opcode_flags[] = { "switched branch/DS", - "unload Rs", - "unload Rt", - "unload Rd", "sync point", }; -static const char *opcode_io_flags[] = { - "memory I/O", - "hardware I/O", +static const char * const opcode_io_flags[] = { "self-modifying code", "no invalidation", + "no mask", }; -static const char *opcode_branch_flags[] = { +static const char * const opcode_io_modes[] = { + "Memory access", + "I/O access", + "RAM access", + "BIOS access", + "Scratchpad access", + "Mapped I/O access" +}; + +static const char * const opcode_branch_flags[] = { "emulate branch", "local branch", }; -static const char *opcode_multdiv_flags[] = { +static const char * const opcode_multdiv_flags[] = { "No LO", "No HI", "No div check", }; -static int print_flags(char *buf, size_t len, u16 
flags, - const char **array, size_t array_size) +static size_t do_snprintf(char *buf, size_t len, bool *first, + const char *arg1, const char *arg2) { - const char *flag_name; - unsigned int i; + size_t bytes; + + if (*first) + bytes = snprintf(buf, len, "(%s%s", arg1, arg2); + else + bytes = snprintf(buf, len, ", %s%s", arg1, arg2); + + *first = false; + + return bytes; +} + +static const char * const reg_op_token[3] = { + "-", "*", "~", +}; + +static int print_flags(char *buf, size_t len, const struct opcode *op, + const char * const *array, size_t array_size, + bool is_io) +{ + const char *flag_name, *io_mode_name; + unsigned int i, io_mode; size_t count = 0, bytes; bool first = true; + u32 flags = op->flags; + unsigned int reg_op; for (i = 0; i < array_size + ARRAY_SIZE(opcode_flags); i++) { if (!(flags & BIT(i))) @@ -140,17 +194,56 @@ static int print_flags(char *buf, size_t len, u16 flags, else flag_name = array[i - ARRAY_SIZE(opcode_flags)]; - if (first) - bytes = snprintf(buf, len, "(%s", flag_name); - else - bytes = snprintf(buf, len, ", %s", flag_name); - - first = false; + bytes = do_snprintf(buf, len, &first, "", flag_name); buf += bytes; len -= bytes; count += bytes; } + if (is_io) { + io_mode = LIGHTREC_FLAGS_GET_IO_MODE(flags); + if (io_mode > 0) { + io_mode_name = opcode_io_modes[io_mode - 1]; + + bytes = do_snprintf(buf, len, &first, "", io_mode_name); + buf += bytes; + len -= bytes; + count += bytes; + } + } + + if (OPT_EARLY_UNLOAD) { + reg_op = LIGHTREC_FLAGS_GET_RS(flags); + if (reg_op) { + bytes = do_snprintf(buf, len, &first, + reg_op_token[reg_op - 1], + lightrec_reg_name(op->i.rs)); + buf += bytes; + len -= bytes; + count += bytes; + } + + reg_op = LIGHTREC_FLAGS_GET_RT(flags); + if (reg_op) { + bytes = do_snprintf(buf, len, &first, + reg_op_token[reg_op - 1], + lightrec_reg_name(op->i.rt)); + buf += bytes; + len -= bytes; + count += bytes; + } + + reg_op = LIGHTREC_FLAGS_GET_RD(flags); + if (reg_op) { + bytes = do_snprintf(buf, len, 
&first, + reg_op_token[reg_op - 1], + lightrec_reg_name(op->r.rd)); + buf += bytes; + len -= bytes; + count += bytes; + } + } + if (!first) count += snprintf(buf, len, ")"); else @@ -160,7 +253,7 @@ static int print_flags(char *buf, size_t len, u16 flags, } static int print_op_special(union code c, char *buf, size_t len, - const char ***flags_ptr, size_t *nb_flags) + const char * const **flags_ptr, size_t *nb_flags) { switch (c.r.op) { case OP_SPECIAL_SLL: @@ -190,6 +283,9 @@ static int print_op_special(union code c, char *buf, size_t len, lightrec_reg_name(c.r.rt), lightrec_reg_name(c.r.rs)); case OP_SPECIAL_JR: + *flags_ptr = opcode_branch_flags; + *nb_flags = ARRAY_SIZE(opcode_branch_flags); + fallthrough; case OP_SPECIAL_MTHI: case OP_SPECIAL_MTLO: return snprintf(buf, len, "%s%s", @@ -199,7 +295,7 @@ static int print_op_special(union code c, char *buf, size_t len, return snprintf(buf, len, "%s%s,%s", special_opcodes[c.r.op], lightrec_reg_name(c.r.rd), - lightrec_reg_name(c.r.rt)); + lightrec_reg_name(c.r.rs)); case OP_SPECIAL_SYSCALL: case OP_SPECIAL_BREAK: return snprintf(buf, len, "%s", special_opcodes[c.r.op]); @@ -228,17 +324,14 @@ static int print_op_special(union code c, char *buf, size_t len, static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp) { if (cp == 2) { - switch (c.i.rs) { - case OP_CP0_MFC0: - case OP_CP0_CFC0: - case OP_CP0_MTC0: - case OP_CP0_CTC0: + switch (c.r.op) { + case OP_CP2_BASIC: return snprintf(buf, len, "%s%s,%u", - cp2_opcodes[c.i.rs], + cp2_basic_opcodes[c.i.rs], lightrec_reg_name(c.i.rt), c.r.rd); default: - return snprintf(buf, len, "cp2 (0x%08x)", c.opcode); + return snprintf(buf, len, "%s", cp2_opcodes[c.r.op]); } } else { switch (c.i.rs) { @@ -259,7 +352,8 @@ static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp) } static int print_op(union code c, u32 pc, char *buf, size_t len, - const char ***flags_ptr, size_t *nb_flags) + const char * const **flags_ptr, size_t *nb_flags, + 
bool *is_io) { if (c.opcode == 0) return snprintf(buf, len, "nop "); @@ -276,10 +370,19 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, pc + 4 + ((s16)c.i.imm << 2)); case OP_J: case OP_JAL: + *flags_ptr = opcode_branch_flags; + *nb_flags = ARRAY_SIZE(opcode_branch_flags); return snprintf(buf, len, "%s0x%x", std_opcodes[c.i.op], (pc & 0xf0000000) | (c.j.imm << 2)); case OP_BEQ: + if (c.i.rs == c.i.rt) { + *flags_ptr = opcode_branch_flags; + *nb_flags = ARRAY_SIZE(opcode_branch_flags); + return snprintf(buf, len, "b 0x%x", + pc + 4 + ((s16)c.i.imm << 2)); + } + fallthrough; case OP_BNE: case OP_BLEZ: case OP_BGTZ: @@ -326,6 +429,7 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, case OP_SWR: *flags_ptr = opcode_io_flags; *nb_flags = ARRAY_SIZE(opcode_io_flags); + *is_io = true; return snprintf(buf, len, "%s%s,%hd(%s)", std_opcodes[c.i.op], lightrec_reg_name(c.i.rt), @@ -340,51 +444,63 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, lightrec_reg_name(c.i.rt), (s16)c.i.imm, lightrec_reg_name(c.i.rs)); - case OP_META_BEQZ: - case OP_META_BNEZ: - *flags_ptr = opcode_branch_flags; - *nb_flags = ARRAY_SIZE(opcode_branch_flags); - return snprintf(buf, len, "%s%s,0x%x", - std_opcodes[c.i.op], - lightrec_reg_name(c.i.rs), - pc + 4 + ((s16)c.i.imm << 2)); case OP_META_MOV: return snprintf(buf, len, "move %s,%s", lightrec_reg_name(c.r.rd), lightrec_reg_name(c.r.rs)); + case OP_META_EXTC: + return snprintf(buf, len, "extc %s,%s", + lightrec_reg_name(c.i.rt), + lightrec_reg_name(c.i.rs)); + case OP_META_EXTS: + return snprintf(buf, len, "exts %s,%s", + lightrec_reg_name(c.i.rt), + lightrec_reg_name(c.i.rs)); + case OP_META_MULT2: + case OP_META_MULTU2: + *flags_ptr = opcode_multdiv_flags; + *nb_flags = ARRAY_SIZE(opcode_multdiv_flags); + return snprintf(buf, len, "%s%s,%s,%s,%u", + mult2_opcodes[c.i.op == OP_META_MULTU2], + lightrec_reg_name(get_mult_div_hi(c)), + lightrec_reg_name(get_mult_div_lo(c)), + 
lightrec_reg_name(c.r.rs), c.r.op); default: return snprintf(buf, len, "unknown (0x%08x)", c.opcode); } } -void lightrec_print_disassembly(const struct block *block, const u32 *code) +void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr) { const struct opcode *op; - const char **flags_ptr; + const char * const *flags_ptr; size_t nb_flags, count, count2; char buf[256], buf2[256], buf3[256]; unsigned int i; - u32 pc, branch_pc; + u32 pc, branch_pc, code; + bool is_io; for (i = 0; i < block->nb_ops; i++) { op = &block->opcode_list[i]; branch_pc = get_branch_pc(block, i, 0); pc = block->pc + (i << 2); + code = LE32TOH(code_ptr[i]); - count = print_op((union code)code[i], pc, buf, sizeof(buf), - &flags_ptr, &nb_flags); + count = print_op((union code)code, pc, buf, sizeof(buf), + &flags_ptr, &nb_flags, &is_io); flags_ptr = NULL; nb_flags = 0; + is_io = false; count2 = print_op(op->c, branch_pc, buf2, sizeof(buf2), - &flags_ptr, &nb_flags); + &flags_ptr, &nb_flags, &is_io); - if (code[i] == op->c.opcode) { + if (code == op->c.opcode) { *buf2 = '\0'; count2 = 0; } - print_flags(buf3, sizeof(buf3), op->flags, flags_ptr, nb_flags); + print_flags(buf3, sizeof(buf3), op, flags_ptr, nb_flags, is_io); printf("0x%08x (0x%x)\t%s%*c%s%*c%s\n", pc, i << 2, buf, 30 - (int)count, ' ', buf2, 30 - (int)count2, ' ', buf3); diff --git a/deps/lightrec/disassembler.h b/deps/lightrec/disassembler.h index e78013aca..e4685a9db 100644 --- a/deps/lightrec/disassembler.h +++ b/deps/lightrec/disassembler.h @@ -8,6 +8,7 @@ #include "debug.h" #include "lightrec.h" +#include "lightrec-config.h" #ifndef __packed #define __packed __attribute__((packed)) @@ -17,25 +18,58 @@ /* Flags for all opcodes */ #define LIGHTREC_NO_DS BIT(0) -#define LIGHTREC_UNLOAD_RS BIT(1) -#define LIGHTREC_UNLOAD_RT BIT(2) -#define LIGHTREC_UNLOAD_RD BIT(3) -#define LIGHTREC_SYNC BIT(4) +#define LIGHTREC_SYNC BIT(1) /* Flags for load/store opcodes */ -#define LIGHTREC_DIRECT_IO BIT(5) -#define 
LIGHTREC_HW_IO BIT(6) -#define LIGHTREC_SMC BIT(7) -#define LIGHTREC_NO_INVALIDATE BIT(8) +#define LIGHTREC_SMC BIT(2) +#define LIGHTREC_NO_INVALIDATE BIT(3) +#define LIGHTREC_NO_MASK BIT(4) + +/* I/O mode for load/store opcodes */ +#define LIGHTREC_IO_MODE_LSB 5 +#define LIGHTREC_IO_MODE(x) ((x) << LIGHTREC_IO_MODE_LSB) +#define LIGHTREC_IO_UNKNOWN 0x0 +#define LIGHTREC_IO_DIRECT 0x1 +#define LIGHTREC_IO_HW 0x2 +#define LIGHTREC_IO_RAM 0x3 +#define LIGHTREC_IO_BIOS 0x4 +#define LIGHTREC_IO_SCRATCH 0x5 +#define LIGHTREC_IO_DIRECT_HW 0x6 +#define LIGHTREC_IO_MASK LIGHTREC_IO_MODE(0x7) +#define LIGHTREC_FLAGS_GET_IO_MODE(x) \ + (((x) & LIGHTREC_IO_MASK) >> LIGHTREC_IO_MODE_LSB) /* Flags for branches */ -#define LIGHTREC_EMULATE_BRANCH BIT(5) -#define LIGHTREC_LOCAL_BRANCH BIT(6) +#define LIGHTREC_EMULATE_BRANCH BIT(2) +#define LIGHTREC_LOCAL_BRANCH BIT(3) /* Flags for div/mult opcodes */ -#define LIGHTREC_NO_LO BIT(5) -#define LIGHTREC_NO_HI BIT(6) -#define LIGHTREC_NO_DIV_CHECK BIT(7) +#define LIGHTREC_NO_LO BIT(2) +#define LIGHTREC_NO_HI BIT(3) +#define LIGHTREC_NO_DIV_CHECK BIT(4) + +#define LIGHTREC_REG_RS_LSB 26 +#define LIGHTREC_REG_RS(x) ((x) << LIGHTREC_REG_RS_LSB) +#define LIGHTREC_REG_RS_MASK LIGHTREC_REG_RS(0x3) +#define LIGHTREC_FLAGS_GET_RS(x) \ + (((x) & LIGHTREC_REG_RS_MASK) >> LIGHTREC_REG_RS_LSB) + +#define LIGHTREC_REG_RT_LSB 28 +#define LIGHTREC_REG_RT(x) ((x) << LIGHTREC_REG_RT_LSB) +#define LIGHTREC_REG_RT_MASK LIGHTREC_REG_RT(0x3) +#define LIGHTREC_FLAGS_GET_RT(x) \ + (((x) & LIGHTREC_REG_RT_MASK) >> LIGHTREC_REG_RT_LSB) + +#define LIGHTREC_REG_RD_LSB 30 +#define LIGHTREC_REG_RD(x) ((x) << LIGHTREC_REG_RD_LSB) +#define LIGHTREC_REG_RD_MASK LIGHTREC_REG_RD(0x3) +#define LIGHTREC_FLAGS_GET_RD(x) \ + (((x) & LIGHTREC_REG_RD_MASK) >> LIGHTREC_REG_RD_LSB) + +#define LIGHTREC_REG_NOOP 0x0 +#define LIGHTREC_REG_UNLOAD 0x1 +#define LIGHTREC_REG_DISCARD 0x2 +#define LIGHTREC_REG_CLEAN 0x3 struct block; @@ -73,10 +107,13 @@ enum standard_opcodes { OP_LWC2 
= 0x32, OP_SWC2 = 0x3a, - OP_META_BEQZ = 0x14, - OP_META_BNEZ = 0x15, - OP_META_MOV = 0x16, + + OP_META_EXTC = 0x17, + OP_META_EXTS = 0x18, + + OP_META_MULT2 = 0x19, + OP_META_MULTU2 = 0x1a, }; enum special_opcodes { @@ -127,6 +164,28 @@ enum cp0_opcodes { enum cp2_opcodes { OP_CP2_BASIC = 0x00, + OP_CP2_RTPS = 0x01, + OP_CP2_NCLIP = 0x06, + OP_CP2_OP = 0x0c, + OP_CP2_DPCS = 0x10, + OP_CP2_INTPL = 0x11, + OP_CP2_MVMVA = 0x12, + OP_CP2_NCDS = 0x13, + OP_CP2_CDP = 0x14, + OP_CP2_NCDT = 0x16, + OP_CP2_NCCS = 0x1b, + OP_CP2_CC = 0x1c, + OP_CP2_NCS = 0x1e, + OP_CP2_NCT = 0x20, + OP_CP2_SQR = 0x28, + OP_CP2_DCPL = 0x29, + OP_CP2_DPCT = 0x2a, + OP_CP2_AVSZ3 = 0x2d, + OP_CP2_AVSZ4 = 0x2e, + OP_CP2_RTPT = 0x30, + OP_CP2_GPF = 0x3d, + OP_CP2_GPL = 0x3e, + OP_CP2_NCCT = 0x3f, }; enum cp2_basic_opcodes { @@ -197,9 +256,66 @@ struct opcode { struct opcode_i i; struct opcode_j j; }; - u16 flags; + u32 flags; +}; + +struct opcode_list { + u16 nb_ops; + struct opcode ops[]; }; void lightrec_print_disassembly(const struct block *block, const u32 *code); +static inline _Bool op_flag_no_ds(u32 flags) +{ + return OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS); +} + +static inline _Bool op_flag_sync(u32 flags) +{ + return OPT_LOCAL_BRANCHES && (flags & LIGHTREC_SYNC); +} + +static inline _Bool op_flag_smc(u32 flags) +{ + return OPT_FLAG_STORES && (flags & LIGHTREC_SMC); +} + +static inline _Bool op_flag_no_invalidate(u32 flags) +{ + return (OPT_FLAG_IO || OPT_FLAG_STORES) && + (flags & LIGHTREC_NO_INVALIDATE); +} + +static inline _Bool op_flag_no_mask(u32 flags) +{ + return OPT_FLAG_IO && (flags & LIGHTREC_NO_MASK); +} + +static inline _Bool op_flag_emulate_branch(u32 flags) +{ + return OPT_DETECT_IMPOSSIBLE_BRANCHES && + (flags & LIGHTREC_EMULATE_BRANCH); +} + +static inline _Bool op_flag_local_branch(u32 flags) +{ + return OPT_LOCAL_BRANCHES && (flags & LIGHTREC_LOCAL_BRANCH); +} + +static inline _Bool op_flag_no_lo(u32 flags) +{ + return OPT_FLAG_MULT_DIV && (flags & 
LIGHTREC_NO_LO); +} + +static inline _Bool op_flag_no_hi(u32 flags) +{ + return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_HI); +} + +static inline _Bool op_flag_no_div_check(u32 flags) +{ + return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_DIV_CHECK); +} + #endif /* __DISASSEMBLER_H__ */ diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index 0e44a77da..be50d6d8a 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -7,43 +7,47 @@ #include "debug.h" #include "disassembler.h" #include "emitter.h" +#include "lightning-wrapper.h" #include "optimizer.h" #include "regcache.h" -#include #include #include -typedef void (*lightrec_rec_func_t)(struct lightrec_state *, - const struct block *, u16); +typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16); /* Forward declarations */ -static void rec_SPECIAL(struct lightrec_state *state, const struct block *block, - u16 offset); -static void rec_REGIMM(struct lightrec_state *state, const struct block *block, - u16 offset); -static void rec_CP0(struct lightrec_state *state, const struct block *block, - u16 offset); -static void rec_CP2(struct lightrec_state *state, const struct block *block, - u16 offset); +static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block, u16 offset); +static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset); +static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset); +static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset); -static void unknown_opcode(struct lightrec_state *state, const struct block *block, u16 offset) +static void unknown_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset) { pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n", block->opcode_list[offset].c.opcode, block->pc + (offset << 2)); } -static void lightrec_emit_end_of_block(struct lightrec_state *state, +static void 
+lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit) +{ + /* Prevent jit_jmpi() from using our cycles register as a temporary */ + jit_live(LIGHTREC_REG_CYCLE); + + jit_patch_abs(jit_jmpi(), state->state->eob_wrapper_func); +} + +static void lightrec_emit_end_of_block(struct lightrec_cstate *state, const struct block *block, u16 offset, s8 reg_new_pc, u32 imm, u8 ra_reg, u32 link, bool update_cycles) { struct regcache *reg_cache = state->reg_cache; - u32 cycles = state->cycles; jit_state_t *_jit = block->_jit; const struct opcode *op = &block->opcode_list[offset], *next = &block->opcode_list[offset + 1]; + u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c); jit_note(__FILE__, __LINE__); @@ -62,7 +66,7 @@ static void lightrec_emit_end_of_block(struct lightrec_state *state, } if (has_delay_slot(op->c) && - !(op->flags & (LIGHTREC_NO_DS | LIGHTREC_LOCAL_BRANCH))) { + !op_flag_no_ds(op->flags) && !op_flag_local_branch(op->flags)) { cycles += lightrec_cycles_of_opcode(next->c); /* Recompile the delay slot */ @@ -70,8 +74,8 @@ static void lightrec_emit_end_of_block(struct lightrec_state *state, lightrec_rec_opcode(state, block, offset + 1); } - /* Store back remaining registers */ - lightrec_storeback_regs(reg_cache, _jit); + /* Clean the remaining registers */ + lightrec_clean_regs(reg_cache, _jit); jit_movr(JIT_V0, reg_new_pc); @@ -80,53 +84,37 @@ static void lightrec_emit_end_of_block(struct lightrec_state *state, pr_debug("EOB: %u cycles\n", cycles); } - if (offset + !!(op->flags & LIGHTREC_NO_DS) < block->nb_ops - 1) - state->branches[state->nb_branches++] = jit_jmpi(); + lightrec_jump_to_eob(state, _jit); } -void lightrec_emit_eob(struct lightrec_state *state, const struct block *block, - u16 offset) +void lightrec_emit_eob(struct lightrec_cstate *state, + const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; - union code c = block->opcode_list[offset].c; - 
lightrec_storeback_regs(reg_cache, _jit); + lightrec_clean_regs(reg_cache, _jit); jit_movi(JIT_V0, block->pc + (offset << 2)); - jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, - state->cycles - lightrec_cycles_of_opcode(c)); + jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles); - state->branches[state->nb_branches++] = jit_jmpi(); + lightrec_jump_to_eob(state, _jit); } -static u8 get_jr_jalr_reg(struct lightrec_state *state, const struct block *block, u16 offset) +static u8 get_jr_jalr_reg(struct lightrec_cstate *state, const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; - const struct opcode *op = &block->opcode_list[offset], - *next = &block->opcode_list[offset + 1]; - u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0); - - /* If the source register is already mapped to JIT_R0 or JIT_R1, and the - * delay slot is a I/O operation, unload the register, since JIT_R0 and - * JIT_R1 are explicitely used by the I/O opcode generators. 
*/ - if ((rs == JIT_R0 || rs == JIT_R1) && - !(op->flags & LIGHTREC_NO_DS) && - opcode_is_io(next->c) && - !(next->flags & (LIGHTREC_NO_INVALIDATE | LIGHTREC_DIRECT_IO))) { - lightrec_unload_reg(reg_cache, _jit, rs); - lightrec_free_reg(reg_cache, rs); - - rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0); - } + const struct opcode *op = &block->opcode_list[offset]; + u8 rs; + rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0); lightrec_lock_reg(reg_cache, _jit, rs); return rs; } -static void rec_special_JR(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_special_JR(struct lightrec_cstate *state, const struct block *block, u16 offset) { u8 rs = get_jr_jalr_reg(state, block, offset); @@ -134,7 +122,7 @@ static void rec_special_JR(struct lightrec_state *state, const struct block *blo lightrec_emit_end_of_block(state, block, offset, rs, 0, 31, 0, true); } -static void rec_special_JALR(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_special_JALR(struct lightrec_cstate *state, const struct block *block, u16 offset) { u8 rs = get_jr_jalr_reg(state, block, offset); union code c = block->opcode_list[offset].c; @@ -144,7 +132,7 @@ static void rec_special_JALR(struct lightrec_state *state, const struct block *b get_branch_pc(block, offset, 2), true); } -static void rec_J(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_J(struct lightrec_cstate *state, const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -154,7 +142,7 @@ static void rec_J(struct lightrec_state *state, const struct block *block, u16 o 31, 0, true); } -static void rec_JAL(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_JAL(struct lightrec_cstate *state, const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -164,8 +152,45 @@ static void rec_JAL(struct lightrec_state 
*state, const struct block *block, u16 31, get_branch_pc(block, offset, 2), true); } -static void rec_b(struct lightrec_state *state, const struct block *block, u16 offset, - jit_code_t code, u32 link, bool unconditional, bool bz) +static void lightrec_do_early_unload(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + const struct opcode *op = &block->opcode_list[offset]; + jit_state_t *_jit = block->_jit; + unsigned int i; + u8 reg; + struct { + u8 reg, op; + } reg_ops[3] = { + { op->r.rd, LIGHTREC_FLAGS_GET_RD(op->flags), }, + { op->i.rt, LIGHTREC_FLAGS_GET_RT(op->flags), }, + { op->i.rs, LIGHTREC_FLAGS_GET_RS(op->flags), }, + }; + + for (i = 0; i < ARRAY_SIZE(reg_ops); i++) { + reg = reg_ops[i].reg; + + switch (reg_ops[i].op) { + case LIGHTREC_REG_UNLOAD: + lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, true); + break; + + case LIGHTREC_REG_DISCARD: + lightrec_discard_reg_if_loaded(reg_cache, reg); + break; + + case LIGHTREC_REG_CLEAN: + lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, false); + break; + default: + break; + }; + } +} + +static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset, + jit_code_t code, jit_code_t code2, u32 link, bool unconditional, bool bz) { struct regcache *reg_cache = state->reg_cache; struct native_register *regs_backup; @@ -174,39 +199,54 @@ static void rec_b(struct lightrec_state *state, const struct block *block, u16 o const struct opcode *op = &block->opcode_list[offset], *next = &block->opcode_list[offset + 1]; jit_node_t *addr; - u8 link_reg; - u32 target_offset, cycles = state->cycles; + u8 link_reg, rs, rt; bool is_forward = (s16)op->i.imm >= -1; + int op_cycles = lightrec_cycles_of_opcode(op->c); + u32 target_offset, cycles = state->cycles + op_cycles; + bool no_indirection = false; u32 next_pc; jit_note(__FILE__, __LINE__); - if (!(op->flags & LIGHTREC_NO_DS)) + if (!op_flag_no_ds(op->flags)) cycles += 
lightrec_cycles_of_opcode(next->c); - state->cycles = 0; + state->cycles = -op_cycles; + + if (!unconditional) { + rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT); + rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache, + _jit, op->i.rt, REG_EXT); + + /* Unload dead registers before evaluating the branch */ + if (OPT_EARLY_UNLOAD) + lightrec_do_early_unload(state, block, offset); + + if (op_flag_local_branch(op->flags) && + (op_flag_no_ds(op->flags) || !next->opcode) && + is_forward && !lightrec_has_dirty_regs(reg_cache)) + no_indirection = true; + + if (no_indirection) + pr_debug("Using no indirection for branch at offset 0x%hx\n", offset << 2); + } if (cycles) jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles); if (!unconditional) { - u8 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT), - rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache, - _jit, op->i.rt, REG_EXT); - /* Generate the branch opcode */ - addr = jit_new_node_pww(code, NULL, rs, rt); + if (!no_indirection) + addr = jit_new_node_pww(code, NULL, rs, rt); lightrec_free_regs(reg_cache); regs_backup = lightrec_regcache_enter_branch(reg_cache); } - if (op->flags & LIGHTREC_LOCAL_BRANCH) { - if (next && !(op->flags & LIGHTREC_NO_DS)) { - /* Recompile the delay slot */ - if (next->opcode) - lightrec_rec_opcode(state, block, offset + 1); - } + if (op_flag_local_branch(op->flags)) { + /* Recompile the delay slot */ + if (!op_flag_no_ds(op->flags) && next->opcode) + lightrec_rec_opcode(state, block, offset + 1); if (link) { /* Update the $ra register */ @@ -215,31 +255,36 @@ static void rec_b(struct lightrec_state *state, const struct block *block, u16 o lightrec_free_reg(reg_cache, link_reg); } - /* Store back remaining registers */ - lightrec_storeback_regs(reg_cache, _jit); + /* Clean remaining registers */ + lightrec_clean_regs(reg_cache, _jit); target_offset = offset + 1 + (s16)op->i.imm - - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); + - !!op_flag_no_ds(op->flags); 
pr_debug("Adding local branch to offset 0x%x\n", target_offset << 2); branch = &state->local_branches[ state->nb_local_branches++]; branch->target = target_offset; - if (is_forward) - branch->branch = jit_jmpi(); + + if (no_indirection) + branch->branch = jit_new_node_pww(code2, NULL, rs, rt); + else if (is_forward) + branch->branch = jit_b(); else branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0); } - if (!(op->flags & LIGHTREC_LOCAL_BRANCH) || !is_forward) { + if (!op_flag_local_branch(op->flags) || !is_forward) { next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm); lightrec_emit_end_of_block(state, block, offset, -1, next_pc, 31, link, false); } if (!unconditional) { - jit_patch(addr); + if (!no_indirection) + jit_patch(addr); + lightrec_regcache_leave_branch(reg_cache, regs_backup); if (bz && link) { @@ -250,79 +295,88 @@ static void rec_b(struct lightrec_state *state, const struct block *block, u16 o lightrec_free_reg(reg_cache, link_reg); } - if (!(op->flags & LIGHTREC_NO_DS) && next->opcode) + if (!op_flag_no_ds(op->flags) && next->opcode) lightrec_rec_opcode(state, block, offset + 1); } } -static void rec_BNE(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_BNE(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + union code c = block->opcode_list[offset].c; + _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_beqr, 0, false, false); + + if (c.i.rt == 0) + rec_b(state, block, offset, jit_code_beqi, jit_code_bnei, 0, false, true); + else + rec_b(state, block, offset, jit_code_beqr, jit_code_bner, 0, false, false); } -static void rec_BEQ(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_BEQ(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_bner, 0, - c.i.rs == c.i.rt, false); + + if (c.i.rt == 
0) + rec_b(state, block, offset, jit_code_bnei, jit_code_beqi, 0, c.i.rs == 0, true); + else + rec_b(state, block, offset, jit_code_bner, jit_code_beqr, 0, c.i.rs == c.i.rt, false); } -static void rec_BLEZ(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_BLEZ(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_bgti, 0, c.i.rs == 0, true); + rec_b(state, block, offset, jit_code_bgti, jit_code_blei, 0, c.i.rs == 0, true); } -static void rec_BGTZ(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_BGTZ(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_blei, 0, false, true); + rec_b(state, block, offset, jit_code_blei, jit_code_bgti, 0, false, true); } -static void rec_regimm_BLTZ(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_regimm_BLTZ(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_bgei, 0, false, true); + rec_b(state, block, offset, jit_code_bgei, jit_code_blti, 0, false, true); } -static void rec_regimm_BLTZAL(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_regimm_BLTZAL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_bgei, + rec_b(state, block, offset, jit_code_bgei, jit_code_blti, get_branch_pc(block, offset, 2), false, true); } -static void rec_regimm_BGEZ(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_regimm_BGEZ(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; 
_jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_blti, 0, !c.i.rs, true); + rec_b(state, block, offset, jit_code_blti, jit_code_bgei, 0, !c.i.rs, true); } -static void rec_regimm_BGEZAL(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_regimm_BGEZAL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { const struct opcode *op = &block->opcode_list[offset]; _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_blti, + rec_b(state, block, offset, jit_code_blti, jit_code_bgei, get_branch_pc(block, offset, 2), !op->i.rs, true); } -static void rec_alu_imm(struct lightrec_state *state, const struct block *block, +static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block, u16 offset, jit_code_t code, bool slti) { struct regcache *reg_cache = state->reg_cache; @@ -343,7 +397,7 @@ static void rec_alu_imm(struct lightrec_state *state, const struct block *block, lightrec_free_reg(reg_cache, rt); } -static void rec_alu_special(struct lightrec_state *state, const struct block *block, +static void rec_alu_special(struct lightrec_cstate *state, const struct block *block, u16 offset, jit_code_t code, bool out_ext) { struct regcache *reg_cache = state->reg_cache; @@ -364,7 +418,7 @@ static void rec_alu_special(struct lightrec_state *state, const struct block *bl lightrec_free_reg(reg_cache, rd); } -static void rec_alu_shiftv(struct lightrec_state *state, const struct block *block, +static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *block, u16 offset, jit_code_t code) { struct regcache *reg_cache = state->reg_cache; @@ -398,39 +452,60 @@ static void rec_alu_shiftv(struct lightrec_state *state, const struct block *blo lightrec_free_reg(reg_cache, rd); } -static void rec_ADDIU(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_movi(struct lightrec_cstate *state, + const struct block *block, u16 
offset) { - _jit_name(block->_jit, __func__); - rec_alu_imm(state, block, offset, jit_code_addi, false); + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u16 flags = REG_EXT; + u8 rt; + + if (!(c.i.imm & 0x8000)) + flags |= REG_ZEXT; + + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags); + + jit_movi(rt, (s32)(s16) c.i.imm); + + lightrec_free_reg(reg_cache, rt); } -static void rec_ADDI(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_ADDIU(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + _jit_name(block->_jit, __func__); + if (block->opcode_list[offset].c.i.rs) + rec_alu_imm(state, block, offset, jit_code_addi, false); + else + rec_movi(state, block, offset); +} + +static void rec_ADDI(struct lightrec_cstate *state, + const struct block *block, u16 offset) { /* TODO: Handle the exception? */ _jit_name(block->_jit, __func__); - rec_alu_imm(state, block, offset, jit_code_addi, false); + rec_ADDIU(state, block, offset); } -static void rec_SLTIU(struct lightrec_state *state, const struct block *block, - u16 offset) - +static void rec_SLTIU(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_imm(state, block, offset, jit_code_lti_u, true); } -static void rec_SLTI(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SLTI(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_imm(state, block, offset, jit_code_lti, true); } -static void rec_ANDI(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_ANDI(struct lightrec_cstate *state, + const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -456,7 +531,7 @@ static void rec_ANDI(struct 
lightrec_state *state, const struct block *block, lightrec_free_reg(reg_cache, rt); } -static void rec_alu_or_xor(struct lightrec_state *state, const struct block *block, +static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *block, u16 offset, jit_code_t code) { struct regcache *reg_cache = state->reg_cache; @@ -478,22 +553,22 @@ static void rec_alu_or_xor(struct lightrec_state *state, const struct block *blo } -static void rec_ORI(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_ORI(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_or_xor(state, block, offset, jit_code_ori); } -static void rec_XORI(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_XORI(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_or_xor(state, block, offset, jit_code_xori); } -static void rec_LUI(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_LUI(struct lightrec_cstate *state, + const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -513,38 +588,38 @@ static void rec_LUI(struct lightrec_state *state, const struct block *block, lightrec_free_reg(reg_cache, rt); } -static void rec_special_ADDU(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_ADDU(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_special(state, block, offset, jit_code_addr, false); } -static void rec_special_ADD(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_ADD(struct lightrec_cstate *state, + const struct block *block, u16 offset) { /* TODO: Handle the exception? 
*/ _jit_name(block->_jit, __func__); rec_alu_special(state, block, offset, jit_code_addr, false); } -static void rec_special_SUBU(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_SUBU(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_special(state, block, offset, jit_code_subr, false); } -static void rec_special_SUB(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_SUB(struct lightrec_cstate *state, + const struct block *block, u16 offset) { /* TODO: Handle the exception? */ _jit_name(block->_jit, __func__); rec_alu_special(state, block, offset, jit_code_subr, false); } -static void rec_special_AND(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_AND(struct lightrec_cstate *state, + const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -578,8 +653,8 @@ static void rec_special_AND(struct lightrec_state *state, const struct block *bl lightrec_free_reg(reg_cache, rd); } -static void rec_special_or_nor(struct lightrec_state *state, const struct block *block, - u16 offset, bool nor) +static void rec_special_or_nor(struct lightrec_cstate *state, + const struct block *block, u16 offset, bool nor) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -599,10 +674,8 @@ static void rec_special_or_nor(struct lightrec_state *state, const struct block if (!nor) flags_rd = REG_ZEXT & flags_rs & flags_rt; - /* E(rd) = (E(rs) & E(rt)) | (E(rt) & !Z(rt)) | (E(rs) & !Z(rs)) */ - if ((REG_EXT & flags_rs & flags_rt) || - (flags_rt & (REG_EXT | REG_ZEXT) == REG_EXT) || - (flags_rs & (REG_EXT | REG_ZEXT) == REG_EXT)) + /* E(rd) = E(rs) & E(rt) */ + if (REG_EXT & flags_rs & flags_rt) flags_rd |= REG_EXT; lightrec_set_reg_out_flags(reg_cache, rd, flags_rd); @@ 
-617,21 +690,21 @@ static void rec_special_or_nor(struct lightrec_state *state, const struct block lightrec_free_reg(reg_cache, rd); } -static void rec_special_OR(struct lightrec_state *state, +static void rec_special_OR(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_special_or_nor(state, block, offset, false); } -static void rec_special_NOR(struct lightrec_state *state, +static void rec_special_NOR(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_special_or_nor(state, block, offset, true); } -static void rec_special_XOR(struct lightrec_state *state, +static void rec_special_XOR(struct lightrec_cstate *state, const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; @@ -664,42 +737,42 @@ static void rec_special_XOR(struct lightrec_state *state, lightrec_free_reg(reg_cache, rd); } -static void rec_special_SLTU(struct lightrec_state *state, +static void rec_special_SLTU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_special(state, block, offset, jit_code_ltr_u, true); } -static void rec_special_SLT(struct lightrec_state *state, +static void rec_special_SLT(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_special(state, block, offset, jit_code_ltr, true); } -static void rec_special_SLLV(struct lightrec_state *state, +static void rec_special_SLLV(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_shiftv(state, block, offset, jit_code_lshr); } -static void rec_special_SRLV(struct lightrec_state *state, +static void rec_special_SRLV(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_shiftv(state, block, offset, jit_code_rshr_u); } -static void rec_special_SRAV(struct 
lightrec_state *state, +static void rec_special_SRAV(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_shiftv(state, block, offset, jit_code_rshr); } -static void rec_alu_shift(struct lightrec_state *state, const struct block *block, +static void rec_alu_shift(struct lightrec_cstate *state, const struct block *block, u16 offset, jit_code_t code) { struct regcache *reg_cache = state->reg_cache; @@ -728,33 +801,33 @@ static void rec_alu_shift(struct lightrec_state *state, const struct block *bloc lightrec_free_reg(reg_cache, rd); } -static void rec_special_SLL(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_SLL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_shift(state, block, offset, jit_code_lshi); } -static void rec_special_SRL(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_SRL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_shift(state, block, offset, jit_code_rshi_u); } -static void rec_special_SRA(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_SRA(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_shift(state, block, offset, jit_code_rshi); } -static void rec_alu_mult(struct lightrec_state *state, const struct block *block, - u16 offset, bool is_signed) +static void rec_alu_mult(struct lightrec_cstate *state, + const struct block *block, u16 offset, bool is_signed) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; - u16 flags = block->opcode_list[offset].flags; + u32 flags = block->opcode_list[offset].flags; u8 reg_lo = get_mult_div_lo(c); u8 reg_hi = get_mult_div_hi(c); jit_state_t *_jit = block->_jit; @@ -770,59 
+843,59 @@ static void rec_alu_mult(struct lightrec_state *state, const struct block *block rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags); rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags); - if (!(flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(flags)) lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0); else if (__WORDSIZE == 32) lo = lightrec_alloc_reg_temp(reg_cache, _jit); - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT); -#if __WORDSIZE == 32 - /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit - * operation if the MULT was detected a 32-bit only. */ - if (!(flags & LIGHTREC_NO_HI)) { - if (is_signed) - jit_qmulr(lo, hi, rs, rt); - else - jit_qmulr_u(lo, hi, rs, rt); - } else { - jit_mulr(lo, rs, rt); - } -#else - /* On 64-bit systems, do a 64*64->64 bit operation. */ - if (flags & LIGHTREC_NO_LO) { - jit_mulr(hi, rs, rt); - jit_rshi(hi, hi, 32); + if (__WORDSIZE == 32) { + /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit + * operation if the MULT was detected a 32-bit only. */ + if (!op_flag_no_hi(flags)) { + if (is_signed) + jit_qmulr(lo, hi, rs, rt); + else + jit_qmulr_u(lo, hi, rs, rt); + } else { + jit_mulr(lo, rs, rt); + } } else { - jit_mulr(lo, rs, rt); + /* On 64-bit systems, do a 64*64->64 bit operation. 
*/ + if (op_flag_no_lo(flags)) { + jit_mulr(hi, rs, rt); + jit_rshi(hi, hi, 32); + } else { + jit_mulr(lo, rs, rt); - /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */ - if (!(flags & LIGHTREC_NO_HI)) - jit_rshi(hi, lo, 32); + /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */ + if (!op_flag_no_hi(flags)) + jit_rshi(hi, lo, 32); + } } -#endif lightrec_free_reg(reg_cache, rs); lightrec_free_reg(reg_cache, rt); - if (!(flags & LIGHTREC_NO_LO) || __WORDSIZE == 32) + if (!op_flag_no_lo(flags) || __WORDSIZE == 32) lightrec_free_reg(reg_cache, lo); - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) lightrec_free_reg(reg_cache, hi); } -static void rec_alu_div(struct lightrec_state *state, const struct block *block, - u16 offset, bool is_signed) +static void rec_alu_div(struct lightrec_cstate *state, + const struct block *block, u16 offset, bool is_signed) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; - u16 flags = block->opcode_list[offset].flags; - bool no_check = flags & LIGHTREC_NO_DIV_CHECK; + u32 flags = block->opcode_list[offset].flags; + bool no_check = op_flag_no_div_check(flags); u8 reg_lo = get_mult_div_lo(c); u8 reg_hi = get_mult_div_hi(c); jit_state_t *_jit = block->_jit; jit_node_t *branch, *to_end; - u8 lo, hi, rs, rt, rflags = 0; + u8 lo = 0, hi = 0, rs, rt, rflags = 0; jit_note(__FILE__, __LINE__); @@ -834,23 +907,22 @@ static void rec_alu_div(struct lightrec_state *state, const struct block *block, rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags); rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags); - if (!(flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(flags)) lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0); - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0); /* Jump to special handler if dividing by zero */ if (!no_check) branch = jit_beqi(rt, 0); -#if 
__WORDSIZE == 32 - if (flags & LIGHTREC_NO_LO) { + if (op_flag_no_lo(flags)) { if (is_signed) jit_remr(hi, rs, rt); else jit_remr_u(hi, rs, rt); - } else if (flags & LIGHTREC_NO_HI) { + } else if (op_flag_no_hi(flags)) { if (is_signed) jit_divr(lo, rs, rt); else @@ -861,36 +933,14 @@ static void rec_alu_div(struct lightrec_state *state, const struct block *block, else jit_qdivr_u(lo, hi, rs, rt); } -#else - /* On 64-bit systems, the input registers must be 32 bits, so we first sign-extend - * (if div) or clear (if divu) the input registers. */ - if (flags & LIGHTREC_NO_LO) { - if (is_signed) - jit_remr(hi, rs, rt); - else - jit_remr_u(hi, rs, rt); - } else if (flags & LIGHTREC_NO_HI) { - if (is_signed) - jit_divr(lo, rs, rt); - else - jit_divr_u(lo, rs, rt); - } else { - if (is_signed) - jit_qdivr(lo, hi, rs, rt); - else - jit_qdivr_u(lo, hi, rs, rt); - } -#endif if (!no_check) { - lightrec_regcache_mark_live(reg_cache, _jit); - /* Jump above the div-by-zero handler */ - to_end = jit_jmpi(); + to_end = jit_b(); jit_patch(branch); - if (!(flags & LIGHTREC_NO_LO)) { + if (!op_flag_no_lo(flags)) { if (is_signed) { jit_lti(lo, rs, 0); jit_lshi(lo, lo, 1); @@ -900,7 +950,7 @@ static void rec_alu_div(struct lightrec_state *state, const struct block *block, } } - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) jit_movr(hi, rs); jit_patch(to_end); @@ -909,42 +959,42 @@ static void rec_alu_div(struct lightrec_state *state, const struct block *block, lightrec_free_reg(reg_cache, rs); lightrec_free_reg(reg_cache, rt); - if (!(flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(flags)) lightrec_free_reg(reg_cache, lo); - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) lightrec_free_reg(reg_cache, hi); } -static void rec_special_MULT(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_MULT(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_mult(state, 
block, offset, true); } -static void rec_special_MULTU(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_MULTU(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_mult(state, block, offset, false); } -static void rec_special_DIV(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_DIV(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_div(state, block, offset, true); } -static void rec_special_DIVU(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_DIVU(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_alu_div(state, block, offset, false); } -static void rec_alu_mv_lo_hi(struct lightrec_state *state, +static void rec_alu_mv_lo_hi(struct lightrec_cstate *state, const struct block *block, u8 dst, u8 src) { struct regcache *reg_cache = state->reg_cache; @@ -954,18 +1004,14 @@ static void rec_alu_mv_lo_hi(struct lightrec_state *state, src = lightrec_alloc_reg_in(reg_cache, _jit, src, 0); dst = lightrec_alloc_reg_out(reg_cache, _jit, dst, REG_EXT); -#if __WORDSIZE == 32 - jit_movr(dst, src); -#else jit_extr_i(dst, src); -#endif lightrec_free_reg(reg_cache, src); lightrec_free_reg(reg_cache, dst); } -static void rec_special_MFHI(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_MFHI(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -973,8 +1019,8 @@ static void rec_special_MFHI(struct lightrec_state *state, const struct block *b rec_alu_mv_lo_hi(state, block, c.r.rd, REG_HI); } -static void rec_special_MTHI(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_MTHI(struct lightrec_cstate *state, 
+ const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -982,8 +1028,8 @@ static void rec_special_MTHI(struct lightrec_state *state, const struct block *b rec_alu_mv_lo_hi(state, block, REG_HI, c.r.rs); } -static void rec_special_MFLO(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_MFLO(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -991,8 +1037,8 @@ static void rec_special_MFLO(struct lightrec_state *state, const struct block *b rec_alu_mv_lo_hi(state, block, c.r.rd, REG_LO); } -static void rec_special_MTLO(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_MTLO(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -1000,42 +1046,57 @@ static void rec_special_MTLO(struct lightrec_state *state, const struct block *b rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs); } -static void call_to_c_wrapper(struct lightrec_state *state, const struct block *block, - u32 arg, bool with_arg, enum c_wrappers wrapper) +static void call_to_c_wrapper(struct lightrec_cstate *state, + const struct block *block, u32 arg, + enum c_wrappers wrapper) { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; - u8 tmp, tmp2, tmp3; + s8 tmp, tmp2; - if (with_arg) - tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1); - tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R0); - tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + /* Make sure JIT_R1 is not mapped; it will be used in the C wrapper. 
*/ + tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1); + + tmp = lightrec_get_reg_with_value(reg_cache, + (intptr_t) state->state->wrappers_eps[wrapper]); + if (tmp < 0) { + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, wrappers_eps[wrapper])); + + lightrec_temp_set_value(reg_cache, tmp, + (intptr_t) state->state->wrappers_eps[wrapper]); + } + + lightrec_free_reg(reg_cache, tmp2); - jit_ldxi(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrapper)); - jit_ldxi(tmp2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrappers[wrapper])); - if (with_arg) - jit_movi(tmp3, arg); +#ifdef __mips__ + /* On MIPS, register t9 is always used as the target register for JALR. + * Therefore if it does not contain the target address we must + * invalidate it. */ + if (tmp != _T9) + lightrec_unload_reg(reg_cache, _jit, _T9); +#endif + + jit_prepare(); + jit_pushargi(arg); + lightrec_regcache_mark_live(reg_cache, _jit); jit_callr(tmp); lightrec_free_reg(reg_cache, tmp); - lightrec_free_reg(reg_cache, tmp2); - if (with_arg) - lightrec_free_reg(reg_cache, tmp3); lightrec_regcache_mark_live(reg_cache, _jit); } -static void rec_io(struct lightrec_state *state, const struct block *block, u16 offset, +static void rec_io(struct lightrec_cstate *state, + const struct block *block, u16 offset, bool load_rt, bool read_rt) { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; union code c = block->opcode_list[offset].c; - u16 flags = block->opcode_list[offset].flags; - bool is_tagged = flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO); + u32 flags = block->opcode_list[offset].flags; + bool is_tagged = LIGHTREC_FLAGS_GET_IO_MODE(flags); u32 lut_entry; jit_note(__FILE__, __LINE__); @@ -1048,19 +1109,167 @@ static void rec_io(struct lightrec_state *state, const struct block *block, u16 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false); if (is_tagged) { - 
call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_RW); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_RW); } else { lut_entry = lightrec_get_lut_entry(block); call_to_c_wrapper(state, block, (lut_entry << 16) | offset, - true, C_WRAPPER_RW_GENERIC); + C_WRAPPER_RW_GENERIC); + } +} + +static u32 rec_ram_mask(struct lightrec_state *state) +{ + return (RAM_SIZE << (state->mirrors_mapped * 2)) - 1; +} + +static u32 rec_io_mask(const struct lightrec_state *state) +{ + u32 length = state->maps[PSX_MAP_HW_REGISTERS].length; + + return GENMASK(31 - clz32(length - 1), 0); +} + +static void rec_store_memory(struct lightrec_cstate *cstate, + const struct block *block, + u16 offset, jit_code_t code, + jit_code_t swap_code, + uintptr_t addr_offset, u32 addr_mask, + bool invalidate) +{ + const struct lightrec_state *state = cstate->state; + struct regcache *reg_cache = cstate->reg_cache; + struct opcode *op = &block->opcode_list[offset]; + jit_state_t *_jit = block->_jit; + union code c = op->c; + u8 rs, rt, tmp, tmp2, tmp3, addr_reg, addr_reg2; + s16 imm = (s16)c.i.imm; + s32 simm = (s32)imm << (1 - lut_is_32bit(state)); + s32 lut_offt = offsetof(struct lightrec_state, code_lut); + bool no_mask = op_flag_no_mask(op->flags); + bool add_imm = c.i.imm && + ((!state->mirrors_mapped && !no_mask) || (invalidate && + ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt)))); + bool need_tmp = !no_mask || addr_offset || add_imm || invalidate; + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); + if (need_tmp) + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + addr_reg = rs; + + if (add_imm) { + jit_addi(tmp, addr_reg, (s16)c.i.imm); + addr_reg = tmp; + imm = 0; + } else if (simm) { + lut_offt += simm; + } + + if (!no_mask) { + jit_andi(tmp, addr_reg, addr_mask); + addr_reg = tmp; + } + + if (addr_offset) { + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_addi(tmp2, addr_reg, addr_offset); + 
addr_reg2 = tmp2; + } else { + addr_reg2 = addr_reg; + } + + if (is_big_endian() && swap_code && c.i.rt) { + tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp3, rt); + jit_new_node_www(code, imm, addr_reg2, tmp3); + + lightrec_free_reg(reg_cache, tmp3); + } else { + jit_new_node_www(code, imm, addr_reg2, rt); + } + + lightrec_free_reg(reg_cache, rt); + + if (invalidate) { + tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0); + + if (c.i.op != OP_SW) { + jit_andi(tmp, addr_reg, ~3); + addr_reg = tmp; + } + + if (!lut_is_32bit(state)) { + jit_lshi(tmp, addr_reg, 1); + addr_reg = tmp; + } + + if (addr_reg == rs && c.i.rs == 0) { + addr_reg = LIGHTREC_REG_STATE; + } else { + jit_addr(tmp, addr_reg, LIGHTREC_REG_STATE); + addr_reg = tmp; + } + + if (lut_is_32bit(state)) + jit_stxi_i(lut_offt, addr_reg, tmp3); + else + jit_stxi(lut_offt, addr_reg, tmp3); + + lightrec_free_reg(reg_cache, tmp3); } + + if (addr_offset) + lightrec_free_reg(reg_cache, tmp2); + if (need_tmp) + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, rs); +} + +static void rec_store_ram(struct lightrec_cstate *cstate, + const struct block *block, + u16 offset, jit_code_t code, + jit_code_t swap_code, bool invalidate) +{ + struct lightrec_state *state = cstate->state; + + _jit_note(block->_jit, __FILE__, __LINE__); + + return rec_store_memory(cstate, block, offset, code, swap_code, + state->offset_ram, rec_ram_mask(state), + invalidate); +} + +static void rec_store_scratch(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + return rec_store_memory(cstate, block, offset, code, swap_code, + cstate->state->offset_scratch, + 0x1fffffff, false); } -static void rec_store_direct_no_invalidate(struct lightrec_state *state, +static void rec_store_io(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, 
jit_code_t swap_code) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + return rec_store_memory(cstate, block, offset, code, swap_code, + cstate->state->offset_io, + rec_io_mask(cstate->state), false); +} + +static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, const struct block *block, - u16 offset, jit_code_t code) + u16 offset, jit_code_t code, + jit_code_t swap_code) { - struct regcache *reg_cache = state->reg_cache; + struct lightrec_state *state = cstate->state; + struct regcache *reg_cache = cstate->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; jit_node_t *to_not_ram, *to_end; @@ -1092,11 +1301,9 @@ static void rec_store_direct_no_invalidate(struct lightrec_state *state, if (state->offset_ram != state->offset_scratch) { to_not_ram = jit_bmsi(tmp, BIT(28)); - lightrec_regcache_mark_live(reg_cache, _jit); - jit_movi(tmp2, state->offset_ram); - to_end = jit_jmpi(); + to_end = jit_b(); jit_patch(to_not_ram); jit_movi(tmp2, state->offset_scratch); @@ -1111,16 +1318,28 @@ static void rec_store_direct_no_invalidate(struct lightrec_state *state, } rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); - jit_new_node_www(code, imm, tmp, rt); + + if (is_big_endian() && swap_code && c.i.rt) { + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp2, rt); + jit_new_node_www(code, imm, tmp, tmp2); + + lightrec_free_reg(reg_cache, tmp2); + } else { + jit_new_node_www(code, imm, tmp, rt); + } lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp); } -static void rec_store_direct(struct lightrec_state *state, const struct block *block, - u16 offset, jit_code_t code) +static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block, + u16 offset, jit_code_t code, jit_code_t swap_code) { - struct regcache *reg_cache = state->reg_cache; + struct lightrec_state *state = cstate->state; + u32 ram_size = state->mirrors_mapped ? 
RAM_SIZE * 4 : RAM_SIZE; + struct regcache *reg_cache = cstate->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; jit_node_t *to_not_ram, *to_end; @@ -1135,32 +1354,32 @@ static void rec_store_direct(struct lightrec_state *state, const struct block *b /* Convert to KUNSEG and avoid RAM mirrors */ if (c.i.imm) { jit_addi(tmp2, rs, (s16)c.i.imm); - jit_andi(tmp2, tmp2, 0x1f800000 | (RAM_SIZE - 1)); + jit_andi(tmp2, tmp2, 0x1f800000 | (ram_size - 1)); } else { - jit_andi(tmp2, rs, 0x1f800000 | (RAM_SIZE - 1)); + jit_andi(tmp2, rs, 0x1f800000 | (ram_size - 1)); } lightrec_free_reg(reg_cache, rs); tmp = lightrec_alloc_reg_temp(reg_cache, _jit); - to_not_ram = jit_bgti(tmp2, RAM_SIZE); - - lightrec_regcache_mark_live(reg_cache, _jit); + to_not_ram = jit_bgti(tmp2, ram_size); /* Compute the offset to the code LUT */ jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3); -#if __WORDSIZE == 64 - jit_lshi(tmp, tmp, 1); -#endif + if (!lut_is_32bit(state)) + jit_lshi(tmp, tmp, 1); jit_addr(tmp, LIGHTREC_REG_STATE, tmp); /* Write NULL to the code LUT to invalidate any block that's there */ - jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + if (lut_is_32bit(state)) + jit_stxi_i(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + else + jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); if (state->offset_ram != state->offset_scratch) { jit_movi(tmp, state->offset_ram); - to_end = jit_jmpi(); + to_end = jit_b(); } jit_patch(to_not_ram); @@ -1178,76 +1397,203 @@ static void rec_store_direct(struct lightrec_state *state, const struct block *b lightrec_free_reg(reg_cache, tmp3); rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); - jit_new_node_www(code, 0, tmp2, rt); + + if (is_big_endian() && swap_code && c.i.rt) { + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp, rt); + jit_new_node_www(code, 0, tmp2, tmp); + + lightrec_free_reg(reg_cache, tmp); + } else { + jit_new_node_www(code, 
0, tmp2, rt); + } lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp2); } -static void rec_store(struct lightrec_state *state, const struct block *block, - u16 offset, jit_code_t code) -{ - u16 flags = block->opcode_list[offset].flags; - - if (flags & LIGHTREC_NO_INVALIDATE) { - rec_store_direct_no_invalidate(state, block, offset, code); - } else if (flags & LIGHTREC_DIRECT_IO) { - if (state->invalidate_from_dma_only) - rec_store_direct_no_invalidate(state, block, offset, code); - else - rec_store_direct(state, block, offset, code); - } else { +static void rec_store(struct lightrec_cstate *state, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) +{ + u32 flags = block->opcode_list[offset].flags; + bool no_invalidate = op_flag_no_invalidate(flags) || + state->state->invalidate_from_dma_only; + + switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { + case LIGHTREC_IO_RAM: + rec_store_ram(state, block, offset, code, + swap_code, !no_invalidate); + break; + case LIGHTREC_IO_SCRATCH: + rec_store_scratch(state, block, offset, code, swap_code); + break; + case LIGHTREC_IO_DIRECT: + if (no_invalidate) { + rec_store_direct_no_invalidate(state, block, offset, + code, swap_code); + } else { + rec_store_direct(state, block, offset, code, swap_code); + } + break; + case LIGHTREC_IO_DIRECT_HW: + rec_store_io(state, block, offset, code, swap_code); + break; + default: rec_io(state, block, offset, true, false); + break; } } -static void rec_SB(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SB(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_c); + rec_store(state, block, offset, jit_code_stxi_c, 0); } -static void rec_SH(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SH(struct lightrec_cstate *state, + const struct block *block, u16 offset) { 
_jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_s); + rec_store(state, block, offset, + jit_code_stxi_s, jit_code_bswapr_us); } -static void rec_SW(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SW(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_i); + rec_store(state, block, offset, + jit_code_stxi_i, jit_code_bswapr_ui); } -static void rec_SWL(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SWL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, true, false); } -static void rec_SWR(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SWR(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, true, false); } -static void rec_SWC2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SWC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, false, false); } -static void rec_load_direct(struct lightrec_state *state, const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) +static void rec_load_memory(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned, + uintptr_t addr_offset, u32 addr_mask) { - struct regcache *reg_cache = state->reg_cache; + struct regcache *reg_cache = cstate->reg_cache; + struct opcode *op = &block->opcode_list[offset]; + jit_state_t *_jit = block->_jit; + u8 rs, rt, addr_reg, flags = REG_EXT; + bool no_mask = op_flag_no_mask(op->flags); + union code c = op->c; + s16 imm; + + if (!c.i.rt) + return; + + if 
(is_unsigned) + flags |= REG_ZEXT; + + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags); + + if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) { + jit_addi(rt, rs, (s16)c.i.imm); + addr_reg = rt; + imm = 0; + } else { + addr_reg = rs; + imm = (s16)c.i.imm; + } + + if (!no_mask) { + jit_andi(rt, addr_reg, addr_mask); + addr_reg = rt; + } + + if (addr_offset) { + jit_addi(rt, addr_reg, addr_offset); + addr_reg = rt; + } + + jit_new_node_www(code, rt, addr_reg, imm); + + if (is_big_endian() && swap_code) { + jit_new_node_ww(swap_code, rt, rt); + + if (c.i.op == OP_LH) + jit_extr_s(rt, rt); + else if (c.i.op == OP_LW && __WORDSIZE == 64) + jit_extr_i(rt, rt); + } + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rt); +} + +static void rec_load_ram(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_ram, rec_ram_mask(cstate->state)); +} + +static void rec_load_bios(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_bios, 0x1fffffff); +} + +static void rec_load_scratch(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_scratch, 0x1fffffff); +} + +static void rec_load_io(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) +{ + 
_jit_note(block->_jit, __FILE__, __LINE__); + + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_io, rec_io_mask(cstate->state)); +} + +static void rec_load_direct(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, + bool is_unsigned) +{ + struct lightrec_state *state = cstate->state; + struct regcache *reg_cache = cstate->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2; @@ -1296,15 +1642,13 @@ static void rec_load_direct(struct lightrec_state *state, const struct block *bl } else { to_not_ram = jit_bmsi(addr_reg, BIT(28)); - lightrec_regcache_mark_live(reg_cache, _jit); - /* Convert to KUNSEG and avoid RAM mirrors */ jit_andi(rt, addr_reg, RAM_SIZE - 1); if (state->offset_ram) jit_movi(tmp, state->offset_ram); - to_end = jit_jmpi(); + to_end = jit_b(); jit_patch(to_not_ram); @@ -1317,7 +1661,7 @@ static void rec_load_direct(struct lightrec_state *state, const struct block *bl jit_movi(tmp, state->offset_bios); if (state->offset_bios != state->offset_scratch) { - to_end2 = jit_jmpi(); + to_end2 = jit_b(); jit_patch(to_not_bios); @@ -1338,79 +1682,122 @@ static void rec_load_direct(struct lightrec_state *state, const struct block *bl jit_new_node_www(code, rt, rt, imm); + if (is_big_endian() && swap_code) { + jit_new_node_ww(swap_code, rt, rt); + + if (c.i.op == OP_LH) + jit_extr_s(rt, rt); + else if (c.i.op == OP_LW && __WORDSIZE == 64) + jit_extr_i(rt, rt); + } + lightrec_free_reg(reg_cache, addr_reg); lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp); } -static void rec_load(struct lightrec_state *state, const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) -{ - u16 flags = block->opcode_list[offset].flags; - - if (flags & LIGHTREC_DIRECT_IO) - rec_load_direct(state, block, offset, code, is_unsigned); - else +static void 
rec_load(struct lightrec_cstate *state, const struct block *block, + u16 offset, jit_code_t code, jit_code_t swap_code, + bool is_unsigned) +{ + u32 flags = block->opcode_list[offset].flags; + + switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { + case LIGHTREC_IO_RAM: + rec_load_ram(state, block, offset, code, swap_code, is_unsigned); + break; + case LIGHTREC_IO_BIOS: + rec_load_bios(state, block, offset, code, swap_code, is_unsigned); + break; + case LIGHTREC_IO_SCRATCH: + rec_load_scratch(state, block, offset, code, swap_code, is_unsigned); + break; + case LIGHTREC_IO_DIRECT_HW: + rec_load_io(state, block, offset, code, swap_code, is_unsigned); + break; + case LIGHTREC_IO_DIRECT: + rec_load_direct(state, block, offset, code, swap_code, is_unsigned); + break; + default: rec_io(state, block, offset, false, true); + break; + } } -static void rec_LB(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_c, false); + rec_load(state, block, offset, jit_code_ldxi_c, 0, false); } -static void rec_LBU(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_uc, true); + rec_load(state, block, offset, jit_code_ldxi_uc, 0, true); } -static void rec_LH(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset) { + jit_code_t code = is_big_endian() ? 
jit_code_ldxi_us : jit_code_ldxi_s; + _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_s, false); + rec_load(state, block, offset, code, jit_code_bswapr_us, false); } -static void rec_LHU(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_us, true); + rec_load(state, block, offset, jit_code_ldxi_us, jit_code_bswapr_us, true); } -static void rec_LWL(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, true, true); } -static void rec_LWR(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, true, true); } -static void rec_LW(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset) { + jit_code_t code; + + if (is_big_endian() && __WORDSIZE == 64) + code = jit_code_ldxi_ui; + else + code = jit_code_ldxi_i; + _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_i, false); + rec_load(state, block, offset, code, jit_code_bswapr_ui, false); } -static void rec_LWC2(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, false, false); } -static void rec_break_syscall(struct lightrec_state *state, const struct block *block, - u16 offset, bool is_break) +static void rec_break_syscall(struct lightrec_cstate *state, + const 
struct block *block, u16 offset, + u32 exit_code) { + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 tmp; + _jit_note(block->_jit, __FILE__, __LINE__); - if (is_break) - call_to_c_wrapper(state, block, 0, false, C_WRAPPER_BREAK); - else - call_to_c_wrapper(state, block, 0, false, C_WRAPPER_SYSCALL); + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_movi(tmp, exit_code); + jit_stxi_i(offsetof(struct lightrec_state, exit_flags), + LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); /* TODO: the return address should be "pc - 4" if we're a delay slot */ lightrec_emit_end_of_block(state, block, offset, -1, @@ -1418,20 +1805,21 @@ static void rec_break_syscall(struct lightrec_state *state, const struct block * 31, 0, true); } -static void rec_special_SYSCALL(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_special_SYSCALL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_break_syscall(state, block, offset, false); + rec_break_syscall(state, block, offset, LIGHTREC_EXIT_SYSCALL); } -static void rec_special_BREAK(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_special_BREAK(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_break_syscall(state, block, offset, true); + rec_break_syscall(state, block, offset, LIGHTREC_EXIT_BREAK); } -static void rec_mfc(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_mfc(struct lightrec_cstate *state, const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -1440,10 +1828,10 @@ static void rec_mfc(struct lightrec_state *state, const struct block *block, jit_note(__FILE__, __LINE__); lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true); - 
call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MFC); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MFC); } -static void rec_mtc(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -1453,85 +1841,494 @@ static void rec_mtc(struct lightrec_state *state, const struct block *block, u16 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false); lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false); - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MTC); if (c.i.op == OP_CP0 && - !(block->opcode_list[offset].flags & LIGHTREC_NO_DS) && + !op_flag_no_ds(block->opcode_list[offset].flags) && (c.r.rd == 12 || c.r.rd == 13)) lightrec_emit_end_of_block(state, block, offset, -1, get_ds_pc(block, offset, 1), 0, 0, true); } -static void rec_cp0_MFC0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void +rec_mfc0(struct lightrec_cstate *state, const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt; + + jit_note(__FILE__, __LINE__); + + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT); + + jit_ldxi_i(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[c.r.rd])); + + lightrec_free_reg(reg_cache, rt); +} + +static bool block_in_bios(const struct lightrec_cstate *state, + const struct block *block) +{ + const struct lightrec_mem_map *bios = &state->state->maps[PSX_MAP_BIOS]; + u32 pc = kunseg(block->pc); + + return pc >= bios->pc && pc < bios->pc + bios->length; +} + +static void +rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) +{ + struct regcache *reg_cache = 
state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt, tmp = 0, tmp2, status; + jit_node_t *to_end; + + jit_note(__FILE__, __LINE__); + + switch(c.r.rd) { + case 1: + case 4: + case 8: + case 14: + case 15: + /* Those registers are read-only */ + return; + default: + break; + } + + if (block_in_bios(state, block) && c.r.rd == 12) { + /* If we are running code from the BIOS, handle writes to the + * Status register in C. BIOS code may toggle bit 16 which will + * map/unmap the RAM, while game code cannot do that. */ + rec_mtc(state, block, offset); + return; + } + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); + + if (c.r.rd != 13) { + jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[c.r.rd]), + LIGHTREC_REG_STATE, rt); + } + + if (c.r.rd == 12 || c.r.rd == 13) { + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[13])); + + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + } + + if (c.r.rd == 12) { + status = rt; + } else if (c.r.rd == 13) { + /* Cause = (Cause & ~0x0300) | (value & 0x0300) */ + jit_andi(tmp2, rt, 0x0300); + jit_ori(tmp, tmp, 0x0300); + jit_xori(tmp, tmp, 0x0300); + jit_orr(tmp, tmp, tmp2); + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[12])); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[13]), + LIGHTREC_REG_STATE, tmp); + status = tmp2; + } + + if (c.r.rd == 12 || c.r.rd == 13) { + /* Exit dynarec in case there's a software interrupt. + * exit_flags = !!(status & tmp & 0x0300) & status; */ + jit_andr(tmp, tmp, status); + jit_andi(tmp, tmp, 0x0300); + jit_nei(tmp, tmp, 0); + jit_andr(tmp, tmp, status); + } + + if (c.r.rd == 12) { + /* Exit dynarec in case we unmask a hardware interrupt. 
+ * exit_flags = !(~status & 0x401) */ + + jit_comr(tmp2, status); + jit_andi(tmp2, tmp2, 0x401); + jit_eqi(tmp2, tmp2, 0); + jit_orr(tmp, tmp, tmp2); + } + + lightrec_free_reg(reg_cache, rt); + + if (c.r.rd == 12 || c.r.rd == 13) { + to_end = jit_beqi(tmp, 0); + + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(tmp2, tmp2, LIGHTREC_REG_CYCLE); + jit_movi(LIGHTREC_REG_CYCLE, 0); + jit_stxi_i(offsetof(struct lightrec_state, target_cycle), + LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, tmp2); + + + jit_patch(to_end); + } + + if (!op_flag_no_ds(block->opcode_list[offset].flags) && + (c.r.rd == 12 || c.r.rd == 13)) { + state->cycles += lightrec_cycles_of_opcode(c); + lightrec_emit_eob(state, block, offset + 1); + } +} + +static void rec_cp0_MFC0(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + rec_mfc0(state, block, offset); } -static void rec_cp0_CFC0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp0_CFC0(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + rec_mfc0(state, block, offset); } -static void rec_cp0_MTC0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp0_MTC0(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + rec_mtc0(state, block, offset); } -static void rec_cp0_CTC0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp0_CTC0(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + rec_mtc0(state, block, offset); } -static void rec_cp2_basic_MFC2(struct lightrec_state 
*state, const struct block *block, - u16 offset) +static unsigned int cp2d_i_offset(u8 reg) { + return offsetof(struct lightrec_state, regs.cp2d[reg]); +} + +static unsigned int cp2d_s_offset(u8 reg) +{ + return cp2d_i_offset(reg) + is_big_endian() * 2; +} + +static unsigned int cp2c_i_offset(u8 reg) +{ + return offsetof(struct lightrec_state, regs.cp2c[reg]); +} + +static unsigned int cp2c_s_offset(u8 reg) +{ + return cp2c_i_offset(reg) + is_big_endian() * 2; +} + +static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + const u32 zext_regs = 0x300f0080; + u8 rt, tmp, tmp2, tmp3, out, flags; + u8 reg = c.r.rd == 15 ? 14 : c.r.rd; + unsigned int i; + _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + + if (state->state->ops.cop2_notify) { + /* We must call cop2_notify, handle that in C. */ + rec_mfc(state, block, offset); + return; + } + + flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT; + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, flags); + + switch (reg) { + case 1: + case 3: + case 5: + case 8: + case 9: + case 10: + case 11: + jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg)); + break; + case 7: + case 16: + case 17: + case 18: + case 19: + jit_ldxi_us(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg)); + break; + case 28: + case 29: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); + + for (i = 0; i < 3; i++) { + out = i == 0 ? 
rt : tmp; + + jit_ldxi_s(tmp, LIGHTREC_REG_STATE, cp2d_s_offset(9 + i)); + jit_movi(tmp2, 0x1f); + jit_rshi(out, tmp, 7); + + jit_ltr(tmp3, tmp2, out); + jit_movnr(out, tmp2, tmp3); + + jit_gei(tmp2, out, 0); + jit_movzr(out, tmp2, tmp2); + + if (i > 0) { + jit_lshi(tmp, tmp, 5 * i); + jit_orr(rt, rt, tmp); + } + } + + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + lightrec_free_reg(reg_cache, tmp3); + break; + default: + jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2d_i_offset(reg)); + break; + } + + lightrec_free_reg(reg_cache, rt); } -static void rec_cp2_basic_CFC2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp2_basic_CFC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt; + _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + + if (state->state->ops.cop2_notify) { + /* We must call cop2_notify, handle that in C. 
*/ + rec_mfc(state, block, offset); + return; + } + + switch (c.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT); + jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2c_s_offset(c.r.rd)); + break; + default: + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT); + jit_ldxi_ui(rt, LIGHTREC_REG_STATE, cp2c_i_offset(c.r.rd)); + break; + } + + lightrec_free_reg(reg_cache, rt); } -static void rec_cp2_basic_MTC2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + jit_node_t *loop, *to_loop; + u8 rt, tmp, tmp2, flags = 0; + _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + + if (state->state->ops.cop2_notify) { + /* We must call cop2_notify, handle that in C. 
*/ + rec_mtc(state, block, offset); + return; + } + + if (c.r.rd == 31) + return; + + if (c.r.rd == 30) + flags |= REG_EXT; + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags); + + switch (c.r.rd) { + case 15: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13)); + + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, cp2d_i_offset(14)); + + jit_stxi_i(cp2d_i_offset(12), LIGHTREC_REG_STATE, tmp); + jit_stxi_i(cp2d_i_offset(13), LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(cp2d_i_offset(14), LIGHTREC_REG_STATE, rt); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + case 28: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_lshi(tmp, rt, 7); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(cp2d_s_offset(9), LIGHTREC_REG_STATE, tmp); + + jit_lshi(tmp, rt, 2); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(cp2d_s_offset(10), LIGHTREC_REG_STATE, tmp); + + jit_rshi(tmp, rt, 3); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(cp2d_s_offset(11), LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); + break; + case 30: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + /* if (rt < 0) rt = ~rt; */ + jit_rshi(tmp, rt, 31); + jit_xorr(tmp, rt, tmp); + + /* We know the sign bit is 0. 
Left-shift by 1 to start the algorithm */ + jit_lshi(tmp, tmp, 1); + jit_movi(tmp2, 33); + + /* Decrement tmp2 and right-shift the value by 1 until it equals zero */ + loop = jit_label(); + jit_subi(tmp2, tmp2, 1); + jit_rshi_u(tmp, tmp, 1); + to_loop = jit_bnei(tmp, 0); + + jit_patch_at(to_loop, loop); + + jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(cp2d_i_offset(30), LIGHTREC_REG_STATE, rt); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + default: + jit_stxi_i(cp2d_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt); + break; + } + + lightrec_free_reg(reg_cache, rt); } -static void rec_cp2_basic_CTC2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp2_basic_CTC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt, tmp, tmp2; + _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + + if (state->state->ops.cop2_notify) { + /* We must call cop2_notify, handle that in C. 
*/ + rec_mtc(state, block, offset); + return; + } + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0); + + switch (c.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + jit_stxi_s(cp2c_s_offset(c.r.rd), LIGHTREC_REG_STATE, rt); + break; + case 31: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_andi(tmp, rt, 0x7f87e000); + jit_nei(tmp, tmp, 0); + jit_lshi(tmp, tmp, 31); + + jit_andi(tmp2, rt, 0x7ffff000); + jit_orr(tmp, tmp2, tmp); + + jit_stxi_i(cp2c_i_offset(31), LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + + default: + jit_stxi_i(cp2c_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt); + } + + lightrec_free_reg(reg_cache, rt); } -static void rec_cp0_RFE(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp0_RFE(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; + u8 status, tmp; jit_name(__func__); jit_note(__FILE__, __LINE__); - call_to_c_wrapper(state, block, 0, false, C_WRAPPER_RFE); + status = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(status, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[12])); + + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + /* status = ((status >> 2) & 0xf) | status & ~0xf; */ + jit_rshi(tmp, status, 2); + jit_andi(tmp, tmp, 0xf); + jit_andi(status, status, ~0xful); + jit_orr(status, status, tmp); + + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[13])); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[12]), + LIGHTREC_REG_STATE, status); + + /* Exit dynarec in case there's a software interrupt. 
+ * exit_flags = !!(status & cause & 0x0300) & status; */ + jit_andr(tmp, tmp, status); + jit_andi(tmp, tmp, 0x0300); + jit_nei(tmp, tmp, 0); + jit_andr(tmp, tmp, status); + jit_stxi_i(offsetof(struct lightrec_state, exit_flags), + LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, status); + lightrec_free_reg(reg_cache, tmp); } -static void rec_CP(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_CP(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; @@ -1539,48 +2336,119 @@ static void rec_CP(struct lightrec_state *state, const struct block *block, jit_name(__func__); jit_note(__FILE__, __LINE__); - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_CP); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_CP); } -static void rec_meta_BEQZ(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_meta_MOV(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rs, rd; + _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_bnei, 0, false, true); + jit_note(__FILE__, __LINE__); + if (c.r.rs) + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0); + rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT); + + if (c.r.rs == 0) + jit_movi(rd, 0); + else + jit_extr_i(rd, rs); + + if (c.r.rs) + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rd); } -static void rec_meta_BNEZ(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state, + const struct block *block, + u16 offset) { + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rs, rt; + 
_jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_beqi, 0, false, true); + jit_note(__FILE__, __LINE__); + + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT); + + if (c.i.op == OP_META_EXTC) + jit_extr_c(rt, rs); + else + jit_extr_s(rt, rs); + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rt); } -static void rec_meta_MOV(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_meta_MULT2(struct lightrec_cstate *state, + const struct block *block, + u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; - u8 rs, rd; + u8 reg_lo = get_mult_div_lo(c); + u8 reg_hi = get_mult_div_hi(c); + u32 flags = block->opcode_list[offset].flags; + bool is_signed = c.i.op == OP_META_MULT2; + u8 rs, lo, hi, rflags = 0, hiflags = 0; + unsigned int i; + + if (!op_flag_no_hi(flags) && c.r.op < 32) { + rflags = is_signed ? REG_EXT : REG_ZEXT; + hiflags = is_signed ? REG_EXT : (REG_EXT | REG_ZEXT); + } _jit_name(block->_jit, __func__); jit_note(__FILE__, __LINE__); - rs = c.r.rs ? lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0) : 0; - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT); - if (c.r.rs == 0) { - jit_movi(rd, 0); - } else { -#if __WORDSIZE == 32 - jit_movr(rd, rs); -#else - jit_extr_i(rd, rs); -#endif + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, rflags); + + /* + * We must handle the case where one of the output registers is our rs + * input register. Thankfully, computing LO/HI can be done in any + * order. Here, we make sure that the computation that overwrites the + * input register is always performed last. 
+ */ + for (i = 0; i < 2; i++) { + if ((!i ^ (reg_lo == c.i.rs)) && !op_flag_no_lo(flags)) { + lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0); + + if (c.r.op < 32) + jit_lshi(lo, rs, c.r.op); + else + jit_movi(lo, 0); + + lightrec_free_reg(reg_cache, lo); + continue; + } + + if ((!!i ^ (reg_lo == c.i.rs)) && !op_flag_no_hi(flags)) { + hi = lightrec_alloc_reg_out(reg_cache, _jit, + reg_hi, hiflags); + + if (c.r.op >= 32) + jit_lshi(hi, rs, c.r.op - 32); + else if (is_signed) + jit_rshi(hi, rs, 32 - c.r.op); + else + jit_rshi_u(hi, rs, 32 - c.r.op); + + lightrec_free_reg(reg_cache, hi); + } } - lightrec_free_reg(state->reg_cache, rs); - lightrec_free_reg(state->reg_cache, rd); + lightrec_free_reg(reg_cache, rs); + + _jit_name(block->_jit, __func__); + jit_note(__FILE__, __LINE__); } static const lightrec_rec_func_t rec_standard[64] = { @@ -1618,9 +2486,11 @@ static const lightrec_rec_func_t rec_standard[64] = { [OP_LWC2] = rec_LWC2, [OP_SWC2] = rec_SWC2, - [OP_META_BEQZ] = rec_meta_BEQZ, - [OP_META_BNEZ] = rec_meta_BNEZ, [OP_META_MOV] = rec_meta_MOV, + [OP_META_EXTC] = rec_meta_EXTC_EXTS, + [OP_META_EXTS] = rec_meta_EXTC_EXTS, + [OP_META_MULT2] = rec_meta_MULT2, + [OP_META_MULTU2] = rec_meta_MULT2, }; static const lightrec_rec_func_t rec_special[64] = { @@ -1680,8 +2550,8 @@ static const lightrec_rec_func_t rec_cp2_basic[64] = { [OP_CP2_BASIC_CTC2] = rec_cp2_basic_CTC2, }; -static void rec_SPECIAL(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SPECIAL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; lightrec_rec_func_t f = rec_special[c.r.op]; @@ -1692,8 +2562,8 @@ static void rec_SPECIAL(struct lightrec_state *state, const struct block *block, (*f)(state, block, offset); } -static void rec_REGIMM(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_REGIMM(struct lightrec_cstate *state, + const struct block *block, 
u16 offset) { union code c = block->opcode_list[offset].c; lightrec_rec_func_t f = rec_regimm[c.r.rt]; @@ -1704,8 +2574,8 @@ static void rec_REGIMM(struct lightrec_state *state, const struct block *block, (*f)(state, block, offset); } -static void rec_CP0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_CP0(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; lightrec_rec_func_t f = rec_cp0[c.r.rs]; @@ -1716,8 +2586,8 @@ static void rec_CP0(struct lightrec_state *state, const struct block *block, (*f)(state, block, offset); } -static void rec_CP2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_CP2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -1733,17 +2603,19 @@ static void rec_CP2(struct lightrec_state *state, const struct block *block, rec_CP(state, block, offset); } -void lightrec_rec_opcode(struct lightrec_state *state, const struct block *block, - u16 offset) +void lightrec_rec_opcode(struct lightrec_cstate *state, + const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; struct lightrec_branch_target *target; const struct opcode *op = &block->opcode_list[offset]; jit_state_t *_jit = block->_jit; lightrec_rec_func_t f; + u16 unload_offset; - if (op->flags & LIGHTREC_SYNC) { - jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles); + if (op_flag_sync(op->flags)) { + if (state->cycles) + jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles); state->cycles = 0; lightrec_storeback_regs(reg_cache, _jit); @@ -1764,16 +2636,10 @@ void lightrec_rec_opcode(struct lightrec_state *state, const struct block *block (*f)(state, block, offset); } - if (unlikely(op->flags & LIGHTREC_UNLOAD_RD)) { - lightrec_clean_reg_if_loaded(reg_cache, _jit, op->r.rd, true); - pr_debug("Cleaning RD reg %s\n", 
lightrec_reg_name(op->r.rd)); - } - if (unlikely(op->flags & LIGHTREC_UNLOAD_RS)) { - lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true); - pr_debug("Cleaning RS reg %s\n", lightrec_reg_name(op->i.rt)); - } - if (unlikely(op->flags & LIGHTREC_UNLOAD_RT)) { - lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true); - pr_debug("Cleaning RT reg %s\n", lightrec_reg_name(op->i.rt)); + if (OPT_EARLY_UNLOAD) { + unload_offset = offset + + (has_delay_slot(op->c) && !op_flag_no_ds(op->flags)); + + lightrec_do_early_unload(state, block, unload_offset); } } diff --git a/deps/lightrec/emitter.h b/deps/lightrec/emitter.h index d0fb883c9..4cbe8da64 100644 --- a/deps/lightrec/emitter.h +++ b/deps/lightrec/emitter.h @@ -9,11 +9,11 @@ #include "lightrec.h" struct block; +struct lightrec_cstate; struct opcode; -void lightrec_rec_opcode(struct lightrec_state *state, const struct block *block, - u16 offset); -void lightrec_emit_eob(struct lightrec_state *state, const struct block *block, - u16 offset); +void lightrec_rec_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset); +void lightrec_emit_eob(struct lightrec_cstate *state, + const struct block *block, u16 offset); #endif /* __EMITTER_H__ */ diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c index 199233815..43bea83f0 100644 --- a/deps/lightrec/interpreter.c +++ b/deps/lightrec/interpreter.c @@ -63,7 +63,7 @@ static inline u32 jump_skip(struct interpreter *inter) inter->op = next_op(inter); inter->offset++; - if (inter->op->flags & LIGHTREC_SYNC) { + if (op_flag_sync(inter->op->flags)) { inter->state->current_cycle += inter->cycles; inter->cycles = 0; } @@ -101,8 +101,8 @@ static void update_cycles_before_branch(struct interpreter *inter) if (!inter->delay_slot) { cycles = lightrec_cycles_of_opcode(inter->op->c); - if (has_delay_slot(inter->op->c) && - !(inter->op->flags & LIGHTREC_NO_DS)) + if (!op_flag_no_ds(inter->op->flags) && + has_delay_slot(inter->op->c)) cycles 
+= lightrec_cycles_of_opcode(next_op(inter)->c); inter->cycles += cycles; @@ -120,10 +120,8 @@ static bool is_branch_taken(const u32 *reg_cache, union code op) case OP_JAL: return true; case OP_BEQ: - case OP_META_BEQZ: return reg_cache[op.r.rs] == reg_cache[op.r.rt]; case OP_BNE: - case OP_META_BNEZ: return reg_cache[op.r.rs] != reg_cache[op.r.rt]; case OP_REGIMM: switch (op.r.rt) { @@ -144,7 +142,7 @@ static bool is_branch_taken(const u32 *reg_cache, union code op) static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) { struct lightrec_state *state = inter->state; - u32 *reg_cache = state->native_reg_cache; + u32 *reg_cache = state->regs.gpr; struct opcode new_op, *op = next_op(inter); union code op_next; struct interpreter inter2 = { @@ -169,8 +167,8 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) * but on branch boundaries, we need to adjust the return * address so that the GTE opcode is effectively executed. */ - cause = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 13); - epc = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 14); + cause = state->regs.cp0[13]; + epc = state->regs.cp0[14]; if (!(cause & 0x7c) && epc == pc - 4) pc -= 4; @@ -329,9 +327,9 @@ static u32 int_jump(struct interpreter *inter, bool link) u32 pc = (old_pc & 0xf0000000) | (inter->op->j.imm << 2); if (link) - state->native_reg_cache[31] = old_pc + 8; + state->regs.gpr[31] = old_pc + 8; - if (inter->op->flags & LIGHTREC_NO_DS) + if (op_flag_no_ds(inter->op->flags)) return pc; return int_delay_slot(inter, pc, true); @@ -350,14 +348,18 @@ static u32 int_JAL(struct interpreter *inter) static u32 int_jumpr(struct interpreter *inter, u8 link_reg) { struct lightrec_state *state = inter->state; - u32 old_pc, next_pc = state->native_reg_cache[inter->op->r.rs]; + u32 old_pc = int_get_branch_pc(inter); + u32 next_pc = state->regs.gpr[inter->op->r.rs]; - if (link_reg) { - old_pc = int_get_branch_pc(inter); - state->native_reg_cache[link_reg] = old_pc 
+ 8; + if (op_flag_emulate_branch(inter->op->flags) && inter->offset) { + inter->cycles -= lightrec_cycles_of_opcode(inter->op->c); + return old_pc; } - if (inter->op->flags & LIGHTREC_NO_DS) + if (link_reg) + state->regs.gpr[link_reg] = old_pc + 8; + + if (op_flag_no_ds(inter->op->flags)) return next_pc; return int_delay_slot(inter, next_pc, true); @@ -375,8 +377,7 @@ static u32 int_special_JALR(struct interpreter *inter) static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc) { - if (!inter->delay_slot && - (inter->op->flags & LIGHTREC_LOCAL_BRANCH) && + if (!inter->delay_slot && op_flag_local_branch(inter->op->flags) && (s16)inter->op->c.i.imm >= 0) { next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2); next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc); @@ -390,9 +391,14 @@ static u32 int_branch(struct interpreter *inter, u32 pc, { u32 next_pc = pc + 4 + ((s16)code.i.imm << 2); + if (op_flag_emulate_branch(inter->op->flags) && inter->offset) { + inter->cycles -= lightrec_cycles_of_opcode(inter->op->c); + return pc; + } + update_cycles_before_branch(inter); - if (inter->op->flags & LIGHTREC_NO_DS) { + if (op_flag_no_ds(inter->op->flags)) { if (branch) return int_do_branch(inter, pc, next_pc); else @@ -405,7 +411,7 @@ static u32 int_branch(struct interpreter *inter, u32 pc, if (branch) return int_do_branch(inter, pc, next_pc); - if (inter->op->flags & LIGHTREC_EMULATE_BRANCH) + if (op_flag_emulate_branch(inter->op->flags)) return pc + 8; else return jump_after_branch(inter); @@ -415,8 +421,8 @@ static u32 int_beq(struct interpreter *inter, bool bne) { u32 rs, rt, old_pc = int_get_branch_pc(inter); - rs = inter->state->native_reg_cache[inter->op->i.rs]; - rt = inter->state->native_reg_cache[inter->op->i.rt]; + rs = inter->state->regs.gpr[inter->op->i.rs]; + rt = inter->state->regs.gpr[inter->op->i.rt]; return int_branch(inter, old_pc, inter->op->c, (rs == rt) ^ bne); } @@ -437,9 +443,9 @@ static u32 int_bgez(struct 
interpreter *inter, bool link, bool lt, bool regimm) s32 rs; if (link) - inter->state->native_reg_cache[31] = old_pc + 8; + inter->state->regs.gpr[31] = old_pc + 8; - rs = (s32)inter->state->native_reg_cache[inter->op->i.rs]; + rs = (s32)inter->state->regs.gpr[inter->op->i.rs]; return int_branch(inter, old_pc, inter->op->c, ((regimm && !rs) || rs > 0) ^ lt); @@ -484,7 +490,7 @@ static u32 int_cfc(struct interpreter *inter) val = lightrec_mfc(state, op->c); if (likely(op->r.rt)) - state->native_reg_cache[op->r.rt] = val; + state->regs.gpr[op->r.rt] = val; return jump_next(inter); } @@ -494,12 +500,12 @@ static u32 int_ctc(struct interpreter *inter) struct lightrec_state *state = inter->state; const struct opcode *op = inter->op; - lightrec_mtc(state, op->c, state->native_reg_cache[op->r.rt]); + lightrec_mtc(state, op->c, state->regs.gpr[op->r.rt]); /* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause), * return early so that the emulator will be able to check software * interrupt status. 
*/ - if (!(inter->op->flags & LIGHTREC_NO_DS) && + if (!op_flag_no_ds(inter->op->flags) && op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13)) return int_get_ds_pc(inter, 1); else @@ -508,40 +514,21 @@ static u32 int_ctc(struct interpreter *inter) static u32 int_cp0_RFE(struct interpreter *inter) { - struct lightrec_state *state = inter->state; - u32 status; - - /* Read CP0 Status register (r12) */ - status = state->ops.cop0_ops.mfc(state, inter->op->c.opcode, 12); - - /* Switch the bits */ - status = ((status & 0x3c) >> 2) | (status & ~0xf); - - /* Write it back */ - state->ops.cop0_ops.ctc(state, inter->op->c.opcode, 12, status); + lightrec_rfe(inter->state); return jump_next(inter); } static u32 int_CP(struct interpreter *inter) { - struct lightrec_state *state = inter->state; - const struct lightrec_cop_ops *ops; - const struct opcode *op = inter->op; - - if (op->i.op == OP_CP2) - ops = &state->ops.cop2_ops; - else - ops = &state->ops.cop0_ops; - - (*ops->op)(state, (op->j.imm) & ~(1 << 25)); + lightrec_cp(inter->state, inter->op->c); return jump_next(inter); } static u32 int_ADDI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -552,7 +539,7 @@ static u32 int_ADDI(struct interpreter *inter) static u32 int_SLTI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -563,7 +550,7 @@ static u32 int_SLTI(struct interpreter *inter) static u32 int_SLTIU(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -574,7 +561,7 @@ static u32 int_SLTIU(struct interpreter *inter) static u32 int_ANDI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 
*reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -585,7 +572,7 @@ static u32 int_ANDI(struct interpreter *inter) static u32 int_ORI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -596,7 +583,7 @@ static u32 int_ORI(struct interpreter *inter) static u32 int_XORI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -609,7 +596,7 @@ static u32 int_LUI(struct interpreter *inter) { struct opcode_i *op = &inter->op->i; - inter->state->native_reg_cache[op->rt] = op->imm << 16; + inter->state->regs.gpr[op->rt] = op->imm << 16; return jump_next(inter); } @@ -617,7 +604,7 @@ static u32 int_LUI(struct interpreter *inter) static u32 int_io(struct interpreter *inter, bool is_load) { struct opcode_i *op = &inter->op->i; - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; u32 val; val = lightrec_rw(inter->state, inter->op->c, @@ -639,12 +626,12 @@ static u32 int_store(struct interpreter *inter) { u32 next_pc; - if (likely(!(inter->op->flags & LIGHTREC_SMC))) + if (likely(!op_flag_smc(inter->op->flags))) return int_io(inter, false); lightrec_rw(inter->state, inter->op->c, - inter->state->native_reg_cache[inter->op->i.rs], - inter->state->native_reg_cache[inter->op->i.rt], + inter->state->regs.gpr[inter->op->i.rs], + inter->state->regs.gpr[inter->op->i.rt], &inter->op->flags, inter->block); next_pc = int_get_ds_pc(inter, 1); @@ -666,8 +653,8 @@ static u32 int_special_SLL(struct interpreter *inter) u32 rt; if (op->opcode) { /* Handle NOPs */ - rt = inter->state->native_reg_cache[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt << op->r.imm; + rt = inter->state->regs.gpr[op->r.rt]; + inter->state->regs.gpr[op->r.rd] = rt << 
op->r.imm; } return jump_next(inter); @@ -676,9 +663,9 @@ static u32 int_special_SLL(struct interpreter *inter) static u32 int_special_SRL(struct interpreter *inter) { struct opcode *op = inter->op; - u32 rt = inter->state->native_reg_cache[op->r.rt]; + u32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm; + inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm; return jump_next(inter); } @@ -686,9 +673,9 @@ static u32 int_special_SRL(struct interpreter *inter) static u32 int_special_SRA(struct interpreter *inter) { struct opcode *op = inter->op; - s32 rt = inter->state->native_reg_cache[op->r.rt]; + s32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm; + inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm; return jump_next(inter); } @@ -696,10 +683,10 @@ static u32 int_special_SRA(struct interpreter *inter) static u32 int_special_SLLV(struct interpreter *inter) { struct opcode *op = inter->op; - u32 rs = inter->state->native_reg_cache[op->r.rs]; - u32 rt = inter->state->native_reg_cache[op->r.rt]; + u32 rs = inter->state->regs.gpr[op->r.rs]; + u32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt << (rs & 0x1f); + inter->state->regs.gpr[op->r.rd] = rt << (rs & 0x1f); return jump_next(inter); } @@ -707,10 +694,10 @@ static u32 int_special_SLLV(struct interpreter *inter) static u32 int_special_SRLV(struct interpreter *inter) { struct opcode *op = inter->op; - u32 rs = inter->state->native_reg_cache[op->r.rs]; - u32 rt = inter->state->native_reg_cache[op->r.rt]; + u32 rs = inter->state->regs.gpr[op->r.rs]; + u32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f); + inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f); return jump_next(inter); } @@ -718,10 +705,10 @@ static u32 int_special_SRLV(struct interpreter *inter) static u32 int_special_SRAV(struct interpreter *inter) { struct 
opcode *op = inter->op; - u32 rs = inter->state->native_reg_cache[op->r.rs]; - s32 rt = inter->state->native_reg_cache[op->r.rt]; + u32 rs = inter->state->regs.gpr[op->r.rs]; + s32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f); + inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f); return jump_next(inter); } @@ -739,7 +726,7 @@ static u32 int_syscall_break(struct interpreter *inter) static u32 int_special_MFHI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; if (likely(op->rd)) @@ -750,7 +737,7 @@ static u32 int_special_MFHI(struct interpreter *inter) static u32 int_special_MTHI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; reg_cache[REG_HI] = reg_cache[inter->op->r.rs]; @@ -759,7 +746,7 @@ static u32 int_special_MTHI(struct interpreter *inter) static u32 int_special_MFLO(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; if (likely(op->rd)) @@ -770,7 +757,7 @@ static u32 int_special_MFLO(struct interpreter *inter) static u32 int_special_MTLO(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; reg_cache[REG_LO] = reg_cache[inter->op->r.rs]; @@ -779,16 +766,16 @@ static u32 int_special_MTLO(struct interpreter *inter) static u32 int_special_MULT(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; s32 rs = reg_cache[inter->op->r.rs]; s32 rt = reg_cache[inter->op->r.rt]; u8 reg_lo = get_mult_div_lo(inter->op->c); u8 reg_hi = get_mult_div_hi(inter->op->c); u64 res = (s64)rs * (s64)rt; - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) 
reg_cache[reg_hi] = res >> 32; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = res; return jump_next(inter); @@ -796,16 +783,16 @@ static u32 int_special_MULT(struct interpreter *inter) static u32 int_special_MULTU(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; u32 rs = reg_cache[inter->op->r.rs]; u32 rt = reg_cache[inter->op->r.rt]; u8 reg_lo = get_mult_div_lo(inter->op->c); u8 reg_hi = get_mult_div_hi(inter->op->c); u64 res = (u64)rs * (u64)rt; - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = res >> 32; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = res; return jump_next(inter); @@ -813,7 +800,7 @@ static u32 int_special_MULTU(struct interpreter *inter) static u32 int_special_DIV(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; s32 rs = reg_cache[inter->op->r.rs]; s32 rt = reg_cache[inter->op->r.rt]; u8 reg_lo = get_mult_div_lo(inter->op->c); @@ -828,9 +815,9 @@ static u32 int_special_DIV(struct interpreter *inter) hi = rs % rt; } - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = hi; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = lo; return jump_next(inter); @@ -838,7 +825,7 @@ static u32 int_special_DIV(struct interpreter *inter) static u32 int_special_DIVU(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; u32 rs = reg_cache[inter->op->r.rs]; u32 rt = reg_cache[inter->op->r.rt]; u8 reg_lo = get_mult_div_lo(inter->op->c); @@ -853,9 +840,9 @@ static u32 int_special_DIVU(struct interpreter *inter) hi = rs % rt; } - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if 
(!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = hi; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = lo; return jump_next(inter); @@ -863,7 +850,7 @@ static u32 int_special_DIVU(struct interpreter *inter) static u32 int_special_ADD(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; s32 rs = reg_cache[op->rs]; s32 rt = reg_cache[op->rt]; @@ -876,7 +863,7 @@ static u32 int_special_ADD(struct interpreter *inter) static u32 int_special_SUB(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -889,7 +876,7 @@ static u32 int_special_SUB(struct interpreter *inter) static u32 int_special_AND(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -902,7 +889,7 @@ static u32 int_special_AND(struct interpreter *inter) static u32 int_special_OR(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -915,7 +902,7 @@ static u32 int_special_OR(struct interpreter *inter) static u32 int_special_XOR(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -928,7 +915,7 @@ static u32 int_special_XOR(struct interpreter *inter) static u32 int_special_NOR(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; 
struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -941,7 +928,7 @@ static u32 int_special_NOR(struct interpreter *inter) static u32 int_special_SLT(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; s32 rs = reg_cache[op->rs]; s32 rt = reg_cache[op->rt]; @@ -954,7 +941,7 @@ static u32 int_special_SLT(struct interpreter *inter) static u32 int_special_SLTU(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -967,7 +954,7 @@ static u32 int_special_SLTU(struct interpreter *inter) static u32 int_META_MOV(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; if (likely(op->rd)) @@ -976,6 +963,55 @@ static u32 int_META_MOV(struct interpreter *inter) return jump_next(inter); } +static u32 int_META_EXTC(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->regs.gpr; + struct opcode_i *op = &inter->op->i; + + if (likely(op->rt)) + reg_cache[op->rt] = (u32)(s32)(s8)reg_cache[op->rs]; + + return jump_next(inter); +} + +static u32 int_META_EXTS(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->regs.gpr; + struct opcode_i *op = &inter->op->i; + + if (likely(op->rt)) + reg_cache[op->rt] = (u32)(s32)(s16)reg_cache[op->rs]; + + return jump_next(inter); +} + +static u32 int_META_MULT2(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->regs.gpr; + union code c = inter->op->c; + u32 rs = reg_cache[c.r.rs]; + u8 reg_lo = get_mult_div_lo(c); + u8 reg_hi = get_mult_div_hi(c); + + if (!op_flag_no_lo(inter->op->flags)) { + if (c.r.op < 32) + reg_cache[reg_lo] = rs << c.r.op; + else + reg_cache[reg_lo] = 0; + } + + if 
(!op_flag_no_hi(inter->op->flags)) { + if (c.r.op >= 32) + reg_cache[reg_hi] = rs << (c.r.op - 32); + else if (c.i.op == OP_META_MULT2) + reg_cache[reg_hi] = (s32) rs >> (32 - c.r.op); + else + reg_cache[reg_hi] = rs >> (32 - c.r.op); + } + + return jump_next(inter); +} + static const lightrec_int_func_t int_standard[64] = { SET_DEFAULT_ELM(int_standard, int_unimplemented), [OP_SPECIAL] = int_SPECIAL, @@ -1011,9 +1047,11 @@ static const lightrec_int_func_t int_standard[64] = { [OP_LWC2] = int_LWC2, [OP_SWC2] = int_store, - [OP_META_BEQZ] = int_BEQ, - [OP_META_BNEZ] = int_BNE, [OP_META_MOV] = int_META_MOV, + [OP_META_EXTC] = int_META_EXTC, + [OP_META_EXTS] = int_META_EXTS, + [OP_META_MULT2] = int_META_MULT2, + [OP_META_MULTU2] = int_META_MULT2, }; static const lightrec_int_func_t int_special[64] = { @@ -1146,5 +1184,7 @@ u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u3 pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); + return 0; } diff --git a/deps/lightrec/lightning-wrapper.h b/deps/lightrec/lightning-wrapper.h new file mode 100644 index 000000000..b0e8bf3bb --- /dev/null +++ b/deps/lightrec/lightning-wrapper.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2022 Paul Cercueil + */ + +#ifndef __LIGHTNING_WRAPPER_H__ +#define __LIGHTNING_WRAPPER_H__ + +#include + +#if __WORDSIZE == 32 + +#define jit_ldxi_ui(u,v,w) jit_ldxi_i(u,v,w) +#define jit_stxi_ui(u,v,w) jit_stxi_i(u,v,w) +#define jit_extr_i(u,v) jit_movr(u,v) +#define jit_extr_ui(u,v) jit_movr(u,v) +#define jit_retval_ui(u) jit_retval(u) +#define jit_getarg_ui(u,v) jit_getarg_i(u,v) + +#endif + +#define jit_b() jit_beqr(0, 0) + +#endif /* __LIGHTNING_WRAPPER_H__ */ diff --git a/deps/lightrec/config.h.cmakein b/deps/lightrec/lightrec-config.h.cmakein similarity index 91% rename from deps/lightrec/config.h.cmakein rename to deps/lightrec/lightrec-config.h.cmakein index 
47eac522c..11886653a 100644 --- a/deps/lightrec/config.h.cmakein +++ b/deps/lightrec/lightrec-config.h.cmakein @@ -9,7 +9,7 @@ #cmakedefine01 ENABLE_THREADED_COMPILER #cmakedefine01 ENABLE_FIRST_PASS #cmakedefine01 ENABLE_DISASSEMBLER -#cmakedefine01 ENABLE_TINYMM +#cmakedefine01 ENABLE_CODE_BUFFER #cmakedefine01 HAS_DEFAULT_ELM @@ -20,6 +20,7 @@ #cmakedefine01 OPT_LOCAL_BRANCHES #cmakedefine01 OPT_SWITCH_DELAY_SLOTS #cmakedefine01 OPT_FLAG_STORES +#cmakedefine01 OPT_FLAG_IO #cmakedefine01 OPT_FLAG_MULT_DIV #cmakedefine01 OPT_EARLY_UNLOAD diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index 86ca1b826..56032f500 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -6,16 +6,25 @@ #ifndef __LIGHTREC_PRIVATE_H__ #define __LIGHTREC_PRIVATE_H__ -#include "config.h" +#include "lightning-wrapper.h" +#include "lightrec-config.h" #include "disassembler.h" #include "lightrec.h" +#include "regcache.h" #if ENABLE_THREADED_COMPILER #include #endif +#ifdef _MSC_BUILD +#include +#endif + #define ARRAY_SIZE(x) (sizeof(x) ? 
sizeof(x) / sizeof((x)[0]) : 0) +#define GENMASK(h, l) \ + (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h)))) + #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x),1) # define unlikely(x) __builtin_expect(!!(x),0) @@ -42,12 +51,28 @@ #define SET_DEFAULT_ELM(table, value) [0] = NULL #endif +#define fallthrough do {} while (0) /* fall-through */ + +#define container_of(ptr, type, member) \ + ((type *)((void *)(ptr) - offsetof(type, member))) + +#ifdef _MSC_BUILD +# define popcount32(x) __popcnt(x) +# define clz32(x) _lzcnt_u32(x) +# define ctz32(x) _tzcnt_u32(x) +#else +# define popcount32(x) __builtin_popcount(x) +# define clz32(x) __builtin_clz(x) +# define ctz32(x) __builtin_ctz(x) +#endif + /* Flags for (struct block *)->flags */ #define BLOCK_NEVER_COMPILE BIT(0) #define BLOCK_SHOULD_RECOMPILE BIT(1) #define BLOCK_FULLY_TAGGED BIT(2) #define BLOCK_IS_DEAD BIT(3) #define BLOCK_IS_MEMSET BIT(4) +#define BLOCK_NO_OPCODE_LIST BIT(5) #define RAM_SIZE 0x200000 #define BIOS_SIZE 0x80000 @@ -66,9 +91,16 @@ struct blockcache; struct recompiler; struct regcache; struct opcode; -struct tinymm; struct reaper; +struct u16x2 { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + u16 h, l; +#else + u16 l, h; +#endif +}; + struct block { jit_state_t *_jit; struct opcode *opcode_list; @@ -77,11 +109,13 @@ struct block { struct block *next; u32 pc; u32 hash; + u32 precompile_date; unsigned int code_size; u16 nb_ops; - u8 flags; #if ENABLE_THREADED_COMPILER - atomic_flag op_list_freed; + _Atomic u8 flags; +#else + u8 flags; #endif }; @@ -100,55 +134,66 @@ enum c_wrappers { C_WRAPPER_RW_GENERIC, C_WRAPPER_MFC, C_WRAPPER_MTC, - C_WRAPPER_RFE, C_WRAPPER_CP, - C_WRAPPER_SYSCALL, - C_WRAPPER_BREAK, C_WRAPPERS_COUNT, }; +struct lightrec_cstate { + struct lightrec_state *state; + + struct lightrec_branch local_branches[512]; + struct lightrec_branch_target targets[512]; + unsigned int nb_local_branches; + unsigned int nb_targets; + unsigned int cycles; + + struct 
regcache *reg_cache; +}; + struct lightrec_state { - u32 native_reg_cache[34]; + struct lightrec_registers regs; + uintptr_t wrapper_regs[NUM_TEMPS]; u32 next_pc; u32 current_cycle; u32 target_cycle; u32 exit_flags; u32 old_cycle_counter; struct block *dispatcher, *c_wrapper_block; - void *c_wrapper, *c_wrappers[C_WRAPPERS_COUNT]; - struct jit_node *branches[512]; - struct lightrec_branch local_branches[512]; - struct lightrec_branch_target targets[512]; - unsigned int nb_branches; - unsigned int nb_local_branches; - unsigned int nb_targets; - struct tinymm *tinymm; + void *c_wrappers[C_WRAPPERS_COUNT]; + void *wrappers_eps[C_WRAPPERS_COUNT]; struct blockcache *block_cache; - struct regcache *reg_cache; struct recompiler *rec; + struct lightrec_cstate *cstate; struct reaper *reaper; + void *tlsf; void (*eob_wrapper_func)(void); void (*memset_func)(void); void (*get_next_block)(void); struct lightrec_ops ops; unsigned int nb_precompile; - unsigned int cycles; unsigned int nb_maps; const struct lightrec_mem_map *maps; - uintptr_t offset_ram, offset_bios, offset_scratch; + uintptr_t offset_ram, offset_bios, offset_scratch, offset_io; + _Bool with_32bit_lut; _Bool mirrors_mapped; _Bool invalidate_from_dma_only; void *code_lut[]; }; u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u16 *flags, + u32 addr, u32 data, u32 *flags, struct block *block); void lightrec_free_block(struct lightrec_state *state, struct block *block); void remove_from_code_lut(struct blockcache *cache, struct block *block); +enum psx_map +lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr); + +const struct lightrec_mem_map * +lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr); + static inline u32 kunseg(u32 addr) { if (unlikely(addr >= 0xa0000000)) @@ -165,11 +210,55 @@ static inline u32 lut_offset(u32 pc) return (pc & (RAM_SIZE - 1)) >> 2; // RAM } +static inline _Bool is_big_endian(void) +{ + return __BYTE_ORDER__ == 
__ORDER_BIG_ENDIAN__; +} + +static inline _Bool lut_is_32bit(const struct lightrec_state *state) +{ + return __WORDSIZE == 32 || + (ENABLE_CODE_BUFFER && state->with_32bit_lut); +} + +static inline size_t lut_elm_size(const struct lightrec_state *state) +{ + return lut_is_32bit(state) ? 4 : sizeof(void *); +} + +static inline void ** lut_address(struct lightrec_state *state, u32 offset) +{ + if (lut_is_32bit(state)) + return (void **) ((uintptr_t) state->code_lut + offset * 4); + else + return &state->code_lut[offset]; +} + +static inline void * lut_read(struct lightrec_state *state, u32 offset) +{ + void **lut_entry = lut_address(state, offset); + + if (lut_is_32bit(state)) + return (void *)(uintptr_t) *(u32 *) lut_entry; + else + return *lut_entry; +} + +static inline void lut_write(struct lightrec_state *state, u32 offset, void *ptr) +{ + void **lut_entry = lut_address(state, offset); + + if (lut_is_32bit(state)) + *(u32 *) lut_entry = (u32)(uintptr_t) ptr; + else + *lut_entry = ptr; +} + static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm) { u16 flags = block->opcode_list[offset].flags; - offset += !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS)); + offset += op_flag_no_ds(flags); return block->pc + (offset + imm << 2); } @@ -178,19 +267,24 @@ static inline u32 get_branch_pc(const struct block *block, u16 offset, s16 imm) { u16 flags = block->opcode_list[offset].flags; - offset -= !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS)); + offset -= op_flag_no_ds(flags); return block->pc + (offset + imm << 2); } void lightrec_mtc(struct lightrec_state *state, union code op, u32 data); u32 lightrec_mfc(struct lightrec_state *state, union code op); +void lightrec_rfe(struct lightrec_state *state); +void lightrec_cp(struct lightrec_state *state, union code op); + +struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state); +void lightrec_free_cstate(struct lightrec_cstate *cstate); union code 
lightrec_read_opcode(struct lightrec_state *state, u32 pc); -struct block * lightrec_get_block(struct lightrec_state *state, u32 pc); -int lightrec_compile_block(struct lightrec_state *state, struct block *block); -void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block); +int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block); +void lightrec_free_opcode_list(struct lightrec_state *state, + struct opcode *list); unsigned int lightrec_cycles_of_opcode(union code code); @@ -204,4 +298,46 @@ static inline u8 get_mult_div_hi(union code c) return (OPT_FLAG_MULT_DIV && c.r.imm) ? c.r.imm : REG_HI; } +static inline s16 s16_max(s16 a, s16 b) +{ + return a > b ? a : b; +} + +static inline _Bool block_has_flag(struct block *block, u8 flag) +{ +#if ENABLE_THREADED_COMPILER + return atomic_load_explicit(&block->flags, memory_order_relaxed) & flag; +#else + return block->flags & flag; +#endif +} + +static inline u8 block_set_flags(struct block *block, u8 mask) +{ +#if ENABLE_THREADED_COMPILER + return atomic_fetch_or_explicit(&block->flags, mask, + memory_order_relaxed); +#else + u8 flags = block->flags; + + block->flags |= mask; + + return flags; +#endif +} + +static inline u8 block_clear_flags(struct block *block, u8 mask) +{ +#if ENABLE_THREADED_COMPILER + return atomic_fetch_and_explicit(&block->flags, ~mask, + memory_order_relaxed); +#else + u8 flags = block->flags; + + block->flags &= ~mask; + + return flags; +#endif +} + #endif /* __LIGHTREC_PRIVATE_H__ */ diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 5d54f0475..be4da10f4 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -4,20 +4,22 @@ */ #include "blockcache.h" -#include "config.h" #include "debug.h" #include "disassembler.h" #include "emitter.h" #include "interpreter.h" +#include "lightrec-config.h" +#include "lightning-wrapper.h" #include "lightrec.h" #include "memmanager.h" #include "reaper.h" #include "recompiler.h" 
#include "regcache.h" #include "optimizer.h" +#include "tlsf/tlsf.h" #include -#include +#include #include #if ENABLE_THREADED_COMPILER #include @@ -25,15 +27,13 @@ #include #include #include -#if ENABLE_TINYMM -#include -#endif - -#define GENMASK(h, l) \ - (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h)))) static struct block * lightrec_precompile_block(struct lightrec_state *state, u32 pc); +static bool lightrec_block_is_fully_tagged(const struct block *block); + +static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data); +static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg); static void lightrec_default_sb(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u8 data) @@ -104,7 +104,7 @@ static void lightrec_swl(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u32 data) { unsigned int shift = addr & 0x3; - unsigned int mask = GENMASK(31, (shift + 1) * 8); + unsigned int mask = shift < 3 ? GENMASK(31, (shift + 1) * 8) : 0; u32 old_data; /* Align to 32 bits */ @@ -141,7 +141,7 @@ static void lightrec_swc2(struct lightrec_state *state, union code op, const struct lightrec_mem_map_ops *ops, void *host, u32 addr) { - u32 data = state->ops.cop2_ops.mfc(state, op.opcode, op.i.rt); + u32 data = lightrec_mfc2(state, op.i.rt); ops->sw(state, op.opcode, host, addr, data); } @@ -168,7 +168,7 @@ static u32 lightrec_lwr(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u32 data) { unsigned int shift = addr & 0x3; - unsigned int mask = GENMASK(31, 32 - shift * 8); + unsigned int mask = shift ? 
GENMASK(31, 32 - shift * 8) : 0; u32 old_data; /* Align to 32 bits */ @@ -186,38 +186,46 @@ static void lightrec_lwc2(struct lightrec_state *state, union code op, { u32 data = ops->lw(state, op.opcode, host, addr); - state->ops.cop2_ops.mtc(state, op.opcode, op.i.rt, data); + lightrec_mtc2(state, op.i.rt, data); } static void lightrec_invalidate_map(struct lightrec_state *state, const struct lightrec_mem_map *map, u32 addr, u32 len) { if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) { - memset(&state->code_lut[lut_offset(addr)], 0, - ((len + 3) / 4) * sizeof(void *)); + memset(lut_address(state, lut_offset(addr)), 0, + ((len + 3) / 4) * lut_elm_size(state)); } } -static const struct lightrec_mem_map * -lightrec_get_map(struct lightrec_state *state, - void **host, u32 kaddr) +enum psx_map +lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr) { const struct lightrec_mem_map *map; unsigned int i; - u32 addr; for (i = 0; i < state->nb_maps; i++) { - const struct lightrec_mem_map *mapi = &state->maps[i]; + map = &state->maps[i]; - if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) { - map = mapi; - break; - } + if (kaddr >= map->pc && kaddr < map->pc + map->length) + return (enum psx_map) i; } - if (i == state->nb_maps) + return PSX_MAP_UNKNOWN; +} + +const struct lightrec_mem_map * +lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) +{ + const struct lightrec_mem_map *map; + enum psx_map idx; + u32 addr; + + idx = lightrec_get_map_idx(state, kaddr); + if (idx == PSX_MAP_UNKNOWN) return NULL; + map = &state->maps[idx]; addr = kaddr - map->pc; while (map->mirror_of) @@ -230,7 +238,7 @@ lightrec_get_map(struct lightrec_state *state, } u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u16 *flags, struct block *block) + u32 addr, u32 data, u32 *flags, struct block *block) { const struct lightrec_mem_map *map; const struct lightrec_mem_map_ops *ops; @@ -245,16 +253,20 @@ u32 lightrec_rw(struct 
lightrec_state *state, union code op, return 0; } - if (unlikely(map->ops)) { - if (flags) - *flags |= LIGHTREC_HW_IO; - ops = map->ops; - } else { - if (flags) - *flags |= LIGHTREC_DIRECT_IO; + if (likely(!map->ops)) { + if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); ops = &lightrec_default_ops; + } else if (flags && + LIGHTREC_FLAGS_GET_IO_MODE(*flags) == LIGHTREC_IO_DIRECT_HW) { + ops = &lightrec_default_ops; + } else { + if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + + ops = map->ops; } switch (op.i.op) { @@ -298,13 +310,11 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, } static void lightrec_rw_helper(struct lightrec_state *state, - union code op, u16 *flags, + union code op, u32 *flags, struct block *block) { - u32 ret = lightrec_rw(state, op, - state->native_reg_cache[op.i.rs], - state->native_reg_cache[op.i.rt], flags, - block); + u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs], + state->regs.gpr[op.i.rt], flags, block); switch (op.i.op) { case OP_LB: @@ -315,15 +325,16 @@ static void lightrec_rw_helper(struct lightrec_state *state, case OP_LWR: case OP_LW: if (op.i.rt) - state->native_reg_cache[op.i.rt] = ret; - default: /* fall-through */ + state->regs.gpr[op.i.rt] = ret; + fallthrough; + default: break; } } -static void lightrec_rw_cb(struct lightrec_state *state, union code op) +static void lightrec_rw_cb(struct lightrec_state *state, u32 arg) { - lightrec_rw_helper(state, op, NULL, NULL); + lightrec_rw_helper(state, (union code) arg, NULL, NULL); } static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) @@ -332,46 +343,115 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) struct opcode *op; bool was_tagged; u16 offset = (u16)arg; + u16 old_flags; block = lightrec_find_block_from_lut(state->block_cache, arg >> 16, state->next_pc); if (unlikely(!block)) { pr_err("rw_generic: No 
block found in LUT for PC 0x%x offset 0x%x\n", state->next_pc, offset); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); return; } op = &block->opcode_list[offset]; - was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO); + was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(op->flags); lightrec_rw_helper(state, op->c, &op->flags, block); if (!was_tagged) { - pr_debug("Opcode of block at PC 0x%08x has been tagged - flag " - "for recompilation\n", block->pc); + old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); + + if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { + pr_debug("Opcode of block at PC 0x%08x has been tagged" + " - flag for recompilation\n", block->pc); + + lut_write(state, lut_offset(block->pc), NULL); + } + } +} + +static u32 clamp_s32(s32 val, s32 min, s32 max) +{ + return val < min ? min : val > max ? max : val; +} - block->flags |= BLOCK_SHOULD_RECOMPILE; +static u16 load_u16(u32 *ptr) +{ + return ((struct u16x2 *) ptr)->l; +} + +static void store_u16(u32 *ptr, u16 value) +{ + ((struct u16x2 *) ptr)->l = value; +} + +static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg) +{ + s16 gteir1, gteir2, gteir3; + + switch (reg) { + case 1: + case 3: + case 5: + case 8: + case 9: + case 10: + case 11: + return (s32)(s16) load_u16(&state->regs.cp2d[reg]); + case 7: + case 16: + case 17: + case 18: + case 19: + return load_u16(&state->regs.cp2d[reg]); + case 28: + case 29: + gteir1 = (s16) load_u16(&state->regs.cp2d[9]); + gteir2 = (s16) load_u16(&state->regs.cp2d[10]); + gteir3 = (s16) load_u16(&state->regs.cp2d[11]); + + return clamp_s32(gteir1 >> 7, 0, 0x1f) << 0 | + clamp_s32(gteir2 >> 7, 0, 0x1f) << 5 | + clamp_s32(gteir3 >> 7, 0, 0x1f) << 10; + case 15: + reg = 14; + fallthrough; + default: + return state->regs.cp2d[reg]; } } u32 lightrec_mfc(struct lightrec_state *state, union code op) { - bool is_cfc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CFC0) || - (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CFC2); - u32 (*func)(struct 
lightrec_state *, u32, u8); - const struct lightrec_cop_ops *ops; + u32 val; if (op.i.op == OP_CP0) - ops = &state->ops.cop0_ops; - else - ops = &state->ops.cop2_ops; + return state->regs.cp0[op.r.rd]; + else if (op.r.rs == OP_CP2_BASIC_MFC2) + val = lightrec_mfc2(state, op.r.rd); + else { + val = state->regs.cp2c[op.r.rd]; + + switch (op.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + val = (u32)(s16)val; + fallthrough; + default: + break; + } + } - if (is_cfc) - func = ops->cfc; - else - func = ops->mfc; + if (state->ops.cop2_notify) + (*state->ops.cop2_notify)(state, op.opcode, val); - return (*func)(state, op.opcode, op.r.rd); + return val; } static void lightrec_mfc_cb(struct lightrec_state *state, union code op) @@ -379,85 +459,194 @@ static void lightrec_mfc_cb(struct lightrec_state *state, union code op) u32 rt = lightrec_mfc(state, op); if (op.r.rt) - state->native_reg_cache[op.r.rt] = rt; + state->regs.gpr[op.r.rt] = rt; } -void lightrec_mtc(struct lightrec_state *state, union code op, u32 data) +static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) +{ + u32 status, oldstatus, cause; + + switch (reg) { + case 1: + case 4: + case 8: + case 14: + case 15: + /* Those registers are read-only */ + return; + default: + break; + } + + if (reg == 12) { + status = state->regs.cp0[12]; + oldstatus = status; + + if (status & ~data & BIT(16)) { + state->ops.enable_ram(state, true); + lightrec_invalidate_all(state); + } else if (~status & data & BIT(16)) { + state->ops.enable_ram(state, false); + } + } + + if (reg == 13) { + state->regs.cp0[13] &= ~0x300; + state->regs.cp0[13] |= data & 0x300; + } else { + state->regs.cp0[reg] = data; + } + + if (reg == 12 || reg == 13) { + cause = state->regs.cp0[13]; + status = state->regs.cp0[12]; + + /* Handle software interrupts */ + if (!!(status & cause & 0x300) & status) + lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); + + /* Handle hardware interrupts 
*/ + if (reg == 12 && !(~status & 0x401) && (~oldstatus & 0x401)) + lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); + } +} + +static u32 count_leading_bits(s32 data) { - bool is_ctc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CTC0) || - (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CTC2); - void (*func)(struct lightrec_state *, u32, u8, u32); - const struct lightrec_cop_ops *ops; + u32 cnt = 33; - if (op.i.op == OP_CP0) - ops = &state->ops.cop0_ops; - else - ops = &state->ops.cop2_ops; +#ifdef __has_builtin +#if __has_builtin(__builtin_clrsb) + return 1 + __builtin_clrsb(data); +#endif +#endif - if (is_ctc) - func = ops->ctc; - else - func = ops->mtc; + data = (data ^ (data >> 31)) << 1; + + do { + cnt -= 1; + data >>= 1; + } while (data); - (*func)(state, op.opcode, op.r.rd, data); + return cnt; } -static void lightrec_mtc_cb(struct lightrec_state *state, union code op) +static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data) { - lightrec_mtc(state, op, state->native_reg_cache[op.r.rt]); + switch (reg) { + case 15: + state->regs.cp2d[12] = state->regs.cp2d[13]; + state->regs.cp2d[13] = state->regs.cp2d[14]; + state->regs.cp2d[14] = data; + break; + case 28: + state->regs.cp2d[9] = (data << 7) & 0xf80; + state->regs.cp2d[10] = (data << 2) & 0xf80; + state->regs.cp2d[11] = (data >> 3) & 0xf80; + break; + case 31: + return; + case 30: + state->regs.cp2d[31] = count_leading_bits((s32) data); + fallthrough; + default: + state->regs.cp2d[reg] = data; + break; + } } -static void lightrec_rfe_cb(struct lightrec_state *state, union code op) +static void lightrec_ctc2(struct lightrec_state *state, u8 reg, u32 data) +{ + switch (reg) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + store_u16(&state->regs.cp2c[reg], data); + break; + case 31: + data = (data & 0x7ffff000) | !!(data & 0x7f87e000) << 31; + fallthrough; + default: + state->regs.cp2c[reg] = data; + break; + } +} + +void lightrec_mtc(struct 
lightrec_state *state, union code op, u32 data) +{ + if (op.i.op == OP_CP0) { + lightrec_mtc0(state, op.r.rd, data); + } else { + if (op.r.rs == OP_CP2_BASIC_CTC2) + lightrec_ctc2(state, op.r.rd, data); + else + lightrec_mtc2(state, op.r.rd, data); + + if (state->ops.cop2_notify) + (*state->ops.cop2_notify)(state, op.opcode, data); + } +} + +static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg) +{ + union code op = (union code) arg; + + lightrec_mtc(state, op, state->regs.gpr[op.r.rt]); +} + +void lightrec_rfe(struct lightrec_state *state) { u32 status; /* Read CP0 Status register (r12) */ - status = state->ops.cop0_ops.mfc(state, op.opcode, 12); + status = state->regs.cp0[12]; /* Switch the bits */ status = ((status & 0x3c) >> 2) | (status & ~0xf); /* Write it back */ - state->ops.cop0_ops.ctc(state, op.opcode, 12, status); + lightrec_mtc0(state, 12, status); } -static void lightrec_cp_cb(struct lightrec_state *state, union code op) +void lightrec_cp(struct lightrec_state *state, union code op) { - void (*func)(struct lightrec_state *, u32); - - if (op.i.op == OP_CP2) - func = state->ops.cop2_ops.op; - else - func = state->ops.cop0_ops.op; - - (*func)(state, op.opcode); -} + if (op.i.op == OP_CP0) { + pr_err("Invalid CP opcode to coprocessor #0\n"); + return; + } -static void lightrec_syscall_cb(struct lightrec_state *state, union code op) -{ - lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL); + (*state->ops.cop2_op)(state, op.opcode); } -static void lightrec_break_cb(struct lightrec_state *state, union code op) +static void lightrec_cp_cb(struct lightrec_state *state, u32 arg) { - lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK); + lightrec_cp(state, (union code) arg); } -struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) +static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) { struct block *block = lightrec_find_block(state->block_cache, pc); + u8 old_flags; if (block && 
lightrec_block_is_outdated(state, block)) { pr_debug("Block at PC 0x%08x is outdated!\n", block->pc); - /* Make sure the recompiler isn't processing the block we'll - * destroy */ - if (ENABLE_THREADED_COMPILER) - lightrec_recompiler_remove(state->rec, block); + old_flags = block_set_flags(block, BLOCK_IS_DEAD); + if (!(old_flags & BLOCK_IS_DEAD)) { + /* Make sure the recompiler isn't processing the block + * we'll destroy */ + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_remove(state->rec, block); + + lightrec_unregister_block(state->block_cache, block); + remove_from_code_lut(state->block_cache, block); + lightrec_free_block(state, block); + } - lightrec_unregister_block(state->block_cache, block); - remove_from_code_lut(state->block_cache, block); - lightrec_free_block(state, block); block = NULL; } @@ -480,9 +669,10 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) struct block *block; bool should_recompile; void *func; + int err; for (;;) { - func = state->code_lut[lut_offset(pc)]; + func = lut_read(state, lut_offset(pc)); if (func && func != state->get_next_block) break; @@ -491,23 +681,27 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (unlikely(!block)) break; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) { + if (OPT_REPLACE_MEMSET && + block_has_flag(block, BLOCK_IS_MEMSET)) { func = state->memset_func; break; } - should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE && - !(block->flags & BLOCK_IS_DEAD); + should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) && + !block_has_flag(block, BLOCK_IS_DEAD); if (unlikely(should_recompile)) { pr_debug("Block at PC 0x%08x should recompile\n", pc); - lightrec_unregister(MEM_FOR_CODE, block->code_size); - - if (ENABLE_THREADED_COMPILER) + if (ENABLE_THREADED_COMPILER) { lightrec_recompiler_add(state->rec, block); - else - lightrec_compile_block(state, block); + } else { + err = lightrec_compile_block(state->cstate, block); + 
if (err) { + state->exit_flags = LIGHTREC_EXIT_NOMEM; + return NULL; + } + } } if (ENABLE_THREADED_COMPILER && likely(!should_recompile)) @@ -518,18 +712,33 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (likely(func)) break; - /* Block wasn't compiled yet - run the interpreter */ - if (!ENABLE_THREADED_COMPILER && - ((ENABLE_FIRST_PASS && likely(!should_recompile)) || - unlikely(block->flags & BLOCK_NEVER_COMPILE))) + if (unlikely(block_has_flag(block, BLOCK_NEVER_COMPILE))) { pc = lightrec_emulate_block(state, block, pc); - if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) { + } else if (!ENABLE_THREADED_COMPILER) { + /* Block wasn't compiled yet - run the interpreter */ + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) + pr_debug("Block fully tagged, skipping first pass\n"); + else if (ENABLE_FIRST_PASS && likely(!should_recompile)) + pc = lightrec_emulate_block(state, block, pc); + /* Then compile it using the profiled data */ - if (ENABLE_THREADED_COMPILER) - lightrec_recompiler_add(state->rec, block); - else - lightrec_compile_block(state, block); + err = lightrec_compile_block(state->cstate, block); + if (err) { + state->exit_flags = LIGHTREC_EXIT_NOMEM; + return NULL; + } + } else if (unlikely(block_has_flag(block, BLOCK_IS_DEAD))) { + /* + * If the block is dead but has never been compiled, + * then its function pointer is NULL and we cannot + * execute the block. In that case, reap all the dead + * blocks now, and in the next loop we will create a + * new block. 
+ */ + lightrec_reaper_reap(state->reaper); + } else { + lightrec_recompiler_add(state->rec, block); } if (state->exit_flags != LIGHTREC_EXIT_NORMAL || @@ -541,15 +750,104 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) return func; } -static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta, - void (*f)(struct lightrec_state *, u32 d), - u32 d) +static void * lightrec_alloc_code(struct lightrec_state *state, size_t size) { - state->current_cycle = state->target_cycle - cycles_delta; + void *code; - (*f)(state, d); + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_lock(state); + + code = tlsf_malloc(state->tlsf, size); - return state->target_cycle - state->current_cycle; + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_unlock(state); + + return code; +} + +static void lightrec_realloc_code(struct lightrec_state *state, + void *ptr, size_t size) +{ + /* NOTE: 'size' MUST be smaller than the size specified during + * the allocation. */ + + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_lock(state); + + tlsf_realloc(state->tlsf, ptr, size); + + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_unlock(state); +} + +static void lightrec_free_code(struct lightrec_state *state, void *ptr) +{ + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_lock(state); + + tlsf_free(state->tlsf, ptr); + + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_unlock(state); +} + +static void * lightrec_emit_code(struct lightrec_state *state, + const struct block *block, + jit_state_t *_jit, unsigned int *size) +{ + bool has_code_buffer = ENABLE_CODE_BUFFER && state->tlsf; + jit_word_t code_size, new_code_size; + void *code; + + jit_realize(); + + if (!ENABLE_DISASSEMBLER) + jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + + if (has_code_buffer) { + jit_get_code(&code_size); + code = lightrec_alloc_code(state, (size_t) code_size); + + if (!code) { + if (ENABLE_THREADED_COMPILER) { + /* If we're using the threaded 
compiler, return + * an allocation error here. The threaded + * compiler will then empty its job queue and + * request a code flush using the reaper. */ + return NULL; + } + + /* Remove outdated blocks, and try again */ + lightrec_remove_outdated_blocks(state->block_cache, block); + + pr_debug("Re-try to alloc %zu bytes...\n", code_size); + + code = lightrec_alloc_code(state, code_size); + if (!code) { + pr_err("Could not alloc even after removing old blocks!\n"); + return NULL; + } + } + + jit_set_code(code, code_size); + } + + code = jit_emit(); + + jit_get_code(&new_code_size); + lightrec_register(MEM_FOR_CODE, new_code_size); + + if (has_code_buffer) { + lightrec_realloc_code(state, code, (size_t) new_code_size); + + pr_debug("Creating code block at address 0x%" PRIxPTR ", " + "code size: %" PRIuPTR " new: %" PRIuPTR "\n", + (uintptr_t) code, code_size, new_code_size); + } + + *size = (unsigned int) new_code_size; + + return code; } static struct block * generate_wrapper(struct lightrec_state *state) @@ -557,9 +855,8 @@ static struct block * generate_wrapper(struct lightrec_state *state) struct block *block; jit_state_t *_jit; unsigned int i; - int stack_ptr; - jit_word_t code_size; - jit_node_t *to_tramp, *to_fn_epilog; + jit_node_t *addr[C_WRAPPERS_COUNT - 1]; + jit_node_t *to_end[C_WRAPPERS_COUNT - 1]; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -574,58 +871,82 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* Wrapper entry point */ jit_prolog(); + jit_tramp(256); - stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS); - - for (i = 0; i < NUM_TEMPS; i++) - jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i)); - - /* Jump to the trampoline */ - to_tramp = jit_jmpi(); + /* Add entry points */ + for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { + jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[i])); + to_end[i - 1] = jit_b(); + addr[i - 1] = jit_indirect(); + } - /* 
The trampoline will jump back here */ - to_fn_epilog = jit_label(); + jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[0])); - for (i = 0; i < NUM_TEMPS; i++) - jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t)); + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + jit_patch(to_end[i]); - jit_ret(); jit_epilog(); - - /* Trampoline entry point. - * The sole purpose of the trampoline is to cheese Lightning not to - * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we - * do want to return to the caller with this register modified. */ jit_prolog(); - jit_tramp(256); - jit_patch(to_tramp); + + /* Save all temporaries on stack */ + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP != 1) { + jit_stxi(offsetof(struct lightrec_state, wrapper_regs[i]), + LIGHTREC_REG_STATE, JIT_R(i + FIRST_TEMP)); + } + } + + jit_getarg(JIT_R2, jit_arg()); jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); - jit_pushargr(LIGHTREC_REG_CYCLE); - jit_pushargr(JIT_R0); - jit_pushargr(JIT_R1); - jit_finishi(c_function_wrapper); - -#if __WORDSIZE == 64 - jit_retval_i(LIGHTREC_REG_CYCLE); -#else - jit_retval(LIGHTREC_REG_CYCLE); -#endif + jit_pushargr(JIT_R2); + + jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + + /* state->current_cycle = state->target_cycle - delta; */ + jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, LIGHTREC_REG_CYCLE); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE); + + /* Call the wrapper function */ + jit_finishr(JIT_R1); + + /* delta = state->target_cycle - state->current_cycle */; + jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, current_cycle)); + jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(LIGHTREC_REG_CYCLE, JIT_R1, LIGHTREC_REG_CYCLE); + + /* Restore temporaries from stack */ + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP 
!= 1) { + jit_ldxi(JIT_R(i + FIRST_TEMP), LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, wrapper_regs[i])); + } + } - jit_patch_at(jit_jmpi(), to_fn_epilog); + jit_ret(); jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); + block->function = lightrec_emit_code(state, block, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; + + state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function; - block->code_size = code_size; + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + state->wrappers_eps[i] = jit_address(addr[i]); if (ENABLE_DISASSEMBLER) { pr_debug("Wrapper block:\n"); @@ -644,10 +965,10 @@ static struct block * generate_wrapper(struct lightrec_state *state) static u32 lightrec_memset(struct lightrec_state *state) { - u32 kunseg_pc = kunseg(state->native_reg_cache[4]); + u32 kunseg_pc = kunseg(state->regs.gpr[4]); void *host; const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg_pc); - u32 length = state->native_reg_cache[5] * 4; + u32 length = state->regs.gpr[5] * 4; if (!map) { pr_err("Unable to find memory map for memset target address " @@ -655,7 +976,7 @@ static u32 lightrec_memset(struct lightrec_state *state) return 0; } - pr_debug("Calling host memset, PC 0x%x (host address 0x%lx) for %u bytes\n", + pr_debug("Calling host memset, PC 0x%x (host address 0x%" PRIxPTR ") for %u bytes\n", kunseg_pc, (uintptr_t)host, length); memset(host, 0, length); @@ -670,10 +991,9 @@ static struct block * generate_dispatcher(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; - jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3; + jit_node_t *to_end, *loop, *addr, *addr2, *addr3; unsigned int i; - u32 offset, ram_len; - jit_word_t code_size; + u32 offset; block = lightrec_malloc(state, MEM_FOR_IR, 
sizeof(*block)); if (!block) @@ -689,16 +1009,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_prolog(); jit_frame(256); - jit_getarg(JIT_R0, jit_arg()); -#if __WORDSIZE == 64 + jit_getarg(JIT_V1, jit_arg()); jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg()); -#else - jit_getarg(LIGHTREC_REG_CYCLE, jit_arg()); -#endif /* Force all callee-saved registers to be pushed on the stack */ for (i = 0; i < NUM_REGS; i++) - jit_movr(JIT_V(i), JIT_V(i)); + jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG)); /* Pass lightrec_state structure to blocks, using the last callee-saved * register that Lightning provides */ @@ -707,27 +1023,24 @@ static struct block * generate_dispatcher(struct lightrec_state *state) loop = jit_label(); /* Call the block's code */ - jit_jmpr(JIT_R0); + jit_jmpr(JIT_V1); if (OPT_REPLACE_MEMSET) { /* Blocks will jump here when they need to call * lightrec_memset() */ addr3 = jit_indirect(); + jit_movr(JIT_V1, LIGHTREC_REG_CYCLE); + jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_finishi(lightrec_memset); -#if __WORDSIZE == 64 jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, native_reg_cache[31])); -#else - jit_ldxi_i(JIT_V0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, native_reg_cache[31])); -#endif + offsetof(struct lightrec_state, regs.gpr[31])); - jit_retval(JIT_R0); - jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0); + jit_retval(LIGHTREC_REG_CYCLE); + jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE); } /* The block will jump here, with the number of cycles remaining in @@ -742,43 +1055,53 @@ static struct block * generate_dispatcher(struct lightrec_state *state) to_end = jit_blei(LIGHTREC_REG_CYCLE, 0); /* Convert next PC to KUNSEG and avoid mirrors */ - ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length; - jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1)); - to_c = jit_bgei(JIT_R0, ram_len); - - /* Fast path: code is running from RAM, use the code LUT */ 
-#if __WORDSIZE == 64 - jit_lshi(JIT_R0, JIT_R0, 1); -#endif - jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE); - jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut)); + jit_andi(JIT_V1, JIT_V0, 0x10000000 | (RAM_SIZE - 1)); + jit_rshi_u(JIT_R1, JIT_V1, 28); + jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1); + jit_addi(JIT_R2, JIT_R2, RAM_SIZE); + jit_movnr(JIT_V1, JIT_R2, JIT_R1); + + /* If possible, use the code LUT */ + if (!lut_is_32bit(state)) + jit_lshi(JIT_V1, JIT_V1, 1); + jit_addr(JIT_V1, JIT_V1, LIGHTREC_REG_STATE); + + offset = offsetof(struct lightrec_state, code_lut); + if (lut_is_32bit(state)) + jit_ldxi_ui(JIT_V1, JIT_V1, offset); + else + jit_ldxi(JIT_V1, JIT_V1, offset); /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); + + /* The code LUT will be set to this address when the block at the target + * PC has been preprocessed but not yet compiled by the threaded + * recompiler */ + addr = jit_indirect(); /* Slow path: call C function get_next_block_func() */ - jit_patch(to_c); if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* We may call the interpreter - update state->current_cycle */ jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, target_cycle)); - jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE); + jit_subr(JIT_V1, JIT_R2, LIGHTREC_REG_CYCLE); jit_stxi_i(offsetof(struct lightrec_state, current_cycle), - LIGHTREC_REG_STATE, JIT_R1); + LIGHTREC_REG_STATE, JIT_V1); } - /* The code LUT will be set to this address when the block at the target - * PC has been preprocessed but not yet compiled by the threaded - * recompiler */ - addr = jit_indirect(); - - /* Get the next block */ jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_pushargr(JIT_V0); + + /* Save the cycles register if needed */ + if (!(ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES)) + jit_movr(JIT_V0, LIGHTREC_REG_CYCLE); + + /* Get the next block */ 
jit_finishi(&get_next_block_func); - jit_retval(JIT_R0); + jit_retval(JIT_V1); if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* The interpreter may have updated state->current_cycle and @@ -788,10 +1111,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, target_cycle)); jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); + } else { + jit_movr(LIGHTREC_REG_CYCLE, JIT_V0); } /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); /* When exiting, the recompiled code will jump to that address */ jit_note(__FILE__, __LINE__); @@ -801,15 +1126,14 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; + block->function = lightrec_emit_code(state, block, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; state->eob_wrapper_func = jit_address(addr2); if (OPT_REPLACE_MEMSET) @@ -834,12 +1158,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) { - void *host; + void *host = NULL; lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *)host; - return (union code) *code; + return (union code) LE32TOH(*code); } unsigned int lightrec_cycles_of_opcode(union code code) @@ -847,11 +1171,13 @@ unsigned int lightrec_cycles_of_opcode(union code code) return 2; } -void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block) +void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *ops) { + struct opcode_list *list = container_of(ops, struct opcode_list, ops); + 
lightrec_free(state, MEM_FOR_IR, - sizeof(*block->opcode_list) * block->nb_ops, - block->opcode_list); + sizeof(*list) + list->nb_ops * sizeof(struct opcode), + list); } static unsigned int lightrec_get_mips_block_len(const u32 *src) @@ -873,25 +1199,28 @@ static unsigned int lightrec_get_mips_block_len(const u32 *src) static struct opcode * lightrec_disassemble(struct lightrec_state *state, const u32 *src, unsigned int *len) { - struct opcode *list; + struct opcode_list *list; unsigned int i, length; length = lightrec_get_mips_block_len(src); - list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length); + list = lightrec_malloc(state, MEM_FOR_IR, + sizeof(*list) + sizeof(struct opcode) * length); if (!list) { pr_err("Unable to allocate memory\n"); return NULL; } + list->nb_ops = (u16) length; + for (i = 0; i < length; i++) { - list[i].opcode = LE32TOH(src[i]); - list[i].flags = 0; + list->ops[i].opcode = LE32TOH(src[i]); + list->ops[i].flags = 0; } *len = length * sizeof(u32); - return list; + return list->ops; } static struct block * lightrec_precompile_block(struct lightrec_state *state, @@ -899,10 +1228,12 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, { struct opcode *list; struct block *block; - void *host; + void *host, *addr; const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *) host; unsigned int length; + bool fully_tagged; + u8 block_flags = 0; if (!map) return NULL; @@ -927,9 +1258,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, block->next = NULL; block->flags = 0; block->code_size = 0; -#if ENABLE_THREADED_COMPILER - block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT; -#endif + block->precompile_date = state->current_cycle; block->nb_ops = length / sizeof(u32); lightrec_optimize(state, block); @@ -939,7 +1268,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, 
lightrec_register(MEM_FOR_MIPS_CODE, length); if (ENABLE_DISASSEMBLER) { - pr_debug("Disassembled block at PC: 0x%x\n", block->pc); + pr_debug("Disassembled block at PC: 0x%08x\n", block->pc); lightrec_print_disassembly(block, code); } @@ -948,13 +1277,23 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, /* If the first opcode is an 'impossible' branch, never compile the * block */ if (should_emulate(block->opcode_list)) - block->flags |= BLOCK_NEVER_COMPILE; + block_flags |= BLOCK_NEVER_COMPILE; + + fully_tagged = lightrec_block_is_fully_tagged(block); + if (fully_tagged) + block_flags |= BLOCK_FULLY_TAGGED; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) - state->code_lut[lut_offset(pc)] = state->memset_func; + if (block_flags) + block_set_flags(block, block_flags); block->hash = lightrec_calculate_block_hash(block); + if (OPT_REPLACE_MEMSET && block_has_flag(block, BLOCK_IS_MEMSET)) + addr = state->memset_func; + else + addr = state->get_next_block; + lut_write(state, lut_offset(pc), addr); + pr_debug("Recompile count: %u\n", state->nb_precompile++); return block; @@ -986,10 +1325,10 @@ static bool lightrec_block_is_fully_tagged(const struct block *block) case OP_SWR: case OP_LWC2: case OP_SWC2: - if (!(op->flags & (LIGHTREC_DIRECT_IO | - LIGHTREC_HW_IO))) + if (!LIGHTREC_FLAGS_GET_IO_MODE(op->flags)) return false; - default: /* fall-through */ + fallthrough; + default: continue; } } @@ -1002,6 +1341,7 @@ static void lightrec_reap_block(struct lightrec_state *state, void *data) struct block *block = data; pr_debug("Reap dead block at PC 0x%08x\n", block->pc); + lightrec_unregister_block(state->block_cache, block); lightrec_free_block(state, block); } @@ -1010,35 +1350,58 @@ static void lightrec_reap_jit(struct lightrec_state *state, void *data) _jit_destroy_state(data); } -int lightrec_compile_block(struct lightrec_state *state, struct block *block) +static void lightrec_free_function(struct lightrec_state *state, 
void *fn) +{ + if (ENABLE_CODE_BUFFER && state->tlsf) { + pr_debug("Freeing code block at 0x%" PRIxPTR "\n", (uintptr_t) fn); + lightrec_free_code(state, fn); + } +} + +static void lightrec_reap_function(struct lightrec_state *state, void *data) +{ + lightrec_free_function(state, data); +} + +static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data) { + lightrec_free_opcode_list(state, data); +} + +int lightrec_compile_block(struct lightrec_cstate *cstate, + struct block *block) +{ + struct lightrec_state *state = cstate->state; struct lightrec_branch_target *target; - bool op_list_freed = false, fully_tagged = false; + bool fully_tagged = false; struct block *block2; struct opcode *elm; jit_state_t *_jit, *oldjit; jit_node_t *start_of_block; bool skip_next = false; - jit_word_t code_size; + void *old_fn, *new_fn; + size_t old_code_size; unsigned int i, j; + u8 old_flags; u32 offset; fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) - block->flags |= BLOCK_FULLY_TAGGED; + block_set_flags(block, BLOCK_FULLY_TAGGED); _jit = jit_new_state(); if (!_jit) return -ENOMEM; oldjit = block->_jit; + old_fn = block->function; + old_code_size = block->code_size; block->_jit = _jit; - lightrec_regcache_reset(state->reg_cache); - state->cycles = 0; - state->nb_branches = 0; - state->nb_local_branches = 0; - state->nb_targets = 0; + lightrec_regcache_reset(cstate->reg_cache); + cstate->cycles = 0; + cstate->nb_local_branches = 0; + cstate->nb_targets = 0; jit_prolog(); jit_tramp(256); @@ -1053,33 +1416,29 @@ int lightrec_compile_block(struct lightrec_state *state, struct block *block) continue; } - state->cycles += lightrec_cycles_of_opcode(elm->c); - if (should_emulate(elm)) { pr_debug("Branch at offset 0x%x will be emulated\n", i << 2); - lightrec_emit_eob(state, block, i); - skip_next = !(elm->flags & LIGHTREC_NO_DS); + lightrec_emit_eob(cstate, block, i); + skip_next = !op_flag_no_ds(elm->flags); } else { - lightrec_rec_opcode(state, 
block, i); - skip_next = has_delay_slot(elm->c) && - !(elm->flags & LIGHTREC_NO_DS); + lightrec_rec_opcode(cstate, block, i); + skip_next = !op_flag_no_ds(elm->flags) && has_delay_slot(elm->c); #if _WIN32 /* FIXME: GNU Lightning on Windows seems to use our * mapped registers as temporaries. Until the actual bug * is found and fixed, unconditionally mark our * registers as live here. */ - lightrec_regcache_mark_live(state->reg_cache, _jit); + lightrec_regcache_mark_live(cstate->reg_cache, _jit); #endif } - } - for (i = 0; i < state->nb_branches; i++) - jit_patch(state->branches[i]); + cstate->cycles += lightrec_cycles_of_opcode(elm->c); + } - for (i = 0; i < state->nb_local_branches; i++) { - struct lightrec_branch *branch = &state->local_branches[i]; + for (i = 0; i < cstate->nb_local_branches; i++) { + struct lightrec_branch *branch = &cstate->local_branches[i]; pr_debug("Patch local branch to offset 0x%x\n", branch->target << 2); @@ -1089,105 +1448,140 @@ int lightrec_compile_block(struct lightrec_state *state, struct block *block) continue; } - for (j = 0; j < state->nb_targets; j++) { - if (state->targets[j].offset == branch->target) { + for (j = 0; j < cstate->nb_targets; j++) { + if (cstate->targets[j].offset == branch->target) { jit_patch_at(branch->branch, - state->targets[j].label); + cstate->targets[j].label); break; } } - if (j == state->nb_targets) + if (j == cstate->nb_targets) pr_err("Unable to find branch target\n"); } - jit_ldxi(JIT_R0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, eob_wrapper_func)); - - jit_jmpr(JIT_R0); - jit_ret(); jit_epilog(); - block->function = jit_emit(); - block->flags &= ~BLOCK_SHOULD_RECOMPILE; + new_fn = lightrec_emit_code(state, block, _jit, &block->code_size); + if (!new_fn) { + if (!ENABLE_THREADED_COMPILER) + pr_err("Unable to compile block!\n"); + block->_jit = oldjit; + jit_clear_state(); + _jit_destroy_state(_jit); + return -ENOMEM; + } - /* Add compiled function to the LUT */ - 
state->code_lut[lut_offset(block->pc)] = block->function; + /* Pause the reaper, because lightrec_reset_lut_offset() may try to set + * the old block->function pointer to the code LUT. */ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); - /* Fill code LUT with the block's entry points */ - for (i = 0; i < state->nb_targets; i++) { - target = &state->targets[i]; + block->function = new_fn; + block_clear_flags(block, BLOCK_SHOULD_RECOMPILE); - if (target->offset) { - offset = lut_offset(block->pc) + target->offset; - state->code_lut[offset] = jit_address(target->label); - } - } + /* Add compiled function to the LUT */ + lut_write(state, lut_offset(block->pc), block->function); + + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_continue(state->reaper); /* Detect old blocks that have been covered by the new one */ - for (i = 0; i < state->nb_targets; i++) { - target = &state->targets[i]; + for (i = 0; i < cstate->nb_targets; i++) { + target = &cstate->targets[i]; if (!target->offset) continue; offset = block->pc + target->offset * sizeof(u32); + + /* Pause the reaper while we search for the block until we set + * the BLOCK_IS_DEAD flag, otherwise the block may be removed + * under our feet. */ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); + block2 = lightrec_find_block(state->block_cache, offset); if (block2) { /* No need to check if block2 is compilable - it must * be, otherwise block wouldn't be compilable either */ - block2->flags |= BLOCK_IS_DEAD; + /* Set the "block dead" flag to prevent the dynarec from + * recompiling this block */ + old_flags = block_set_flags(block2, BLOCK_IS_DEAD); + } + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_continue(state->reaper); + + /* If block2 was pending for compilation, cancel it. + * If it's being compiled right now, wait until it + * finishes. 
*/ + if (block2) + lightrec_recompiler_remove(state->rec, block2); + } + + /* We know from now on that block2 (if present) isn't going to + * be compiled. We can override the LUT entry with our new + * block's entry point. */ + offset = lut_offset(block->pc) + target->offset; + lut_write(state, offset, jit_address(target->label)); + if (block2) { pr_debug("Reap block 0x%08x as it's covered by block " "0x%08x\n", block2->pc, block->pc); - lightrec_unregister_block(state->block_cache, block2); - - if (ENABLE_THREADED_COMPILER) { - lightrec_recompiler_remove(state->rec, block2); + /* Finally, reap the block. */ + if (!ENABLE_THREADED_COMPILER) { + lightrec_unregister_block(state->block_cache, block2); + lightrec_free_block(state, block2); + } else if (!(old_flags & BLOCK_IS_DEAD)) { lightrec_reaper_add(state->reaper, lightrec_reap_block, block2); - } else { - lightrec_free_block(state, block2); } } } - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; - if (ENABLE_DISASSEMBLER) { - pr_debug("Compiling block at PC: 0x%x\n", block->pc); + pr_debug("Compiling block at PC: 0x%08x\n", block->pc); jit_disassemble(); } jit_clear_state(); -#if ENABLE_THREADED_COMPILER if (fully_tagged) - op_list_freed = atomic_flag_test_and_set(&block->op_list_freed); -#endif - if (fully_tagged && !op_list_freed) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (fully_tagged && !(old_flags & BLOCK_NO_OPCODE_LIST)) { pr_debug("Block PC 0x%08x is fully tagged" " - free opcode list\n", block->pc); - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_add(state->reaper, + lightrec_reap_opcode_list, + block->opcode_list); + } else { + lightrec_free_opcode_list(state, block->opcode_list); + } } if (oldjit) { pr_debug("Block 0x%08x recompiled, reaping old jit context.\n", block->pc); - if (ENABLE_THREADED_COMPILER) + if (ENABLE_THREADED_COMPILER) { 
lightrec_reaper_add(state->reaper, lightrec_reap_jit, oldjit); - else + lightrec_reaper_add(state->reaper, + lightrec_reap_function, old_fn); + } else { _jit_destroy_state(oldjit); + lightrec_free_function(state, old_fn); + } + + lightrec_unregister(MEM_FOR_CODE, old_code_size); } return 0; @@ -1240,20 +1634,24 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) return state->next_pc; } -u32 lightrec_execute_one(struct lightrec_state *state, u32 pc) +u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc, + u32 target_cycle) { - return lightrec_execute(state, pc, state->current_cycle); -} - -u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) -{ - struct block *block = lightrec_get_block(state, pc); - if (!block) - return 0; + struct block *block; state->exit_flags = LIGHTREC_EXIT_NORMAL; + state->target_cycle = target_cycle; + + do { + block = lightrec_get_block(state, pc); + if (!block) + break; + + pc = lightrec_emulate_block(state, block, pc); - pc = lightrec_emulate_block(state, block, pc); + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_reap(state->reaper); + } while (state->current_cycle < state->target_cycle); if (LOG_LEVEL >= INFO_L) lightrec_print_info(state); @@ -1263,64 +1661,117 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) void lightrec_free_block(struct lightrec_state *state, struct block *block) { + u8 old_flags; + lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32)); - if (block->opcode_list) - lightrec_free_opcode_list(state, block); + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) + lightrec_free_opcode_list(state, block->opcode_list); if (block->_jit) _jit_destroy_state(block->_jit); - lightrec_unregister(MEM_FOR_CODE, block->code_size); + if (block->function) { + lightrec_free_function(state, block->function); + lightrec_unregister(MEM_FOR_CODE, block->code_size); + } lightrec_free(state, 
MEM_FOR_IR, sizeof(*block), block); } +struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state) +{ + struct lightrec_cstate *cstate; + + cstate = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*cstate)); + if (!cstate) + return NULL; + + cstate->reg_cache = lightrec_regcache_init(state); + if (!cstate->reg_cache) { + lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate); + return NULL; + } + + cstate->state = state; + + return cstate; +} + +void lightrec_free_cstate(struct lightrec_cstate *cstate) +{ + lightrec_free_regcache(cstate->reg_cache); + lightrec_free(cstate->state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate); +} + struct lightrec_state * lightrec_init(char *argv0, const struct lightrec_mem_map *map, size_t nb, const struct lightrec_ops *ops) { + const struct lightrec_mem_map *codebuf_map = &map[PSX_MAP_CODE_BUFFER]; struct lightrec_state *state; + uintptr_t addr; + void *tlsf = NULL; + bool with_32bit_lut = false; + size_t lut_size; /* Sanity-check ops */ - if (!ops || - !ops->cop0_ops.mfc || !ops->cop0_ops.cfc || !ops->cop0_ops.mtc || - !ops->cop0_ops.ctc || !ops->cop0_ops.op || - !ops->cop2_ops.mfc || !ops->cop2_ops.cfc || !ops->cop2_ops.mtc || - !ops->cop2_ops.ctc || !ops->cop2_ops.op) { + if (!ops || !ops->cop2_op || !ops->enable_ram) { pr_err("Missing callbacks in lightrec_ops structure\n"); return NULL; } + if (ops->cop2_notify) + pr_debug("Optional cop2_notify callback in lightrec_ops\n"); + else + pr_debug("No optional cop2_notify callback in lightrec_ops\n"); + + if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER + && codebuf_map->address) { + tlsf = tlsf_create_with_pool(codebuf_map->address, + codebuf_map->length); + if (!tlsf) { + pr_err("Unable to initialize code buffer\n"); + return NULL; + } + + if (__WORDSIZE == 64) { + addr = (uintptr_t) codebuf_map->address + codebuf_map->length - 1; + with_32bit_lut = addr == (u32) addr; + } + } + + if (with_32bit_lut) + lut_size = CODE_LUT_SIZE * 4; + else + lut_size = 
CODE_LUT_SIZE * sizeof(void *); + init_jit(argv0); - state = calloc(1, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + state = calloc(1, sizeof(*state) + lut_size); if (!state) goto err_finish_jit; - lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + lut_size); -#if ENABLE_TINYMM - state->tinymm = tinymm_init(malloc, free, 4096); - if (!state->tinymm) - goto err_free_state; -#endif + state->tlsf = tlsf; + state->with_32bit_lut = with_32bit_lut; state->block_cache = lightrec_blockcache_init(state); if (!state->block_cache) - goto err_free_tinymm; - - state->reg_cache = lightrec_regcache_init(state); - if (!state->reg_cache) - goto err_free_block_cache; + goto err_free_state; if (ENABLE_THREADED_COMPILER) { state->rec = lightrec_recompiler_init(state); if (!state->rec) - goto err_free_reg_cache; + goto err_free_block_cache; state->reaper = lightrec_reaper_init(state); if (!state->reaper) goto err_free_recompiler; + } else { + state->cstate = lightrec_create_cstate(state); + if (!state->cstate) + goto err_free_block_cache; } state->nb_maps = nb; @@ -1336,16 +1787,11 @@ struct lightrec_state * lightrec_init(char *argv0, if (!state->c_wrapper_block) goto err_free_dispatcher; - state->c_wrapper = state->c_wrapper_block->function; - state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb; state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb; state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb; state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb; - state->c_wrappers[C_WRAPPER_RFE] = lightrec_rfe_cb; state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb; - state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb; - state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb; map = &state->maps[PSX_MAP_BIOS]; state->offset_bios = (uintptr_t)map->address - map->pc; @@ -1353,6 +1799,9 @@ struct lightrec_state * lightrec_init(char *argv0, map = 
&state->maps[PSX_MAP_SCRATCH_PAD]; state->offset_scratch = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_HW_REGISTERS]; + state->offset_io = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_KERNEL_USER_RAM]; state->offset_ram = (uintptr_t)map->address - map->pc; @@ -1364,12 +1813,16 @@ struct lightrec_state * lightrec_init(char *argv0, if (state->offset_bios == 0 && state->offset_scratch == 0 && state->offset_ram == 0 && + state->offset_io == 0 && state->mirrors_mapped) { pr_info("Memory map is perfect. Emitted code will be best.\n"); } else { pr_info("Memory map is sub-par. Emitted code will be slow.\n"); } + if (state->with_32bit_lut) + pr_info("Using 32-bit LUT\n"); + return state; err_free_dispatcher: @@ -1380,20 +1833,18 @@ struct lightrec_state * lightrec_init(char *argv0, err_free_recompiler: if (ENABLE_THREADED_COMPILER) lightrec_free_recompiler(state->rec); -err_free_reg_cache: - lightrec_free_regcache(state->reg_cache); + else + lightrec_free_cstate(state->cstate); err_free_block_cache: lightrec_free_block_cache(state->block_cache); -err_free_tinymm: -#if ENABLE_TINYMM - tinymm_shutdown(state->tinymm); err_free_state: -#endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); err_finish_jit: finish_jit(); + if (ENABLE_CODE_BUFFER && tlsf) + tlsf_destroy(tlsf); return NULL; } @@ -1403,44 +1854,51 @@ void lightrec_destroy(struct lightrec_state *state) state->current_cycle = ~state->current_cycle; lightrec_print_info(state); + lightrec_free_block_cache(state->block_cache); + lightrec_free_block(state, state->dispatcher); + lightrec_free_block(state, state->c_wrapper_block); + if (ENABLE_THREADED_COMPILER) { lightrec_free_recompiler(state->rec); lightrec_reaper_destroy(state->reaper); + } else { + lightrec_free_cstate(state->cstate); } - lightrec_free_regcache(state->reg_cache); - lightrec_free_block_cache(state->block_cache); 
- lightrec_free_block(state, state->dispatcher); - lightrec_free_block(state, state->c_wrapper_block); finish_jit(); + if (ENABLE_CODE_BUFFER && state->tlsf) + tlsf_destroy(state->tlsf); -#if ENABLE_TINYMM - tinymm_shutdown(state->tinymm); -#endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); } void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len) { u32 kaddr = kunseg(addr & ~0x3); - const struct lightrec_mem_map *map = lightrec_get_map(state, NULL, kaddr); - - if (map) { - if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM]) - return; + enum psx_map idx = lightrec_get_map_idx(state, kaddr); + switch (idx) { + case PSX_MAP_MIRROR1: + case PSX_MAP_MIRROR2: + case PSX_MAP_MIRROR3: /* Handle mirrors */ - kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1); - - lightrec_invalidate_map(state, map, kaddr, len); + kaddr &= RAM_SIZE - 1; + fallthrough; + case PSX_MAP_KERNEL_USER_RAM: + break; + default: + return; } + + memset(lut_address(state, lut_offset(kaddr)), 0, + ((len + 3) / 4) * lut_elm_size(state)); } void lightrec_invalidate_all(struct lightrec_state *state) { - memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE); + memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE); } void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only) @@ -1464,16 +1922,6 @@ u32 lightrec_exit_flags(struct lightrec_state *state) return state->exit_flags; } -void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34]) -{ - memcpy(regs, state->native_reg_cache, sizeof(state->native_reg_cache)); -} - -void lightrec_restore_registers(struct lightrec_state *state, u32 regs[34]) -{ - memcpy(state->native_reg_cache, regs, sizeof(state->native_reg_cache)); -} - u32 lightrec_current_cycle_count(const struct lightrec_state *state) { return state->current_cycle; @@ -1496,3 +1944,8 @@ void 
lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles) state->target_cycle = cycles; } } + +struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state) +{ + return &state->regs; +} diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h index 1a2b5426a..310036ced 100644 --- a/deps/lightrec/lightrec.h +++ b/deps/lightrec/lightrec.h @@ -43,10 +43,11 @@ struct lightrec_mem_map; /* Exit flags */ #define LIGHTREC_EXIT_NORMAL (0) -#define LIGHTREC_EXIT_SYSCALL (1 << 0) +#define LIGHTREC_EXIT_CHECK_INTERRUPT (1 << 0) #define LIGHTREC_EXIT_BREAK (1 << 1) -#define LIGHTREC_EXIT_CHECK_INTERRUPT (1 << 2) +#define LIGHTREC_EXIT_SYSCALL (1 << 2) #define LIGHTREC_EXIT_SEGFAULT (1 << 3) +#define LIGHTREC_EXIT_NOMEM (1 << 4) enum psx_map { PSX_MAP_KERNEL_USER_RAM, @@ -58,6 +59,9 @@ enum psx_map { PSX_MAP_MIRROR1, PSX_MAP_MIRROR2, PSX_MAP_MIRROR3, + PSX_MAP_CODE_BUFFER, + + PSX_MAP_UNKNOWN, }; struct lightrec_mem_map_ops { @@ -80,17 +84,18 @@ struct lightrec_mem_map { const struct lightrec_mem_map *mirror_of; }; -struct lightrec_cop_ops { - u32 (*mfc)(struct lightrec_state *state, u32 op, u8 reg); - u32 (*cfc)(struct lightrec_state *state, u32 op, u8 reg); - void (*mtc)(struct lightrec_state *state, u32 op, u8 reg, u32 value); - void (*ctc)(struct lightrec_state *state, u32 op, u8 reg, u32 value); - void (*op)(struct lightrec_state *state, u32 op); +struct lightrec_ops { + void (*cop2_notify)(struct lightrec_state *state, u32 op, u32 data); + void (*cop2_op)(struct lightrec_state *state, u32 op); + void (*enable_ram)(struct lightrec_state *state, _Bool enable); + _Bool (*hw_direct)(u32 kaddr, _Bool is_write, u8 size); }; -struct lightrec_ops { - struct lightrec_cop_ops cop0_ops; - struct lightrec_cop_ops cop2_ops; +struct lightrec_registers { + u32 gpr[34]; + u32 cp0[32]; + u32 cp2d[32]; + u32 cp2c[32]; }; __api struct lightrec_state *lightrec_init(char *argv0, @@ -102,8 +107,8 @@ __api void lightrec_destroy(struct lightrec_state 
*state); __api u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle); -__api u32 lightrec_execute_one(struct lightrec_state *state, u32 pc); -__api u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc); +__api u32 lightrec_run_interpreter(struct lightrec_state *state, + u32 pc, u32 target_cycle); __api void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len); __api void lightrec_invalidate_all(struct lightrec_state *state); @@ -113,9 +118,7 @@ __api void lightrec_set_invalidate_mode(struct lightrec_state *state, __api void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags); __api u32 lightrec_exit_flags(struct lightrec_state *state); -__api void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34]); -__api void lightrec_restore_registers(struct lightrec_state *state, - u32 regs[34]); +__api struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state); __api u32 lightrec_current_cycle_count(const struct lightrec_state *state); __api void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles); diff --git a/deps/lightrec/memmanager.c b/deps/lightrec/memmanager.c index fb626ded4..c7502cdba 100644 --- a/deps/lightrec/memmanager.c +++ b/deps/lightrec/memmanager.c @@ -3,14 +3,11 @@ * Copyright (C) 2019-2021 Paul Cercueil */ -#include "config.h" +#include "lightrec-config.h" #include "lightrec-private.h" #include "memmanager.h" #include -#if ENABLE_TINYMM -#include -#endif #ifdef ENABLE_THREADED_COMPILER #include @@ -67,12 +64,7 @@ void * lightrec_malloc(struct lightrec_state *state, { void *ptr; -#if ENABLE_TINYMM - if (type == MEM_FOR_IR) - ptr = tinymm_malloc(state->tinymm, len); - else -#endif - ptr = malloc(len); + ptr = malloc(len); if (!ptr) return NULL; @@ -86,12 +78,7 @@ void * lightrec_calloc(struct lightrec_state *state, { void *ptr; -#if ENABLE_TINYMM - if (type == MEM_FOR_IR) - ptr = tinymm_zalloc(state->tinymm, len); - else -#endif - ptr = 
calloc(1, len); + ptr = calloc(1, len); if (!ptr) return NULL; @@ -104,12 +91,7 @@ void lightrec_free(struct lightrec_state *state, enum mem_type type, unsigned int len, void *ptr) { lightrec_unregister(type, len); -#if ENABLE_TINYMM - if (type == MEM_FOR_IR) - tinymm_free(state->tinymm, ptr); - else -#endif - free(ptr); + free(ptr); } float lightrec_get_average_ipi(void) diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index b7d538be1..10067a7d0 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -3,7 +3,7 @@ * Copyright (C) 2014-2021 Paul Cercueil */ -#include "config.h" +#include "lightrec-config.h" #include "disassembler.h" #include "lightrec.h" #include "memmanager.h" @@ -22,6 +22,8 @@ struct optimizer_list { unsigned int nb_optimizers; }; +static bool is_nop(union code op); + bool is_unconditional_jump(union code c) { switch (c.i.op) { @@ -67,6 +69,9 @@ static u64 opcode_read_mask(union code op) case OP_SPECIAL_MFLO: return BIT(REG_LO); case OP_SPECIAL_SLL: + if (!op.r.imm) + return 0; + fallthrough; case OP_SPECIAL_SRL: case OP_SPECIAL_SRA: return BIT(op.r.rt); @@ -97,6 +102,9 @@ static u64 opcode_read_mask(union code op) case OP_LUI: return 0; case OP_BEQ: + if (op.i.rs == op.i.rt) + return 0; + fallthrough; case OP_BNE: case OP_LWL: case OP_LWR: @@ -111,11 +119,31 @@ static u64 opcode_read_mask(union code op) } } -static u64 opcode_write_mask(union code op) +static u64 mult_div_write_mask(union code op) { u64 flags; + if (!OPT_FLAG_MULT_DIV) + return BIT(REG_LO) | BIT(REG_HI); + + if (op.r.rd) + flags = BIT(op.r.rd); + else + flags = BIT(REG_LO); + if (op.r.imm) + flags |= BIT(op.r.imm); + else + flags |= BIT(REG_HI); + + return flags; +} + +static u64 opcode_write_mask(union code op) +{ switch (op.i.op) { + case OP_META_MULT2: + case OP_META_MULTU2: + return mult_div_write_mask(op); case OP_SPECIAL: switch (op.r.op) { case OP_SPECIAL_JR: @@ -126,22 +154,15 @@ static u64 opcode_write_mask(union code op) case 
OP_SPECIAL_MULTU: case OP_SPECIAL_DIV: case OP_SPECIAL_DIVU: - if (!OPT_FLAG_MULT_DIV) - return BIT(REG_LO) | BIT(REG_HI); - - if (op.r.rd) - flags = BIT(op.r.rd); - else - flags = BIT(REG_LO); - if (op.r.imm) - flags |= BIT(op.r.imm); - else - flags |= BIT(REG_HI); - return flags; + return mult_div_write_mask(op); case OP_SPECIAL_MTHI: return BIT(REG_HI); case OP_SPECIAL_MTLO: return BIT(REG_LO); + case OP_SPECIAL_SLL: + if (!op.r.imm) + return 0; + fallthrough; default: return BIT(op.r.rd); } @@ -160,6 +181,8 @@ static u64 opcode_write_mask(union code op) case OP_LBU: case OP_LHU: case OP_LWR: + case OP_META_EXTC: + case OP_META_EXTS: return BIT(op.i.rt); case OP_JAL: return BIT(31); @@ -207,6 +230,116 @@ bool opcode_writes_register(union code op, u8 reg) return opcode_write_mask(op) & BIT(reg); } +static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg) +{ + union code c; + unsigned int i; + + if (op_flag_sync(list[offset].flags)) + return -1; + + for (i = offset; i > 0; i--) { + c = list[i - 1].c; + + if (opcode_writes_register(c, reg)) { + if (i > 1 && has_delay_slot(list[i - 2].c)) + break; + + return i - 1; + } + + if (op_flag_sync(list[i - 1].flags) || + has_delay_slot(c) || + opcode_reads_register(c, reg)) + break; + } + + return -1; +} + +static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg) +{ + unsigned int i; + union code c; + + if (op_flag_sync(list[offset].flags)) + return -1; + + for (i = offset; ; i++) { + c = list[i].c; + + if (opcode_reads_register(c, reg)) { + if (i > 0 && has_delay_slot(list[i - 1].c)) + break; + + return i; + } + + if (op_flag_sync(list[i].flags) || + has_delay_slot(c) || opcode_writes_register(c, reg)) + break; + } + + return -1; +} + +static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg) +{ + unsigned int i; + + if (op_flag_sync(list[offset].flags)) + return false; + + for (i = offset + 1; ; i++) { + if (opcode_reads_register(list[i].c, reg)) 
+ return false; + + if (opcode_writes_register(list[i].c, reg)) + return true; + + if (has_delay_slot(list[i].c)) { + if (op_flag_no_ds(list[i].flags) || + opcode_reads_register(list[i + 1].c, reg)) + return false; + + return opcode_writes_register(list[i + 1].c, reg); + } + } +} + +static bool reg_is_read(const struct opcode *list, + unsigned int a, unsigned int b, u8 reg) +{ + /* Return true if reg is read in one of the opcodes of the interval + * [a, b[ */ + for (; a < b; a++) { + if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg)) + return true; + } + + return false; +} + +static bool reg_is_written(const struct opcode *list, + unsigned int a, unsigned int b, u8 reg) +{ + /* Return true if reg is written in one of the opcodes of the interval + * [a, b[ */ + + for (; a < b; a++) { + if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg)) + return true; + } + + return false; +} + +static bool reg_is_read_or_written(const struct opcode *list, + unsigned int a, unsigned int b, u8 reg) +{ + return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg); +} + static bool opcode_is_load(union code op) { switch (op.i.op) { @@ -239,6 +372,22 @@ static bool opcode_is_store(union code op) } } +static u8 opcode_get_io_size(union code op) +{ + switch (op.i.op) { + case OP_LB: + case OP_LBU: + case OP_SB: + return 8; + case OP_LH: + case OP_LHU: + case OP_SH: + return 16; + default: + return 32; + } +} + bool opcode_is_io(union code op) { return opcode_is_load(op) || opcode_is_store(op); @@ -348,8 +497,19 @@ bool load_in_delay_slot(union code op) return false; } -static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) +static u32 lightrec_propagate_consts(const struct opcode *op, + const struct opcode *prev, + u32 known, u32 *v) { + union code c = prev->c; + + /* Register $zero is always, well, zero */ + known |= BIT(0); + v[0] = 0; + + if (op_flag_sync(op->flags)) + return BIT(0); + switch (c.i.op) { case OP_SPECIAL: switch 
(c.r.op) { @@ -468,10 +628,52 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) known &= ~BIT(c.r.rd); } break; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + case OP_SPECIAL_DIV: + case OP_SPECIAL_DIVU: + if (OPT_FLAG_MULT_DIV && c.r.rd) + known &= ~BIT(c.r.rd); + if (OPT_FLAG_MULT_DIV && c.r.imm) + known &= ~BIT(c.r.imm); + break; + case OP_SPECIAL_MFLO: + case OP_SPECIAL_MFHI: + known &= ~BIT(c.r.rd); + break; default: break; } break; + case OP_META_MULT2: + case OP_META_MULTU2: + if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) { + if (c.r.rd) { + known |= BIT(c.r.rd); + + if (c.r.op < 32) + v[c.r.rd] = v[c.r.rs] << c.r.op; + else + v[c.r.rd] = 0; + } + + if (c.r.imm) { + known |= BIT(c.r.imm); + + if (c.r.op >= 32) + v[c.r.imm] = v[c.r.rs] << (c.r.op - 32); + else if (c.i.op == OP_META_MULT2) + v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op); + else + v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op); + } + } else { + if (OPT_FLAG_MULT_DIV && c.r.rd) + known &= ~BIT(c.r.rd); + if (OPT_FLAG_MULT_DIV && c.r.imm) + known &= ~BIT(c.r.imm); + } + break; case OP_REGIMM: break; case OP_ADDI: @@ -563,6 +765,22 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) known &= ~BIT(c.r.rd); } break; + case OP_META_EXTC: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = (s32)(s8)v[c.i.rs]; + } else { + known &= ~BIT(c.i.rt); + } + break; + case OP_META_EXTS: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = (s32)(s16)v[c.i.rs]; + } else { + known &= ~BIT(c.i.rt); + } + break; default: break; } @@ -570,90 +788,353 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) return known; } +static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset) +{ + struct opcode *prev, *prev2 = NULL, *curr = &list[offset]; + struct opcode *to_change, *to_nop; + int idx, idx2; + + if (curr->r.imm != 24 && curr->r.imm != 16) + return; + + idx = find_prev_writer(list, offset, curr->r.rt); + 
if (idx < 0) + return; + + prev = &list[idx]; + + if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL || + prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt) + return; + + if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) { + /* sll rY, rX, 16 + * ... + * sra rZ, rY, 16 */ + + if (!reg_is_dead(list, offset, curr->r.rt) || + reg_is_read_or_written(list, idx, offset, curr->r.rd)) + return; + + /* If rY is dead after the SRA, and rZ is neither read nor written + * between the SLL and the SRA, we can change rY to rZ */ + + pr_debug("Detected SLL/SRA with middle temp register\n"); + prev->r.rd = curr->r.rd; + curr->r.rt = prev->r.rd; + } + + /* We got a SLL/SRA combo. If imm #16, that's a sign-extending cast to s16. + * If imm #24 that's a sign-extending cast to s8. + * + * First of all, make sure that the target register of the SLL is not + * read before the SRA. */ + + if (prev->r.rd == prev->r.rt) { + /* sll rX, rX, 16 + * ... + * sra rY, rX, 16 */ + to_change = curr; + to_nop = prev; + + /* rX is used after the SRA - we cannot convert it. */ + if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd)) + return; + } else { + /* sll rY, rX, 16 + * ... + * sra rY, rY, 16 */ + to_change = prev; + to_nop = curr; + } + + idx2 = find_prev_writer(list, idx, prev->r.rt); + if (idx2 >= 0) { + /* Note that PSX games sometimes do casts after + * a LHU or LBU; in this case we can change the + * load opcode to a LH or LB, and the cast can + * be changed to a MOV or a simple NOP.
*/ + + prev2 = &list[idx2]; + + if (curr->r.rd != prev2->i.rt && + !reg_is_dead(list, offset, prev2->i.rt)) + prev2 = NULL; + else if (curr->r.imm == 16 && prev2->i.op == OP_LHU) + prev2->i.op = OP_LH; + else if (curr->r.imm == 24 && prev2->i.op == OP_LBU) + prev2->i.op = OP_LB; + else + prev2 = NULL; + + if (prev2) { + if (curr->r.rd == prev2->i.rt) { + to_change->opcode = 0; + } else if (reg_is_dead(list, offset, prev2->i.rt) && + !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) { + /* The target register of the SRA is dead after the + * LBU/LHU; we can change the target register of the + * LBU/LHU to the one of the SRA. */ + prev2->i.rt = curr->r.rd; + to_change->opcode = 0; + } else { + to_change->i.op = OP_META_MOV; + to_change->r.rd = curr->r.rd; + to_change->r.rs = prev2->i.rt; + } + + if (to_nop->r.imm == 24) + pr_debug("Convert LBU+SLL+SRA to LB\n"); + else + pr_debug("Convert LHU+SLL+SRA to LH\n"); + } + } + + if (!prev2) { + pr_debug("Convert SLL/SRA #%u to EXT%c\n", + prev->r.imm, + prev->r.imm == 24 ? 
'C' : 'S'); + + if (to_change == prev) { + to_change->i.rs = prev->r.rt; + to_change->i.rt = curr->r.rd; + } else { + to_change->i.rt = curr->r.rd; + to_change->i.rs = prev->r.rt; + } + + if (to_nop->r.imm == 24) + to_change->i.op = OP_META_EXTC; + else + to_change->i.op = OP_META_EXTS; + } + + to_nop->opcode = 0; +} + +static void lightrec_remove_useless_lui(struct block *block, unsigned int offset, + u32 known, u32 *values) +{ + struct opcode *list = block->opcode_list, + *op = &block->opcode_list[offset]; + int reader; + + if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) && + values[op->i.rt] == op->i.imm << 16) { + pr_debug("Converting duplicated LUI to NOP\n"); + op->opcode = 0x0; + return; + } + + if (op->i.imm != 0 || op->i.rt == 0) + return; + + reader = find_next_reader(list, offset + 1, op->i.rt); + if (reader <= 0) + return; + + if (opcode_writes_register(list[reader].c, op->i.rt) || + reg_is_dead(list, reader, op->i.rt)) { + pr_debug("Removing useless LUI 0x0\n"); + + if (list[reader].i.rs == op->i.rt) + list[reader].i.rs = 0; + if (list[reader].i.op == OP_SPECIAL && + list[reader].i.rt == op->i.rt) + list[reader].i.rt = 0; + op->opcode = 0x0; + } +} + +static void lightrec_modify_lui(struct block *block, unsigned int offset) +{ + union code c, *lui = &block->opcode_list[offset].c; + bool stop = false, stop_next = false; + unsigned int i; + + for (i = offset + 1; !stop && i < block->nb_ops; i++) { + c = block->opcode_list[i].c; + stop = stop_next; + + if ((opcode_is_store(c) && c.i.rt == lui->i.rt) + || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt))) + break; + + if (opcode_writes_register(c, lui->i.rt)) { + pr_debug("Convert LUI at offset 0x%x to kuseg\n", + i - 1 << 2); + lui->i.imm = kunseg(lui->i.imm << 16) >> 16; + break; + } + + if (has_delay_slot(c)) + stop_next = true; + } +} + +static int lightrec_transform_branches(struct lightrec_state *state, + struct block *block) +{ + struct opcode *op; + unsigned int i; + s32 offset; + 
+ for (i = 0; i < block->nb_ops; i++) { + op = &block->opcode_list[i]; + + switch (op->i.op) { + case OP_J: + /* Transform J opcode into BEQ $zero, $zero if possible. */ + offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm) + - (s32)(block->pc >> 2) - (s32)i - 1; + + if (offset == (s16)offset) { + pr_debug("Transform J into BEQ $zero, $zero\n"); + op->i.op = OP_BEQ; + op->i.rs = 0; + op->i.rt = 0; + op->i.imm = offset; + + } + fallthrough; + default: + break; + } + } + + return 0; +} + +static inline bool is_power_of_two(u32 value) +{ + return popcount32(value) == 1; +} + static int lightrec_transform_ops(struct lightrec_state *state, struct block *block) { - struct opcode *list; + struct opcode *list = block->opcode_list; + struct opcode *prev, *op = NULL; + u32 known = BIT(0); + u32 values[32] = { 0 }; unsigned int i; + u8 tmp; for (i = 0; i < block->nb_ops; i++) { - list = &block->opcode_list[i]; + prev = op; + op = &list[i]; + + if (prev) + known = lightrec_propagate_consts(op, prev, known, values); /* Transform all opcodes detected as useless to real NOPs * (0x0: SLL r0, r0, #0) */ - if (list->opcode != 0 && is_nop(list->c)) { + if (op->opcode != 0 && is_nop(op->c)) { pr_debug("Converting useless opcode 0x%08x to NOP\n", - list->opcode); - list->opcode = 0x0; + op->opcode); + op->opcode = 0x0; } - if (!list->opcode) + if (!op->opcode) continue; - switch (list->i.op) { - /* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the - * two registers is zero. 
*/ + switch (op->i.op) { case OP_BEQ: - if ((list->i.rs == 0) ^ (list->i.rt == 0)) { - list->i.op = OP_META_BEQZ; - if (list->i.rs == 0) { - list->i.rs = list->i.rt; - list->i.rt = 0; - } - } else if (list->i.rs == list->i.rt) { - list->i.rs = 0; - list->i.rt = 0; + if (op->i.rs == op->i.rt) { + op->i.rs = 0; + op->i.rt = 0; + } else if (op->i.rs == 0) { + op->i.rs = op->i.rt; + op->i.rt = 0; } break; + case OP_BNE: - if (list->i.rs == 0) { - list->i.op = OP_META_BNEZ; - list->i.rs = list->i.rt; - list->i.rt = 0; - } else if (list->i.rt == 0) { - list->i.op = OP_META_BNEZ; + if (op->i.rs == 0) { + op->i.rs = op->i.rt; + op->i.rt = 0; } break; + case OP_LUI: + if (!prev || !has_delay_slot(prev->c)) + lightrec_modify_lui(block, i); + lightrec_remove_useless_lui(block, i, known, values); + break; + /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU * with register $zero to the MOV meta-opcode */ case OP_ORI: case OP_ADDI: case OP_ADDIU: - if (list->i.imm == 0) { + if (op->i.imm == 0) { pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n"); - list->i.op = OP_META_MOV; - list->r.rd = list->i.rt; + op->i.op = OP_META_MOV; + op->r.rd = op->i.rt; } break; case OP_SPECIAL: - switch (list->r.op) { - case OP_SPECIAL_SLL: + switch (op->r.op) { case OP_SPECIAL_SRA: + if (op->r.imm == 0) { + pr_debug("Convert SRA #0 to MOV\n"); + op->i.op = OP_META_MOV; + op->r.rs = op->r.rt; + break; + } + + lightrec_optimize_sll_sra(block->opcode_list, i); + break; + case OP_SPECIAL_SLL: case OP_SPECIAL_SRL: - if (list->r.imm == 0) { - pr_debug("Convert SLL/SRL/SRA #0 to MOV\n"); - list->i.op = OP_META_MOV; - list->r.rs = list->r.rt; + if (op->r.imm == 0) { + pr_debug("Convert SLL/SRL #0 to MOV\n"); + op->i.op = OP_META_MOV; + op->r.rs = op->r.rt; + } + break; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + if ((known & BIT(op->r.rs)) && + is_power_of_two(values[op->r.rs])) { + tmp = op->c.i.rs; + op->c.i.rs = op->c.i.rt; + op->c.i.rt = tmp; + } else if (!(known & BIT(op->r.rt)) || + 
!is_power_of_two(values[op->r.rt])) { + break; } + + pr_debug("Multiply by power-of-two: %u\n", + values[op->r.rt]); + + if (op->r.op == OP_SPECIAL_MULT) + op->i.op = OP_META_MULT2; + else + op->i.op = OP_META_MULTU2; + + op->r.op = ctz32(values[op->r.rt]); break; case OP_SPECIAL_OR: case OP_SPECIAL_ADD: case OP_SPECIAL_ADDU: - if (list->r.rs == 0) { + if (op->r.rs == 0) { pr_debug("Convert OR/ADD $zero to MOV\n"); - list->i.op = OP_META_MOV; - list->r.rs = list->r.rt; + op->i.op = OP_META_MOV; + op->r.rs = op->r.rt; } - case OP_SPECIAL_SUB: /* fall-through */ + fallthrough; + case OP_SPECIAL_SUB: case OP_SPECIAL_SUBU: - if (list->r.rt == 0) { + if (op->r.rt == 0) { pr_debug("Convert OR/ADD/SUB $zero to MOV\n"); - list->i.op = OP_META_MOV; + op->i.op = OP_META_MOV; } - default: /* fall-through */ + fallthrough; + default: break; } - default: /* fall-through */ + fallthrough; + default: break; } } @@ -661,12 +1142,70 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl return 0; } +static bool lightrec_can_switch_delay_slot(union code op, union code next_op) +{ + switch (op.i.op) { + case OP_SPECIAL: + switch (op.r.op) { + case OP_SPECIAL_JALR: + if (opcode_reads_register(next_op, op.r.rd) || + opcode_writes_register(next_op, op.r.rd)) + return false; + fallthrough; + case OP_SPECIAL_JR: + if (opcode_writes_register(next_op, op.r.rs)) + return false; + fallthrough; + default: + break; + } + fallthrough; + case OP_J: + break; + case OP_JAL: + if (opcode_reads_register(next_op, 31) || + opcode_writes_register(next_op, 31)) + return false;; + + break; + case OP_BEQ: + case OP_BNE: + if (op.i.rt && opcode_writes_register(next_op, op.i.rt)) + return false; + fallthrough; + case OP_BLEZ: + case OP_BGTZ: + if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) + return false; + break; + case OP_REGIMM: + switch (op.r.rt) { + case OP_REGIMM_BLTZAL: + case OP_REGIMM_BGEZAL: + if (opcode_reads_register(next_op, 31) || + 
opcode_writes_register(next_op, 31)) + return false; + fallthrough; + case OP_REGIMM_BLTZ: + case OP_REGIMM_BGEZ: + if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) + return false; + break; + } + fallthrough; + default: + break; + } + + return true; +} + static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block) { struct opcode *list, *next = &block->opcode_list[0]; unsigned int i; union code op, next_op; - u8 flags; + u32 flags; for (i = 0; i < block->nb_ops - 1; i++) { list = next; @@ -674,77 +1213,29 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc next_op = next->c; op = list->c; - if (!has_delay_slot(op) || - list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) || + if (!has_delay_slot(op) || op_flag_no_ds(list->flags) || + op_flag_emulate_branch(list->flags) || op.opcode == 0 || next_op.opcode == 0) continue; if (i && has_delay_slot(block->opcode_list[i - 1].c) && - !(block->opcode_list[i - 1].flags & LIGHTREC_NO_DS)) + !op_flag_no_ds(block->opcode_list[i - 1].flags)) continue; - if ((list->flags & LIGHTREC_SYNC) || - (next->flags & LIGHTREC_SYNC)) + if (op_flag_sync(next->flags)) continue; - switch (list->i.op) { - case OP_SPECIAL: - switch (op.r.op) { - case OP_SPECIAL_JALR: - if (opcode_reads_register(next_op, op.r.rd) || - opcode_writes_register(next_op, op.r.rd)) - continue; - case OP_SPECIAL_JR: /* fall-through */ - if (opcode_writes_register(next_op, op.r.rs)) - continue; - default: /* fall-through */ - break; - } - case OP_J: /* fall-through */ - break; - case OP_JAL: - if (opcode_reads_register(next_op, 31) || - opcode_writes_register(next_op, 31)) - continue; - else - break; - case OP_BEQ: - case OP_BNE: - if (op.i.rt && opcode_writes_register(next_op, op.i.rt)) - continue; - case OP_BLEZ: /* fall-through */ - case OP_BGTZ: - case OP_META_BEQZ: - case OP_META_BNEZ: - if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) - continue; - break; - case OP_REGIMM: - switch 
(op.r.rt) { - case OP_REGIMM_BLTZAL: - case OP_REGIMM_BGEZAL: - if (opcode_reads_register(next_op, 31) || - opcode_writes_register(next_op, 31)) - continue; - case OP_REGIMM_BLTZ: /* fall-through */ - case OP_REGIMM_BGEZ: - if (op.i.rs && - opcode_writes_register(next_op, op.i.rs)) - continue; - break; - } - default: /* fall-through */ - break; - } + if (!lightrec_can_switch_delay_slot(list->c, next_op)) + continue; pr_debug("Swap branch and delay slot opcodes " "at offsets 0x%x / 0x%x\n", i << 2, (i + 1) << 2); - flags = next->flags; + flags = next->flags | (list->flags & LIGHTREC_SYNC); list->c = next_op; next->c = op; - next->flags = list->flags | LIGHTREC_NO_DS; + next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC; list->flags = flags | LIGHTREC_NO_DS; } @@ -753,7 +1244,7 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size) { - struct opcode *list; + struct opcode_list *list, *old_list; if (new_size >= block->nb_ops) { pr_err("Invalid shrink size (%u vs %u)\n", @@ -761,19 +1252,20 @@ static int shrink_opcode_list(struct lightrec_state *state, struct block *block, return -EINVAL; } - list = lightrec_malloc(state, MEM_FOR_IR, - sizeof(*list) * new_size); + sizeof(*list) + sizeof(struct opcode) * new_size); if (!list) { pr_err("Unable to allocate memory\n"); return -ENOMEM; } - memcpy(list, block->opcode_list, sizeof(*list) * new_size); + old_list = container_of(block->opcode_list, struct opcode_list, ops); + memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size); - lightrec_free_opcode_list(state, block); - block->opcode_list = list; + lightrec_free_opcode_list(state, block->opcode_list); + list->nb_ops = new_size; block->nb_ops = new_size; + block->opcode_list = list->ops; pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n", block->pc, new_size); @@ -784,13 +1276,14 @@ static int 
shrink_opcode_list(struct lightrec_state *state, struct block *block, static int lightrec_detect_impossible_branches(struct lightrec_state *state, struct block *block) { - struct opcode *op, *next = &block->opcode_list[0]; + struct opcode *op, *list = block->opcode_list, *next = &list[0]; unsigned int i; int ret = 0; + s16 offset; for (i = 0; i < block->nb_ops - 1; i++) { op = next; - next = &block->opcode_list[i + 1]; + next = &list[i + 1]; if (!has_delay_slot(op->c) || (!load_in_delay_slot(next->c) && @@ -805,9 +1298,23 @@ static int lightrec_detect_impossible_branches(struct lightrec_state *state, continue; } + offset = i + 1 + (s16)op->i.imm; + if (load_in_delay_slot(next->c) && + (offset >= 0 && offset < block->nb_ops) && + !opcode_reads_register(list[offset].c, next->c.i.rt)) { + /* The 'impossible' branch is a local branch - we can + * verify here that the first opcode of the target does + * not use the target register of the delay slot */ + + pr_debug("Branch at offset 0x%x has load delay slot, " + "but is local and dest opcode does not read " + "dest register\n", i << 2); + continue; + } + op->flags |= LIGHTREC_EMULATE_BRANCH; - if (op == block->opcode_list) { + if (op == list) { pr_debug("First opcode of block PC 0x%08x is an impossible branch\n", block->pc); @@ -841,12 +1348,11 @@ static int lightrec_local_branches(struct lightrec_state *state, struct block *b case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: - case OP_META_BEQZ: - case OP_META_BNEZ: offset = i + 1 + (s16)list->i.imm; if (offset >= 0 && offset < block->nb_ops) break; - default: /* fall-through */ + fallthrough; + default: continue; } @@ -889,8 +1395,6 @@ bool has_delay_slot(union code op) case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: - case OP_META_BEQZ: - case OP_META_BNEZ: return true; default: return false; @@ -899,105 +1403,287 @@ bool has_delay_slot(union code op) bool should_emulate(const struct opcode *list) { - return has_delay_slot(list->c) && - (list->flags & LIGHTREC_EMULATE_BRANCH); 
+ return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c); +} + +static bool op_writes_rd(union code c) +{ + switch (c.i.op) { + case OP_SPECIAL: + case OP_META_MOV: + return true; + default: + return false; + } +} + +static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op) +{ + if (op_writes_rd(op->c) && reg == op->r.rd) + op->flags |= LIGHTREC_REG_RD(reg_op); + else if (op->i.rs == reg) + op->flags |= LIGHTREC_REG_RS(reg_op); + else if (op->i.rt == reg) + op->flags |= LIGHTREC_REG_RT(reg_op); + else + pr_debug("Cannot add unload/clean/discard flag: " + "opcode does not touch register %s!\n", + lightrec_reg_name(reg)); } static void lightrec_add_unload(struct opcode *op, u8 reg) { - if (op->i.op == OP_SPECIAL && reg == op->r.rd) - op->flags |= LIGHTREC_UNLOAD_RD; + lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD); +} + +static void lightrec_add_discard(struct opcode *op, u8 reg) +{ + lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD); +} + +static void lightrec_add_clean(struct opcode *op, u8 reg) +{ + lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN); +} + +static void +lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w) +{ + unsigned int reg; + s16 offset; + + for (reg = 0; reg < 34; reg++) { + offset = s16_max(last_w[reg], last_r[reg]); + + if (offset >= 0) + lightrec_add_unload(&list[offset], reg); + } - if (op->i.rs == reg) - op->flags |= LIGHTREC_UNLOAD_RS; - if (op->i.rt == reg) - op->flags |= LIGHTREC_UNLOAD_RT; + memset(last_r, 0xff, sizeof(*last_r) * 34); + memset(last_w, 0xff, sizeof(*last_w) * 34); } static int lightrec_early_unload(struct lightrec_state *state, struct block *block) { - unsigned int i, offset; + u16 i, offset; struct opcode *op; + s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0; + u64 mask_r, mask_w, dirty = 0, loaded = 0; u8 reg; - for (reg = 1; reg < 34; reg++) { - int last_r_id = -1, last_w_id = -1; + memset(last_r, 0xff, sizeof(last_r)); + memset(last_w, 0xff, 
sizeof(last_w)); + + /* + * Clean if: + * - the register is dirty, and is read again after a branch opcode + * + * Unload if: + * - the register is dirty or loaded, and is not read again + * - the register is dirty or loaded, and is written again after a branch opcode + * - the next opcode has the SYNC flag set + * + * Discard if: + * - the register is dirty or loaded, and is written again + */ + + for (i = 0; i < block->nb_ops; i++) { + op = &block->opcode_list[i]; - for (i = 0; i < block->nb_ops; i++) { - union code c = block->opcode_list[i].c; + if (op_flag_sync(op->flags) || should_emulate(op)) { + /* The next opcode has the SYNC flag set, or is a branch + * that should be emulated: unload all registers. */ + lightrec_early_unload_sync(block->opcode_list, last_r, last_w); + dirty = 0; + loaded = 0; + } - if (opcode_reads_register(c, reg)) - last_r_id = i; - if (opcode_writes_register(c, reg)) - last_w_id = i; + if (next_sync == i) { + last_sync = i; + pr_debug("Last sync: 0x%x\n", last_sync << 2); } - if (last_w_id > last_r_id) - offset = (unsigned int)last_w_id; - else if (last_r_id >= 0) - offset = (unsigned int)last_r_id; - else - continue; + if (has_delay_slot(op->c)) { + next_sync = i + 1 + !op_flag_no_ds(op->flags); + pr_debug("Next sync: 0x%x\n", next_sync << 2); + } - op = &block->opcode_list[offset]; + mask_r = opcode_read_mask(op->c); + mask_w = opcode_write_mask(op->c); - if (has_delay_slot(op->c) && (op->flags & LIGHTREC_NO_DS)) - offset++; + for (reg = 0; reg < 34; reg++) { + if (mask_r & BIT(reg)) { + if (dirty & BIT(reg) && last_w[reg] < last_sync) { + /* The register is dirty, and is read + * again after a branch: clean it */ - if (offset == block->nb_ops) - continue; + lightrec_add_clean(&block->opcode_list[last_w[reg]], reg); + dirty &= ~BIT(reg); + loaded |= BIT(reg); + } + + last_r[reg] = i; + } + + if (mask_w & BIT(reg)) { + if ((dirty & BIT(reg) && last_w[reg] < last_sync) || + (loaded & BIT(reg) && last_r[reg] < last_sync)) { + /* The 
register is dirty or loaded, and + * is written again after a branch: + * unload it */ + + offset = s16_max(last_w[reg], last_r[reg]); + lightrec_add_unload(&block->opcode_list[offset], reg); + dirty &= ~BIT(reg); + loaded &= ~BIT(reg); + } else if (!(mask_r & BIT(reg)) && + ((dirty & BIT(reg) && last_w[reg] > last_sync) || + (loaded & BIT(reg) && last_r[reg] > last_sync))) { + /* The register is dirty or loaded, and + * is written again: discard it */ + + offset = s16_max(last_w[reg], last_r[reg]); + lightrec_add_discard(&block->opcode_list[offset], reg); + dirty &= ~BIT(reg); + loaded &= ~BIT(reg); + } - lightrec_add_unload(&block->opcode_list[offset], reg); + last_w[reg] = i; + } + + } + + dirty |= mask_w; + loaded |= mask_r; } + /* Unload all registers that are dirty or loaded at the end of block. */ + lightrec_early_unload_sync(block->opcode_list, last_r, last_w); + return 0; } -static int lightrec_flag_stores(struct lightrec_state *state, struct block *block) +static int lightrec_flag_io(struct lightrec_state *state, struct block *block) { - struct opcode *list; + struct opcode *prev = NULL, *list = NULL; + enum psx_map psx_map; u32 known = BIT(0); u32 values[32] = { 0 }; unsigned int i; + u32 val, kunseg_val; + bool no_mask; for (i = 0; i < block->nb_ops; i++) { + prev = list; list = &block->opcode_list[i]; - /* Register $zero is always, well, zero */ - known |= BIT(0); - values[0] = 0; + if (prev) + known = lightrec_propagate_consts(list, prev, known, values); switch (list->i.op) { case OP_SB: case OP_SH: case OP_SW: - /* Mark all store operations that target $sp or $gp - * as not requiring code invalidation. This is based - * on the heuristic that stores using one of these - * registers as address will never hit a code page. 
*/ - if (list->i.rs >= 28 && list->i.rs <= 29 && - !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) { - pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n", - list->opcode); - list->flags |= LIGHTREC_NO_INVALIDATE; - } + if (OPT_FLAG_STORES) { + /* Mark all store operations that target $sp or $gp + * as not requiring code invalidation. This is based + * on the heuristic that stores using one of these + * registers as address will never hit a code page. */ + if (list->i.rs >= 28 && list->i.rs <= 29 && + !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) { + pr_debug("Flaging opcode 0x%08x as not " + "requiring invalidation\n", + list->opcode); + list->flags |= LIGHTREC_NO_INVALIDATE; + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + } - /* Detect writes whose destination address is inside the - * current block, using constant propagation. When these - * occur, we mark the blocks as not compilable. */ - if ((known & BIT(list->i.rs)) && - kunseg(values[list->i.rs]) >= kunseg(block->pc) && - kunseg(values[list->i.rs]) < (kunseg(block->pc) + - block->nb_ops * 4)) { - pr_debug("Self-modifying block detected\n"); - block->flags |= BLOCK_NEVER_COMPILE; - list->flags |= LIGHTREC_SMC; + /* Detect writes whose destination address is inside the + * current block, using constant propagation. When these + * occur, we mark the blocks as not compilable. 
*/ + if ((known & BIT(list->i.rs)) && + kunseg(values[list->i.rs]) >= kunseg(block->pc) && + kunseg(values[list->i.rs]) < (kunseg(block->pc) + + block->nb_ops * 4)) { + pr_debug("Self-modifying block detected\n"); + block_set_flags(block, BLOCK_NEVER_COMPILE); + list->flags |= LIGHTREC_SMC; + } + } + fallthrough; + case OP_SWL: + case OP_SWR: + case OP_SWC2: + case OP_LB: + case OP_LBU: + case OP_LH: + case OP_LHU: + case OP_LW: + case OP_LWL: + case OP_LWR: + case OP_LWC2: + if (OPT_FLAG_IO && (known & BIT(list->i.rs))) { + val = values[list->i.rs] + (s16) list->i.imm; + kunseg_val = kunseg(val); + psx_map = lightrec_get_map_idx(state, kunseg_val); + + list->flags &= ~LIGHTREC_IO_MASK; + no_mask = val == kunseg_val; + + switch (psx_map) { + case PSX_MAP_KERNEL_USER_RAM: + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; + fallthrough; + case PSX_MAP_MIRROR1: + case PSX_MAP_MIRROR2: + case PSX_MAP_MIRROR3: + pr_debug("Flaging opcode %u as RAM access\n", i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM); + if (no_mask && state->mirrors_mapped) + list->flags |= LIGHTREC_NO_MASK; + break; + case PSX_MAP_BIOS: + pr_debug("Flaging opcode %u as BIOS access\n", i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS); + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; + break; + case PSX_MAP_SCRATCH_PAD: + pr_debug("Flaging opcode %u as scratchpad access\n", i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH); + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; + + /* Consider that we're never going to run code from + * the scratchpad. 
*/ + list->flags |= LIGHTREC_NO_INVALIDATE; + break; + case PSX_MAP_HW_REGISTERS: + if (state->ops.hw_direct && + state->ops.hw_direct(kunseg_val, + opcode_is_store(list->c), + opcode_get_io_size(list->c))) { + pr_debug("Flagging opcode %u as direct I/O access\n", + i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW); + + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; + break; + } + fallthrough; + default: + pr_debug("Flagging opcode %u as I/O access\n", + i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + break; + } } - default: /* fall-through */ + fallthrough; + default: break; } - - known = lightrec_propagate_consts(list->c, known, values); } return 0; @@ -1023,7 +1709,7 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, mask |= opcode_read_mask(op->c); mask |= opcode_write_mask(op->c); - if (op->flags & LIGHTREC_SYNC) + if (op_flag_sync(op->flags)) sync = true; switch (op->i.op) { @@ -1032,14 +1718,11 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: - case OP_META_BEQZ: - case OP_META_BNEZ: /* TODO: handle backwards branches too */ - if (!last && - (op->flags & LIGHTREC_LOCAL_BRANCH) && + if (!last && op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) { branch_offset = i + 1 + (s16)op->c.i.imm - - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); + - !!op_flag_no_ds(op->flags); reg = get_mfhi_mflo_reg(block, branch_offset, NULL, mask, sync, mflo, false); @@ -1052,6 +1735,9 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, } return mflo ? 
REG_LO : REG_HI; + case OP_META_MULT2: + case OP_META_MULTU2: + return 0; case OP_SPECIAL: switch (op->r.op) { case OP_SPECIAL_MULT: @@ -1071,8 +1757,7 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, if (op->r.rs != 31) return reg; - if (!sync && - !(op->flags & LIGHTREC_NO_DS) && + if (!sync && !op_flag_no_ds(op->flags) && (next->i.op == OP_SPECIAL) && ((!mflo && next->r.op == OP_SPECIAL_MFHI) || (mflo && next->r.op == OP_SPECIAL_MFLO))) @@ -1117,7 +1802,7 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, break; } - /* fall-through */ + fallthrough; default: continue; } @@ -1144,13 +1829,10 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset, case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: - case OP_META_BEQZ: - case OP_META_BNEZ: /* TODO: handle backwards branches too */ - if ((op->flags & LIGHTREC_LOCAL_BRANCH) && - (s16)op->c.i.imm >= 0) { + if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) { branch_offset = i + 1 + (s16)op->c.i.imm - - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); + - !!op_flag_no_ds(op->flags); lightrec_replace_lo_hi(block, branch_offset, last, lo); lightrec_replace_lo_hi(block, i + 1, branch_offset, lo); @@ -1170,30 +1852,57 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset, return; } - /* fall-through */ + fallthrough; default: break; } } } +static bool lightrec_always_skip_div_check(void) +{ +#ifdef __mips__ + return true; +#else + return false; +#endif +} + static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block) { - struct opcode *list; + struct opcode *prev, *list = NULL; u8 reg_hi, reg_lo; unsigned int i; + u32 known = BIT(0); + u32 values[32] = { 0 }; for (i = 0; i < block->nb_ops - 1; i++) { + prev = list; list = &block->opcode_list[i]; - if (list->i.op != OP_SPECIAL) - continue; + if (prev) + known = lightrec_propagate_consts(list, prev, known, values); - switch (list->r.op) { - case OP_SPECIAL_MULT: - 
case OP_SPECIAL_MULTU: - case OP_SPECIAL_DIV: - case OP_SPECIAL_DIVU: + switch (list->i.op) { + case OP_SPECIAL: + switch (list->r.op) { + case OP_SPECIAL_DIV: + case OP_SPECIAL_DIVU: + /* If we are dividing by a non-zero constant, don't + * emit the div-by-zero check. */ + if (lightrec_always_skip_div_check() || + ((known & BIT(list->c.r.rt)) && values[list->c.r.rt])) + list->flags |= LIGHTREC_NO_DIV_CHECK; + fallthrough; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + break; + default: + continue; + } + fallthrough; + case OP_META_MULT2: + case OP_META_MULTU2: break; default: continue; @@ -1201,8 +1910,9 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block * /* Don't support opcodes in delay slots */ if ((i && has_delay_slot(block->opcode_list[i - 1].c)) || - (list->flags & LIGHTREC_NO_DS)) + op_flag_no_ds(list->flags)) { continue; + } reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false); if (reg_lo == 0) { @@ -1372,7 +2082,8 @@ static int lightrec_replace_memset(struct lightrec_state *state, struct block *b if (i == ARRAY_SIZE(memset_code) - 1) { /* success! */ pr_debug("Block at PC 0x%x is a memset\n", block->pc); - block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE; + block_set_flags(block, + BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE); /* Return non-zero to skip other optimizers. 
*/ return 1; @@ -1386,10 +2097,11 @@ static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block * IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence), IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset), IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches), - IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops), + IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches), IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches), + IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops), IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots), - IF_OPT(OPT_FLAG_STORES, &lightrec_flag_stores), + IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io), IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs), IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload), }; diff --git a/deps/lightrec/reaper.c b/deps/lightrec/reaper.c index 2d9e68486..777b99704 100644 --- a/deps/lightrec/reaper.c +++ b/deps/lightrec/reaper.c @@ -12,6 +12,7 @@ #include #include +#include #include struct reaper_elm { @@ -23,7 +24,11 @@ struct reaper_elm { struct reaper { struct lightrec_state *state; pthread_mutex_t mutex; + pthread_cond_t cond; struct slist_elm reap_list; + + bool running; + atomic_uint sem; }; struct reaper *lightrec_reaper_init(struct lightrec_state *state) @@ -38,21 +43,36 @@ struct reaper *lightrec_reaper_init(struct lightrec_state *state) } reaper->state = state; + reaper->running = false; + reaper->sem = 0; slist_init(&reaper->reap_list); ret = pthread_mutex_init(&reaper->mutex, NULL); if (ret) { pr_err("Cannot init mutex variable: %d\n", ret); - lightrec_free(reaper->state, MEM_FOR_LIGHTREC, - sizeof(*reaper), reaper); - return NULL; + goto err_free_reaper; + } + + ret = pthread_cond_init(&reaper->cond, NULL); + if (ret) { + pr_err("Cannot init cond variable: %d\n", ret); + goto err_destroy_mutex; } return reaper; + +err_destroy_mutex: + pthread_mutex_destroy(&reaper->mutex); +err_free_reaper: + 
lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper); + return NULL; } void lightrec_reaper_destroy(struct reaper *reaper) { + lightrec_reaper_reap(reaper); + + pthread_cond_destroy(&reaper->cond); pthread_mutex_destroy(&reaper->mutex); lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper); } @@ -89,6 +109,11 @@ int lightrec_reaper_add(struct reaper *reaper, reap_func_t f, void *data) return ret; } +static bool lightrec_reaper_can_reap(struct reaper *reaper) +{ + return !atomic_load_explicit(&reaper->sem, memory_order_relaxed); +} + void lightrec_reaper_reap(struct reaper *reaper) { struct reaper_elm *reaper_elm; @@ -96,8 +121,10 @@ void lightrec_reaper_reap(struct reaper *reaper) pthread_mutex_lock(&reaper->mutex); - while (!!(elm = slist_first(&reaper->reap_list))) { + while (lightrec_reaper_can_reap(reaper) && + !!(elm = slist_first(&reaper->reap_list))) { slist_remove(&reaper->reap_list, elm); + reaper->running = true; pthread_mutex_unlock(&reaper->mutex); reaper_elm = container_of(elm, struct reaper_elm, slist); @@ -108,7 +135,24 @@ void lightrec_reaper_reap(struct reaper *reaper) sizeof(*reaper_elm), reaper_elm); pthread_mutex_lock(&reaper->mutex); + reaper->running = false; + pthread_cond_broadcast(&reaper->cond); } pthread_mutex_unlock(&reaper->mutex); } + +void lightrec_reaper_pause(struct reaper *reaper) +{ + atomic_fetch_add_explicit(&reaper->sem, 1, memory_order_relaxed); + + pthread_mutex_lock(&reaper->mutex); + while (reaper->running) + pthread_cond_wait(&reaper->cond, &reaper->mutex); + pthread_mutex_unlock(&reaper->mutex); +} + +void lightrec_reaper_continue(struct reaper *reaper) +{ + atomic_fetch_sub_explicit(&reaper->sem, 1, memory_order_relaxed); +} diff --git a/deps/lightrec/reaper.h b/deps/lightrec/reaper.h index b7d493cb2..49b6a1a34 100644 --- a/deps/lightrec/reaper.h +++ b/deps/lightrec/reaper.h @@ -17,4 +17,7 @@ void lightrec_reaper_destroy(struct reaper *reaper); int lightrec_reaper_add(struct reaper 
*reaper, reap_func_t f, void *data); void lightrec_reaper_reap(struct reaper *reaper); +void lightrec_reaper_pause(struct reaper *reaper); +void lightrec_reaper_continue(struct reaper *reaper); + #endif /* __LIGHTREC_REAPER_H__ */ diff --git a/deps/lightrec/recompiler.c b/deps/lightrec/recompiler.c index 0167863cc..08a9235a6 100644 --- a/deps/lightrec/recompiler.c +++ b/deps/lightrec/recompiler.c @@ -3,10 +3,12 @@ * Copyright (C) 2019-2021 Paul Cercueil */ +#include "blockcache.h" #include "debug.h" #include "interpreter.h" #include "lightrec-private.h" #include "memmanager.h" +#include "reaper.h" #include "slist.h" #include @@ -14,40 +16,152 @@ #include #include #include +#ifdef __linux__ +#include +#endif struct block_rec { struct block *block; struct slist_elm slist; + bool compiling; +}; + +struct recompiler_thd { + struct lightrec_cstate *cstate; + unsigned int tid; + pthread_t thd; }; struct recompiler { struct lightrec_state *state; - pthread_t thd; pthread_cond_t cond; + pthread_cond_t cond2; pthread_mutex_t mutex; - bool stop; - struct block *current_block; + bool stop, must_flush; struct slist_elm slist; + + pthread_mutex_t alloc_mutex; + + unsigned int nb_recs; + struct recompiler_thd thds[]; }; -static void lightrec_compile_list(struct recompiler *rec) +static unsigned int get_processors_count(void) +{ + unsigned int nb = 1; + +#if defined(PTW32_VERSION) + nb = pthread_num_processors_np(); +#elif defined(__APPLE__) || defined(__FreeBSD__) + int count; + size_t size = sizeof(count); + + nb = sysctlbyname("hw.ncpu", &count, &size, NULL, 0) ? 1 : count; +#elif defined(_SC_NPROCESSORS_ONLN) + nb = sysconf(_SC_NPROCESSORS_ONLN); +#endif + + return nb < 1 ? 
1 : nb; +} + +static struct slist_elm * lightrec_get_first_elm(struct slist_elm *head) +{ + struct block_rec *block_rec; + struct slist_elm *elm; + + for (elm = slist_first(head); elm; elm = elm->next) { + block_rec = container_of(elm, struct block_rec, slist); + + if (!block_rec->compiling) + return elm; + } + + return NULL; +} + +static bool lightrec_cancel_block_rec(struct recompiler *rec, + struct block_rec *block_rec) +{ + if (block_rec->compiling) { + /* Block is being recompiled - wait for + * completion */ + pthread_cond_wait(&rec->cond2, &rec->mutex); + + /* We can't guarantee the signal was for us. + * Since block_rec may have been removed while + * we were waiting on the condition, we cannot + * check block_rec->compiling again. The best + * thing is just to restart the function. */ + return false; + } + + /* Block is not yet being processed - remove it from the list */ + slist_remove(&rec->slist, &block_rec->slist); + lightrec_free(rec->state, MEM_FOR_LIGHTREC, + sizeof(*block_rec), block_rec); + + return true; +} + +static void lightrec_cancel_list(struct recompiler *rec) +{ + struct block_rec *block_rec; + struct slist_elm *elm, *head = &rec->slist; + + for (elm = slist_first(head); elm; elm = slist_first(head)) { + block_rec = container_of(elm, struct block_rec, slist); + lightrec_cancel_block_rec(rec, block_rec); + } +} + +static void lightrec_flush_code_buffer(struct lightrec_state *state, void *d) +{ + struct recompiler *rec = d; + + lightrec_remove_outdated_blocks(state->block_cache, NULL); + rec->must_flush = false; +} + +static void lightrec_compile_list(struct recompiler *rec, + struct recompiler_thd *thd) { struct block_rec *block_rec; struct slist_elm *next; struct block *block; int ret; - while (!!(next = slist_first(&rec->slist))) { + while (!!(next = lightrec_get_first_elm(&rec->slist))) { block_rec = container_of(next, struct block_rec, slist); + block_rec->compiling = true; block = block_rec->block; - rec->current_block = block; 
pthread_mutex_unlock(&rec->mutex); - ret = lightrec_compile_block(rec->state, block); - if (ret) { - pr_err("Unable to compile block at PC 0x%x: %d\n", - block->pc, ret); + if (likely(!block_has_flag(block, BLOCK_IS_DEAD))) { + ret = lightrec_compile_block(thd->cstate, block); + if (ret == -ENOMEM) { + /* Code buffer is full. Request the reaper to + * flush it. */ + + pthread_mutex_lock(&rec->mutex); + block_rec->compiling = false; + pthread_cond_broadcast(&rec->cond2); + + if (!rec->must_flush) { + rec->must_flush = true; + lightrec_cancel_list(rec); + + lightrec_reaper_add(rec->state->reaper, + lightrec_flush_code_buffer, + rec); + } + return; + } + + if (ret) { + pr_err("Unable to compile block at PC 0x%x: %d\n", + block->pc, ret); + } } pthread_mutex_lock(&rec->mutex); @@ -55,15 +169,14 @@ static void lightrec_compile_list(struct recompiler *rec) slist_remove(&rec->slist, next); lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*block_rec), block_rec); - pthread_cond_signal(&rec->cond); + pthread_cond_broadcast(&rec->cond2); } - - rec->current_block = NULL; } static void * lightrec_recompiler_thd(void *d) { - struct recompiler *rec = d; + struct recompiler_thd *thd = d; + struct recompiler *rec = container_of(thd, struct recompiler, thds[thd->tid]); pthread_mutex_lock(&rec->mutex); @@ -76,7 +189,7 @@ static void * lightrec_recompiler_thd(void *d) } while (slist_empty(&rec->slist)); - lightrec_compile_list(rec); + lightrec_compile_list(rec, thd); } out_unlock: @@ -87,62 +200,115 @@ static void * lightrec_recompiler_thd(void *d) struct recompiler *lightrec_recompiler_init(struct lightrec_state *state) { struct recompiler *rec; + unsigned int i, nb_recs, nb_cpus; int ret; - rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec)); + nb_cpus = get_processors_count(); + nb_recs = nb_cpus < 2 ? 
1 : nb_cpus - 1; + + rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec) + + nb_recs * sizeof(*rec->thds)); if (!rec) { pr_err("Cannot create recompiler: Out of memory\n"); return NULL; } + for (i = 0; i < nb_recs; i++) { + rec->thds[i].tid = i; + rec->thds[i].cstate = NULL; + } + + for (i = 0; i < nb_recs; i++) { + rec->thds[i].cstate = lightrec_create_cstate(state); + if (!rec->thds[i].cstate) { + pr_err("Cannot create recompiler: Out of memory\n"); + goto err_free_cstates; + } + } + rec->state = state; rec->stop = false; - rec->current_block = NULL; + rec->must_flush = false; + rec->nb_recs = nb_recs; slist_init(&rec->slist); ret = pthread_cond_init(&rec->cond, NULL); if (ret) { pr_err("Cannot init cond variable: %d\n", ret); - goto err_free_rec; + goto err_free_cstates; } - ret = pthread_mutex_init(&rec->mutex, NULL); + ret = pthread_cond_init(&rec->cond2, NULL); if (ret) { - pr_err("Cannot init mutex variable: %d\n", ret); + pr_err("Cannot init cond variable: %d\n", ret); goto err_cnd_destroy; } - ret = pthread_create(&rec->thd, NULL, lightrec_recompiler_thd, rec); + ret = pthread_mutex_init(&rec->alloc_mutex, NULL); if (ret) { - pr_err("Cannot create recompiler thread: %d\n", ret); - goto err_mtx_destroy; + pr_err("Cannot init alloc mutex variable: %d\n", ret); + goto err_cnd2_destroy; } - pr_info("Threaded recompiler started\n"); + ret = pthread_mutex_init(&rec->mutex, NULL); + if (ret) { + pr_err("Cannot init mutex variable: %d\n", ret); + goto err_alloc_mtx_destroy; + } + + for (i = 0; i < nb_recs; i++) { + ret = pthread_create(&rec->thds[i].thd, NULL, + lightrec_recompiler_thd, &rec->thds[i]); + if (ret) { + pr_err("Cannot create recompiler thread: %d\n", ret); + /* TODO: Handle cleanup properly */ + goto err_mtx_destroy; + } + } + + pr_info("Threaded recompiler started with %u workers.\n", nb_recs); return rec; err_mtx_destroy: pthread_mutex_destroy(&rec->mutex); +err_alloc_mtx_destroy: + pthread_mutex_destroy(&rec->alloc_mutex); 
+err_cnd2_destroy: + pthread_cond_destroy(&rec->cond2); err_cnd_destroy: pthread_cond_destroy(&rec->cond); -err_free_rec: +err_free_cstates: + for (i = 0; i < nb_recs; i++) { + if (rec->thds[i].cstate) + lightrec_free_cstate(rec->thds[i].cstate); + } lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec); return NULL; } void lightrec_free_recompiler(struct recompiler *rec) { + unsigned int i; + rec->stop = true; /* Stop the thread */ pthread_mutex_lock(&rec->mutex); - pthread_cond_signal(&rec->cond); + pthread_cond_broadcast(&rec->cond); + lightrec_cancel_list(rec); pthread_mutex_unlock(&rec->mutex); - pthread_join(rec->thd, NULL); + + for (i = 0; i < rec->nb_recs; i++) + pthread_join(rec->thds[i].thd, NULL); + + for (i = 0; i < rec->nb_recs; i++) + lightrec_free_cstate(rec->thds[i].cstate); pthread_mutex_destroy(&rec->mutex); + pthread_mutex_destroy(&rec->alloc_mutex); pthread_cond_destroy(&rec->cond); + pthread_cond_destroy(&rec->cond2); lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec); } @@ -154,9 +320,15 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) pthread_mutex_lock(&rec->mutex); + /* If the recompiler must flush the code cache, we can't add the new + * job. It will be re-added next time the block's address is jumped to + * again. */ + if (rec->must_flush) + goto out_unlock; + /* If the block is marked as dead, don't compile it, it will be removed * as soon as it's safe. */ - if (block->flags & BLOCK_IS_DEAD) + if (block_has_flag(block, BLOCK_IS_DEAD)) goto out_unlock; for (elm = slist_first(&rec->slist), prev = NULL; elm; @@ -167,7 +339,8 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) /* The block to compile is already in the queue - bump * it to the top of the list, unless the block is being * recompiled. 
*/ - if (prev && !(block->flags & BLOCK_SHOULD_RECOMPILE)) { + if (prev && !block_rec->compiling && + !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) { slist_remove_next(prev); slist_append(&rec->slist, elm); } @@ -178,7 +351,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) /* By the time this function was called, the block has been recompiled * and ins't in the wait list anymore. Just return here. */ - if (block->function && !(block->flags & BLOCK_SHOULD_RECOMPILE)) + if (block->function && !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) goto out_unlock; block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC, @@ -191,12 +364,13 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) pr_debug("Adding block PC 0x%x to recompiler\n", block->pc); block_rec->block = block; + block_rec->compiling = false; elm = &rec->slist; /* If the block is being recompiled, push it to the end of the queue; * otherwise push it to the front of the queue. */ - if (block->flags & BLOCK_SHOULD_RECOMPILE) + if (block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) for (; elm->next; elm = elm->next); slist_append(elm, &block_rec->slist); @@ -206,6 +380,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) out_unlock: pthread_mutex_unlock(&rec->mutex); + return ret; } @@ -216,49 +391,59 @@ void lightrec_recompiler_remove(struct recompiler *rec, struct block *block) pthread_mutex_lock(&rec->mutex); - for (elm = slist_first(&rec->slist); elm; elm = elm->next) { - block_rec = container_of(elm, struct block_rec, slist); + while (true) { + for (elm = slist_first(&rec->slist); elm; elm = elm->next) { + block_rec = container_of(elm, struct block_rec, slist); - if (block_rec->block == block) { - if (block == rec->current_block) { - /* Block is being recompiled - wait for - * completion */ - do { - pthread_cond_wait(&rec->cond, - &rec->mutex); - } while (block == rec->current_block); - } else { - /* Block is not yet being processed - 
remove it - * from the list */ - slist_remove(&rec->slist, elm); - lightrec_free(rec->state, MEM_FOR_LIGHTREC, - sizeof(*block_rec), block_rec); + if (block_rec->block == block) { + if (lightrec_cancel_block_rec(rec, block_rec)) + goto out_unlock; + + break; } + } + if (!elm) break; - } } +out_unlock: pthread_mutex_unlock(&rec->mutex); } void * lightrec_recompiler_run_first_pass(struct lightrec_state *state, struct block *block, u32 *pc) { - bool freed; + u8 old_flags; + + /* There's no point in running the first pass if the block will never + * be compiled. Let the main loop run the interpreter instead. */ + if (block_has_flag(block, BLOCK_NEVER_COMPILE)) + return NULL; + + /* The block is marked as dead, and will be removed the next time the + * reaper is run. In the meantime, the old function can still be + * executed. */ + if (block_has_flag(block, BLOCK_IS_DEAD)) + return block->function; + + /* If the block is already fully tagged, there is no point in running + * the first pass. Request a recompilation of the block, and maybe the + * interpreter will run the block in the meantime. 
*/ + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) + lightrec_recompiler_add(state->rec, block); if (likely(block->function)) { - if (block->flags & BLOCK_FULLY_TAGGED) { - freed = atomic_flag_test_and_set(&block->op_list_freed); + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); - if (!freed) { + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) { pr_debug("Block PC 0x%08x is fully tagged" " - free opcode list\n", block->pc); /* The block was already compiled but the opcode list * didn't get freed yet - do it now */ - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + lightrec_free_opcode_list(state, block->opcode_list); } } @@ -267,24 +452,36 @@ void * lightrec_recompiler_run_first_pass(struct lightrec_state *state, /* Mark the opcode list as freed, so that the threaded compiler won't * free it while we're using it in the interpreter. */ - freed = atomic_flag_test_and_set(&block->op_list_freed); + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); /* Block wasn't compiled yet - run the interpreter */ *pc = lightrec_emulate_block(state, block, *pc); - if (!freed) - atomic_flag_clear(&block->op_list_freed); + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) + block_clear_flags(block, BLOCK_NO_OPCODE_LIST); /* The block got compiled while the interpreter was running. * We can free the opcode list now. 
*/ - if (block->function && (block->flags & BLOCK_FULLY_TAGGED) && - !atomic_flag_test_and_set(&block->op_list_freed)) { - pr_debug("Block PC 0x%08x is fully tagged" - " - free opcode list\n", block->pc); + if (block->function && block_has_flag(block, BLOCK_FULLY_TAGGED)) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) { + pr_debug("Block PC 0x%08x is fully tagged" + " - free opcode list\n", block->pc); + + lightrec_free_opcode_list(state, block->opcode_list); + } } return NULL; } + +void lightrec_code_alloc_lock(struct lightrec_state *state) +{ + pthread_mutex_lock(&state->rec->alloc_mutex); +} + +void lightrec_code_alloc_unlock(struct lightrec_state *state) +{ + pthread_mutex_unlock(&state->rec->alloc_mutex); +} diff --git a/deps/lightrec/recompiler.h b/deps/lightrec/recompiler.h index 9bc522d14..b9fc57981 100644 --- a/deps/lightrec/recompiler.h +++ b/deps/lightrec/recompiler.h @@ -18,4 +18,7 @@ void lightrec_recompiler_remove(struct recompiler *rec, struct block *block); void * lightrec_recompiler_run_first_pass(struct lightrec_state *state, struct block *block, u32 *pc); +void lightrec_code_alloc_lock(struct lightrec_state *state); +void lightrec_code_alloc_unlock(struct lightrec_state *state); + #endif /* __LIGHTREC_RECOMPILER_H__ */ diff --git a/deps/lightrec/regcache.c b/deps/lightrec/regcache.c index a19c35815..1f11d8a27 100644 --- a/deps/lightrec/regcache.c +++ b/deps/lightrec/regcache.c @@ -5,16 +5,28 @@ #include "debug.h" #include "memmanager.h" +#include "lightning-wrapper.h" #include "regcache.h" -#include #include #include +enum reg_priority { + REG_IS_TEMP, + REG_IS_TEMP_VALUE, + REG_IS_ZERO, + REG_IS_LOADED, + REG_IS_DIRTY, + + REG_NB_PRIORITIES, +}; + struct native_register { - bool used, loaded, dirty, output, extend, extended, + bool used, output, extend, extended, zero_extend, zero_extended, locked; s8 
emulated_register; + intptr_t value; + enum reg_priority prio; }; struct regcache { @@ -40,6 +52,24 @@ const char * lightrec_reg_name(u8 reg) return mips_regs[reg]; } +static inline bool lightrec_reg_is_zero(u8 jit_reg) +{ +#if defined(__mips__) || defined(__alpha__) || defined(__riscv) + if (jit_reg == _ZERO) + return true; +#endif + return false; +} + +static inline s8 lightrec_get_hardwired_reg(u8 reg) +{ +#if defined(__mips__) || defined(__alpha__) || defined(__riscv) + if (reg == 0) + return _ZERO; +#endif + return -1; +} + static inline u8 lightrec_reg_number(const struct regcache *cache, const struct native_register *nreg) { @@ -51,7 +81,11 @@ static inline u8 lightrec_reg_to_lightning(const struct regcache *cache, const struct native_register *nreg) { u8 offset = lightrec_reg_number(cache, nreg); - return offset < NUM_REGS ? JIT_V(offset) : JIT_R(offset - NUM_REGS); + + if (offset < NUM_REGS) + return JIT_V(FIRST_REG + offset); + else + return JIT_R(FIRST_TEMP + offset - NUM_REGS); } static inline struct native_register * lightning_reg_to_lightrec( @@ -60,22 +94,26 @@ static inline struct native_register * lightning_reg_to_lightrec( if ((JIT_V0 > JIT_R0 && reg >= JIT_V0) || (JIT_V0 < JIT_R0 && reg < JIT_R0)) { if (JIT_V1 > JIT_V0) - return &cache->lightrec_regs[reg - JIT_V0]; + return &cache->lightrec_regs[reg - JIT_V(FIRST_REG)]; else - return &cache->lightrec_regs[JIT_V0 - reg]; + return &cache->lightrec_regs[JIT_V(FIRST_REG) - reg]; } else { if (JIT_R1 > JIT_R0) - return &cache->lightrec_regs[NUM_REGS + reg - JIT_R0]; + return &cache->lightrec_regs[NUM_REGS + reg - JIT_R(FIRST_TEMP)]; else - return &cache->lightrec_regs[NUM_REGS + JIT_R0 - reg]; + return &cache->lightrec_regs[NUM_REGS + JIT_R(FIRST_TEMP) - reg]; } } u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + struct native_register *reg; u8 flags = 0; + if (lightrec_reg_is_zero(jit_reg)) + return REG_EXT | 
REG_ZEXT; + + reg = lightning_reg_to_lightrec(cache, jit_reg); if (reg->extended) flags |= REG_EXT; if (reg->zero_extended) @@ -86,14 +124,19 @@ u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg) void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + struct native_register *reg; - reg->extend = flags & REG_EXT; - reg->zero_extend = flags & REG_ZEXT; + if (!lightrec_reg_is_zero(jit_reg)) { + reg = lightning_reg_to_lightrec(cache, jit_reg); + reg->extend = flags & REG_EXT; + reg->zero_extend = flags & REG_ZEXT; + } } static struct native_register * alloc_temp(struct regcache *cache) { + struct native_register *elm, *nreg = NULL; + enum reg_priority best = REG_NB_PRIORITIES; unsigned int i; /* We search the register list in reverse order. As temporaries are @@ -101,18 +144,18 @@ static struct native_register * alloc_temp(struct regcache *cache) * caller-saved registers, as they won't have to be saved back to * memory. 
*/ for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) { - struct native_register *nreg = &cache->lightrec_regs[i - 1]; - if (!nreg->used && !nreg->loaded && !nreg->dirty) - return nreg; - } + elm = &cache->lightrec_regs[i - 1]; - for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) { - struct native_register *nreg = &cache->lightrec_regs[i - 1]; - if (!nreg->used) - return nreg; + if (!elm->used && elm->prio < best) { + nreg = elm; + best = elm->prio; + + if (best == REG_IS_TEMP) + break; + } } - return NULL; + return nreg; } static struct native_register * find_mapped_reg(struct regcache *cache, @@ -122,9 +165,9 @@ static struct native_register * find_mapped_reg(struct regcache *cache, for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { struct native_register *nreg = &cache->lightrec_regs[i]; - if ((!reg || nreg->loaded || nreg->dirty) && - nreg->emulated_register == reg && - (!out || !nreg->locked)) + if ((nreg->prio >= REG_IS_ZERO) && + nreg->emulated_register == reg && + (!out || !nreg->locked)) return nreg; } @@ -134,7 +177,8 @@ static struct native_register * find_mapped_reg(struct regcache *cache, static struct native_register * alloc_in_out(struct regcache *cache, u8 reg, bool out) { - struct native_register *nreg; + struct native_register *elm, *nreg = NULL; + enum reg_priority best = REG_NB_PRIORITIES; unsigned int i; /* Try to find if the register is already mapped somewhere */ @@ -142,49 +186,40 @@ static struct native_register * alloc_in_out(struct regcache *cache, if (nreg) return nreg; - /* Try to allocate a non-dirty, non-loaded register. - * Loaded registers may be re-used later, so it's better to avoid - * re-using one if possible. 
*/ - for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { - nreg = &cache->lightrec_regs[i]; - if (!nreg->used && !nreg->dirty && !nreg->loaded) - return nreg; - } + nreg = NULL; - /* Try to allocate a non-dirty register */ for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { - nreg = &cache->lightrec_regs[i]; - if (!nreg->used && !nreg->dirty) - return nreg; - } + elm = &cache->lightrec_regs[i]; - for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { - nreg = &cache->lightrec_regs[i]; - if (!nreg->used) - return nreg; + if (!elm->used && elm->prio < best) { + nreg = elm; + best = elm->prio; + + if (best == REG_IS_TEMP) + break; + } } - return NULL; + return nreg; } static void lightrec_discard_nreg(struct native_register *nreg) { nreg->extended = false; nreg->zero_extended = false; - nreg->loaded = false; nreg->output = false; - nreg->dirty = false; nreg->used = false; nreg->locked = false; nreg->emulated_register = -1; + nreg->prio = 0; } static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit, struct native_register *nreg, u8 jit_reg) { /* If we get a dirty register, store back the old value */ - if (nreg->dirty) { - s16 offset = offsetof(struct lightrec_state, native_reg_cache) + if (nreg->prio == REG_IS_DIRTY) { + s16 offset = offsetof(struct lightrec_state, regs.gpr) + (nreg->emulated_register << 2); jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg); @@ -195,6 +230,9 @@ static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit, void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) { + if (lightrec_reg_is_zero(jit_reg)) + return; + lightrec_unload_nreg(cache, _jit, lightning_reg_to_lightrec(cache, jit_reg), jit_reg); } @@ -203,8 +241,12 @@ void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) * A locked register cannot only be used as input, not output. 
*/ void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + struct native_register *reg; + + if (lightrec_reg_is_zero(jit_reg)) + return; + reg = lightning_reg_to_lightrec(cache, jit_reg); lightrec_clean_reg(cache, _jit, jit_reg); reg->locked = true; @@ -212,11 +254,16 @@ void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + struct native_register *reg; + + if (lightrec_reg_is_zero(jit_reg)) + return jit_reg; + reg = lightning_reg_to_lightrec(cache, jit_reg); lightrec_unload_nreg(cache, _jit, reg, jit_reg); reg->used = true; + reg->prio = REG_IS_LOADED; return jit_reg; } @@ -233,15 +280,50 @@ u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit) jit_reg = lightrec_reg_to_lightning(cache, nreg); lightrec_unload_nreg(cache, _jit, nreg, jit_reg); + nreg->prio = REG_IS_TEMP; nreg->used = true; return jit_reg; } +s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value) +{ + struct native_register *nreg; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { + nreg = &cache->lightrec_regs[i]; + + if (nreg->prio == REG_IS_TEMP_VALUE && nreg->value == value) { + nreg->used = true; + return lightrec_reg_to_lightning(cache, nreg); + } + } + + return -1; +} + +void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value) +{ + struct native_register *nreg; + + nreg = lightning_reg_to_lightrec(cache, jit_reg); + + nreg->prio = REG_IS_TEMP_VALUE; + nreg->value = value; +} + u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg, u8 flags) { + struct native_register *nreg; u8 jit_reg; - struct native_register *nreg = alloc_in_out(cache, reg, true); + s8 hw_reg; + + hw_reg = lightrec_get_hardwired_reg(reg); + if 
(hw_reg >= 0) + return (u8) hw_reg; + + nreg = alloc_in_out(cache, reg, true); if (!nreg) { /* No free register, no dirty register to free. */ pr_err("No more registers! Abandon ship!\n"); @@ -260,15 +342,23 @@ u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, nreg->emulated_register = reg; nreg->extend = flags & REG_EXT; nreg->zero_extend = flags & REG_ZEXT; + nreg->prio = reg ? REG_IS_LOADED : REG_IS_ZERO; return jit_reg; } u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg, u8 flags) { + struct native_register *nreg; u8 jit_reg; bool reg_changed; - struct native_register *nreg = alloc_in_out(cache, reg, false); + s8 hw_reg; + + hw_reg = lightrec_get_hardwired_reg(reg); + if (hw_reg >= 0) + return (u8) hw_reg; + + nreg = alloc_in_out(cache, reg, false); if (!nreg) { /* No free register, no dirty register to free. */ pr_err("No more registers! Abandon ship!\n"); @@ -283,32 +373,28 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, if (reg_changed) lightrec_unload_nreg(cache, _jit, nreg, jit_reg); - if (!nreg->loaded && !nreg->dirty && reg != 0) { - s16 offset = offsetof(struct lightrec_state, native_reg_cache) + if (nreg->prio < REG_IS_LOADED && reg != 0) { + s16 offset = offsetof(struct lightrec_state, regs.gpr) + (reg << 2); nreg->zero_extended = flags & REG_ZEXT; nreg->extended = !nreg->zero_extended; /* Load previous value from register cache */ -#if __WORDSIZE == 64 if (nreg->zero_extended) jit_ldxi_ui(jit_reg, LIGHTREC_REG_STATE, offset); else jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset); -#else - jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset); -#endif - nreg->loaded = true; + nreg->prio = REG_IS_LOADED; } /* Clear register r0 before use */ - if (reg == 0 && (!nreg->loaded || nreg->dirty)) { + if (reg == 0 && nreg->prio != REG_IS_ZERO) { jit_movi(jit_reg, 0); nreg->extended = true; nreg->zero_extended = true; - nreg->loaded = true; + nreg->prio = REG_IS_ZERO; } nreg->used = true; @@ -319,16 
+405,12 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, (!nreg->zero_extended || !(flags & REG_ZEXT))) { nreg->extended = true; nreg->zero_extended = false; -#if __WORDSIZE == 64 jit_extr_i(jit_reg, jit_reg); -#endif } else if (!(flags & REG_EXT) && (flags & REG_ZEXT) && !nreg->zero_extended) { nreg->zero_extended = true; nreg->extended = false; -#if __WORDSIZE == 64 jit_extr_ui(jit_reg, jit_reg); -#endif } return jit_reg; @@ -351,14 +433,14 @@ u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit, lightrec_unload_nreg(cache, _jit, nreg, jit_reg); /* Load previous value from register cache */ - offset = offsetof(struct lightrec_state, native_reg_cache) + (reg << 2); + offset = offsetof(struct lightrec_state, regs.gpr) + (reg << 2); jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset); nreg->extended = true; nreg->zero_extended = false; nreg->used = true; - nreg->loaded = true; nreg->emulated_register = reg; + nreg->prio = REG_IS_LOADED; return jit_reg; } @@ -367,7 +449,7 @@ static void free_reg(struct native_register *nreg) { /* Set output registers as dirty */ if (nreg->used && nreg->output && nreg->emulated_register > 0) - nreg->dirty = true; + nreg->prio = REG_IS_DIRTY; if (nreg->output) { nreg->extended = nreg->extend; nreg->zero_extended = nreg->zero_extend; @@ -377,7 +459,8 @@ static void free_reg(struct native_register *nreg) void lightrec_free_reg(struct regcache *cache, u8 jit_reg) { - free_reg(lightning_reg_to_lightrec(cache, jit_reg)); + if (!lightrec_reg_is_zero(jit_reg)) + free_reg(lightning_reg_to_lightrec(cache, jit_reg)); } void lightrec_free_regs(struct regcache *cache) @@ -391,13 +474,18 @@ void lightrec_free_regs(struct regcache *cache) static void clean_reg(jit_state_t *_jit, struct native_register *nreg, u8 jit_reg, bool clean) { - if (nreg->dirty) { - s16 offset = offsetof(struct lightrec_state, native_reg_cache) + if (nreg->prio == REG_IS_DIRTY) { + s16 offset = offsetof(struct lightrec_state, regs.gpr) + 
(nreg->emulated_register << 2); jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg); - nreg->loaded |= nreg->dirty; - nreg->dirty ^= clean; + + if (clean) { + if (nreg->emulated_register == 0) + nreg->prio = REG_IS_ZERO; + else + nreg->prio = REG_IS_LOADED; + } } } @@ -405,11 +493,13 @@ static void clean_regs(struct regcache *cache, jit_state_t *_jit, bool clean) { unsigned int i; - for (i = 0; i < NUM_REGS; i++) - clean_reg(_jit, &cache->lightrec_regs[i], JIT_V(i), clean); + for (i = 0; i < NUM_REGS; i++) { + clean_reg(_jit, &cache->lightrec_regs[i], + JIT_V(FIRST_REG + i), clean); + } for (i = 0; i < NUM_TEMPS; i++) { clean_reg(_jit, &cache->lightrec_regs[i + NUM_REGS], - JIT_R(i), clean); + JIT_R(FIRST_TEMP + i), clean); } } @@ -423,10 +513,25 @@ void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit) clean_regs(cache, _jit, true); } +bool lightrec_has_dirty_regs(struct regcache *cache) +{ + unsigned int i; + + for (i = 0; i < NUM_REGS + NUM_TEMPS; i++) + if (cache->lightrec_regs[i].prio == REG_IS_DIRTY) + return true; + + return false; +} + void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); - clean_reg(_jit, reg, jit_reg, true); + struct native_register *reg; + + if (!lightrec_reg_is_zero(jit_reg)) { + reg = lightning_reg_to_lightrec(cache, jit_reg); + clean_reg(_jit, reg, jit_reg, true); + } } void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, @@ -446,6 +551,15 @@ void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, } } +void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg) +{ + struct native_register *nreg; + + nreg = find_mapped_reg(cache, reg, false); + if (nreg) + lightrec_discard_nreg(nreg); +} + struct native_register * lightrec_regcache_enter_branch(struct regcache *cache) { struct native_register *backup; @@ -501,15 +615,18 @@ void lightrec_regcache_mark_live(struct regcache 
*cache, jit_state_t *_jit) for (i = 0; i < NUM_REGS; i++) { nreg = &cache->lightrec_regs[i]; - if (nreg->used || nreg->loaded || nreg->dirty) - jit_live(JIT_V(i)); + if (nreg->used || nreg->prio > REG_IS_TEMP) + jit_live(JIT_V(FIRST_REG + i)); } #endif for (i = 0; i < NUM_TEMPS; i++) { nreg = &cache->lightrec_regs[NUM_REGS + i]; - if (nreg->used || nreg->loaded || nreg->dirty) - jit_live(JIT_R(i)); + if (nreg->used || nreg->prio > REG_IS_TEMP) + jit_live(JIT_R(FIRST_TEMP + i)); } + + jit_live(LIGHTREC_REG_STATE); + jit_live(LIGHTREC_REG_CYCLE); } diff --git a/deps/lightrec/regcache.h b/deps/lightrec/regcache.h index 835c9c92e..cffbf0533 100644 --- a/deps/lightrec/regcache.h +++ b/deps/lightrec/regcache.h @@ -6,12 +6,25 @@ #ifndef __REGCACHE_H__ #define __REGCACHE_H__ -#include "lightrec-private.h" +#include "lightning-wrapper.h" -#define NUM_REGS (JIT_V_NUM - 2) -#define NUM_TEMPS (JIT_R_NUM) +#define NUM_REGS (JIT_V_NUM - 1) #define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1)) -#define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2)) + +#if defined(__powerpc__) +# define NUM_TEMPS JIT_R_NUM +/* JIT_R0 is callee-saved on PowerPC, we have to use something else */ +# define LIGHTREC_REG_CYCLE _R10 +# define FIRST_TEMP 0 +#else +# define NUM_TEMPS (JIT_R_NUM - 1) +# define LIGHTREC_REG_CYCLE JIT_R0 +# define FIRST_TEMP 1 +#endif + +#include "lightrec-private.h" + +#define FIRST_REG 0 /* Flags for lightrec_alloc_reg_in / lightrec_alloc_reg_out. 
*/ #define REG_EXT BIT(0) /* register is sign-extended */ @@ -35,6 +48,9 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg, u8 jit_reg); +s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value); +void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value); + u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg); void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags); @@ -47,9 +63,11 @@ void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit); void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit); +_Bool lightrec_has_dirty_regs(struct regcache *cache); void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, u8 reg, _Bool unload); +void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg); u8 lightrec_alloc_reg_in_address(struct regcache *cache, jit_state_t *_jit, u8 reg, s16 offset); diff --git a/deps/lightrec/slist.h b/deps/lightrec/slist.h index ae7e5d3e1..37557e64d 100644 --- a/deps/lightrec/slist.h +++ b/deps/lightrec/slist.h @@ -6,6 +6,8 @@ #ifndef __LIGHTREC_SLIST_H__ #define __LIGHTREC_SLIST_H__ +#include <stddef.h> + #define container_of(ptr, type, member) \ ((type *)((void *)(ptr) - offsetof(type, member))) diff --git a/deps/lightrec/tlsf/.gitrepo b/deps/lightrec/tlsf/.gitrepo new file mode 100644 index 000000000..692e54257 --- /dev/null +++ b/deps/lightrec/tlsf/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. 
See https://github.com/git-commands/git-subrepo#readme +; +[subrepo] + remote = https://github.com/mattconte/tlsf + branch = master + commit = deff9ab509341f264addbd3c8ada533678591905 + parent = 1dc0344052e7379e16753e4a285c30fd158bf78d + method = merge + cmdver = 0.4.3 diff --git a/deps/lightrec/tlsf/README.md b/deps/lightrec/tlsf/README.md new file mode 100644 index 000000000..982919fc7 --- /dev/null +++ b/deps/lightrec/tlsf/README.md @@ -0,0 +1,92 @@ +# tlsf +Two-Level Segregated Fit memory allocator implementation. +Written by Matthew Conte (matt@baisoku.org). +Released under the BSD license. + +Features +-------- + * O(1) cost for malloc, free, realloc, memalign + * Extremely low overhead per allocation (4 bytes) + * Low overhead per TLSF management of pools (~3kB) + * Low fragmentation + * Compiles to only a few kB of code and data + * Support for adding and removing memory pool regions on the fly + +Caveats +------- + * Currently, assumes architecture can make 4-byte aligned accesses + * Not designed to be thread safe; the user must provide this + +Notes +----- +This code was based on the TLSF 1.4 spec and documentation found at: + + http://www.gii.upv.es/tlsf/main/docs + +It also leverages the TLSF 2.0 improvement to shrink the per-block overhead from 8 to 4 bytes. 
+ +History +------- +2016/04/10 - v3.1 + * Code moved to github + * tlsfbits.h rolled into tlsf.c + * License changed to BSD + +2014/02/08 - v3.0 + * This version is based on improvements from 3DInteractive GmbH + * Interface changed to allow more than one memory pool + * Separated pool handling from control structure (adding, removing, debugging) + * Control structure and pools can still be constructed in the same memory block + * Memory blocks for control structure and pools are checked for alignment + * Added functions to retrieve control structure size, alignment size, min and max block size, overhead of pool structure, and overhead of a single allocation + * Minimal Pool size is tlsf_block_size_min() + tlsf_pool_overhead() + * Pool must be empty when it is removed, in order to allow O(1) removal + +2011/10/20 - v2.0 + * 64-bit support + * More compiler intrinsics for ffs/fls + * ffs/fls verification during TLSF creation in debug builds + +2008/04/04 - v1.9 + * Add tlsf_heap_check, a heap integrity check + * Support a predefined tlsf_assert macro + * Fix realloc case where block should shrink; if adjacent block is in use, execution would go down the slow path + +2007/02/08 - v1.8 + * Fix for unnecessary reallocation in tlsf_realloc + +2007/02/03 - v1.7 + * tlsf_heap_walk takes a callback + * tlsf_realloc now returns NULL on failure + * tlsf_memalign optimization for 4-byte alignment + * Usage of size_t where appropriate + +2006/11/21 - v1.6 + * ffs/fls broken out into tlsfbits.h + * tlsf_overhead queries per-pool overhead + +2006/11/07 - v1.5 + * Smart realloc implementation + * Smart memalign implementation + +2006/10/11 - v1.4 + * Add some ffs/fls implementations + * Minor code footprint reduction + +2006/09/14 - v1.3 + * Profiling indicates heavy use of blocks of size 1-128, so implement small block handling + * Reduce pool overhead by about 1kb + * Reduce minimum block size from 32 to 12 bytes + * Realloc bug fix + +2006/09/09 - v1.2 + * Add tlsf_block_size 
+ * Static assertion mechanism for invariants + * Minor bugfixes + +2006/09/01 - v1.1 + * Add tlsf_realloc + * Add tlsf_walk_heap + +2006/08/25 - v1.0 + * First release diff --git a/deps/lightrec/tlsf/tlsf.c b/deps/lightrec/tlsf/tlsf.c new file mode 100644 index 000000000..af575737c --- /dev/null +++ b/deps/lightrec/tlsf/tlsf.c @@ -0,0 +1,1264 @@ +#include <assert.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "tlsf.h" + +#if defined(__cplusplus) +#define tlsf_decl inline +#else +#define tlsf_decl static +#endif + +/* +** Architecture-specific bit manipulation routines. +** +** TLSF achieves O(1) cost for malloc and free operations by limiting +** the search for a free block to a free list of guaranteed size +** adequate to fulfill the request, combined with efficient free list +** queries using bitmasks and architecture-specific bit-manipulation +** routines. +** +** Most modern processors provide instructions to count leading zeroes +** in a word, find the lowest and highest set bit, etc. These +** specific implementations will be used when available, falling back +** to a reasonably efficient generic implementation. +** +** NOTE: TLSF spec relies on ffs/fls returning value 0..31. +** ffs/fls return 1-32 by default, returning 0 for error. +*/ + +/* +** Detect whether or not we are building for a 32- or 64-bit (LP/LLP) +** architecture. There is no reliable portable method at compile-time. +*/ +#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \ + || defined (_WIN64) || defined (__LP64__) || defined (__LLP64__) +#define TLSF_64BIT +#endif + +/* +** gcc 3.4 and above have builtin support, specialized for architecture. +** Some compilers masquerade as gcc; patchlevel test filters them out. +*/ +#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \ + && defined (__GNUC_PATCHLEVEL__) + +#if defined (__SNC__) +/* SNC for Playstation 3. 
*/ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __builtin_clz(reverse); + return bit - 1; +} + +#else + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return __builtin_ffs(word) - 1; +} + +#endif + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __builtin_clz(word) : 0; + return bit - 1; +} + +#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64)) +/* Microsoft Visual C++ support on x86/X64 architectures. */ + +#include <intrin.h> + +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) + +tlsf_decl int tlsf_fls(unsigned int word) +{ + unsigned long index; + return _BitScanReverse(&index, word) ? index : -1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + unsigned long index; + return _BitScanForward(&index, word) ? index : -1; +} + +#elif defined (_MSC_VER) && defined (_M_PPC) +/* Microsoft Visual C++ support on PowerPC architectures. */ + +#include <ppcintrinsics.h> + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = 32 - _CountLeadingZeros(word); + return bit - 1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - _CountLeadingZeros(reverse); + return bit - 1; +} + +#elif defined (__ARMCC_VERSION) +/* RealView Compilation Tools for ARM */ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __clz(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __clz(word) : 0; + return bit - 1; +} + +#elif defined (__ghs__) +/* Green Hills support for PowerPC */ + +#include <ppc_ghs.h> + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __CLZ32(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 
32 - __CLZ32(word) : 0; + return bit - 1; +} + +#else +/* Fall back to generic implementation. */ + +tlsf_decl int tlsf_fls_generic(unsigned int word) +{ + int bit = 32; + + if (!word) bit -= 1; + if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; } + if (!(word & 0xff000000)) { word <<= 8; bit -= 8; } + if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; } + if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; } + if (!(word & 0x80000000)) { word <<= 1; bit -= 1; } + + return bit; +} + +/* Implement ffs in terms of fls. */ +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return tlsf_fls_generic(word & (~word + 1)) - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + return tlsf_fls_generic(word) - 1; +} + +#endif + +/* Possibly 64-bit version of tlsf_fls. */ +#if defined (TLSF_64BIT) +tlsf_decl int tlsf_fls_sizet(size_t size) +{ + int high = (int)(size >> 32); + int bits = 0; + if (high) + { + bits = 32 + tlsf_fls(high); + } + else + { + bits = tlsf_fls((int)size & 0xffffffff); + + } + return bits; +} +#else +#define tlsf_fls_sizet tlsf_fls +#endif + +#undef tlsf_decl + +/* +** Constants. +*/ + +/* Public constants: may be modified. */ +enum tlsf_public +{ + /* log2 of number of linear subdivisions of block sizes. Larger + ** values require more memory in the control structure. Values of + ** 4 or 5 are typical. + */ + SL_INDEX_COUNT_LOG2 = 5, +}; + +/* Private constants: do not modify. */ +enum tlsf_private +{ +#if defined (TLSF_64BIT) + /* All allocation sizes and addresses are aligned to 8 bytes. */ + ALIGN_SIZE_LOG2 = 3, +#else + /* All allocation sizes and addresses are aligned to 4 bytes. */ + ALIGN_SIZE_LOG2 = 2, +#endif + ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2), + + /* + ** We support allocations of sizes up to (1 << FL_INDEX_MAX) bits. 
+ ** However, because we linearly subdivide the second-level lists, and + ** our minimum size granularity is 4 bytes, it doesn't make sense to + ** create first-level lists for sizes smaller than SL_INDEX_COUNT * 4, + ** or (1 << (SL_INDEX_COUNT_LOG2 + 2)) bytes, as there we will be + ** trying to split size ranges into more slots than we have available. + ** Instead, we calculate the minimum threshold size, and place all + ** blocks below that size into the 0th first-level list. + */ + +#if defined (TLSF_64BIT) + /* + ** TODO: We can increase this to support larger sizes, at the expense + ** of more overhead in the TLSF structure. + */ + FL_INDEX_MAX = 32, +#else + FL_INDEX_MAX = 30, +#endif + SL_INDEX_COUNT = (1 << SL_INDEX_COUNT_LOG2), + FL_INDEX_SHIFT = (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2), + FL_INDEX_COUNT = (FL_INDEX_MAX - FL_INDEX_SHIFT + 1), + + SMALL_BLOCK_SIZE = (1 << FL_INDEX_SHIFT), +}; + +/* +** Cast and min/max macros. +*/ + +#define tlsf_cast(t, exp) ((t) (exp)) +#define tlsf_min(a, b) ((a) < (b) ? (a) : (b)) +#define tlsf_max(a, b) ((a) > (b) ? (a) : (b)) + +/* +** Set assert macro, if it has not been provided by the user. +*/ +#if !defined (tlsf_assert) +#define tlsf_assert assert +#endif + +/* +** Static assertion mechanism. +*/ + +#define _tlsf_glue2(x, y) x ## y +#define _tlsf_glue(x, y) _tlsf_glue2(x, y) +#define tlsf_static_assert(exp) \ + typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1] + +/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */ +tlsf_static_assert(sizeof(int) * CHAR_BIT == 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64); + +/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */ +tlsf_static_assert(sizeof(unsigned int) * CHAR_BIT >= SL_INDEX_COUNT); + +/* Ensure we've properly tuned our sizes. 
*/ +tlsf_static_assert(ALIGN_SIZE == SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + +/* +** Data structures and associated constants. +*/ + +/* +** Block header structure. +** +** There are several implementation subtleties involved: +** - The prev_phys_block field is only valid if the previous block is free. +** - The prev_phys_block field is actually stored at the end of the +** previous block. It appears at the beginning of this structure only to +** simplify the implementation. +** - The next_free / prev_free fields are only valid if the block is free. +*/ +typedef struct block_header_t +{ + /* Points to the previous physical block. */ + struct block_header_t* prev_phys_block; + + /* The size of this block, excluding the block header. */ + size_t size; + + /* Next and previous free blocks. */ + struct block_header_t* next_free; + struct block_header_t* prev_free; +} block_header_t; + +/* +** Since block sizes are always at least a multiple of 4, the two least +** significant bits of the size field are used to store the block status: +** - bit 0: whether block is busy or free +** - bit 1: whether previous block is busy or free +*/ +static const size_t block_header_free_bit = 1 << 0; +static const size_t block_header_prev_free_bit = 1 << 1; + +/* +** The size of the block header exposed to used blocks is the size field. +** The prev_phys_block field is stored *inside* the previous free block. +*/ +static const size_t block_header_overhead = sizeof(size_t); + +/* User data starts directly after the size field in a used block. */ +static const size_t block_start_offset = + offsetof(block_header_t, size) + sizeof(size_t); + +/* +** A free block must be large enough to store its header minus the size of +** the prev_phys_block field, and no larger than the number of addressable +** bits for FL_INDEX. 
+*/ +static const size_t block_size_min = + sizeof(block_header_t) - sizeof(block_header_t*); +static const size_t block_size_max = tlsf_cast(size_t, 1) << FL_INDEX_MAX; + + +/* The TLSF control structure. */ +typedef struct control_t +{ + /* Empty lists point at this block to indicate they are free. */ + block_header_t block_null; + + /* Bitmaps for free lists. */ + unsigned int fl_bitmap; + unsigned int sl_bitmap[FL_INDEX_COUNT]; + + /* Head of free lists. */ + block_header_t* blocks[FL_INDEX_COUNT][SL_INDEX_COUNT]; +} control_t; + +/* A type used for casting when doing pointer arithmetic. */ +typedef ptrdiff_t tlsfptr_t; + +/* +** block_header_t member functions. +*/ + +static size_t block_size(const block_header_t* block) +{ + return block->size & ~(block_header_free_bit | block_header_prev_free_bit); +} + +static void block_set_size(block_header_t* block, size_t size) +{ + const size_t oldsize = block->size; + block->size = size | (oldsize & (block_header_free_bit | block_header_prev_free_bit)); +} + +static int block_is_last(const block_header_t* block) +{ + return block_size(block) == 0; +} + +static int block_is_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_free_bit); +} + +static void block_set_free(block_header_t* block) +{ + block->size |= block_header_free_bit; +} + +static void block_set_used(block_header_t* block) +{ + block->size &= ~block_header_free_bit; +} + +static int block_is_prev_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_prev_free_bit); +} + +static void block_set_prev_free(block_header_t* block) +{ + block->size |= block_header_prev_free_bit; +} + +static void block_set_prev_used(block_header_t* block) +{ + block->size &= ~block_header_prev_free_bit; +} + +static block_header_t* block_from_ptr(const void* ptr) +{ + return tlsf_cast(block_header_t*, + tlsf_cast(unsigned char*, ptr) - block_start_offset); +} + +static void* block_to_ptr(const block_header_t* 
block) +{ + return tlsf_cast(void*, + tlsf_cast(unsigned char*, block) + block_start_offset); +} + +/* Return location of next block after block of given size. */ +static block_header_t* offset_to_block(const void* ptr, size_t size) +{ + return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size); +} + +/* Return location of previous block. */ +static block_header_t* block_prev(const block_header_t* block) +{ + tlsf_assert(block_is_prev_free(block) && "previous block must be free"); + return block->prev_phys_block; +} + +/* Return location of next existing block. */ +static block_header_t* block_next(const block_header_t* block) +{ + block_header_t* next = offset_to_block(block_to_ptr(block), + block_size(block) - block_header_overhead); + tlsf_assert(!block_is_last(block)); + return next; +} + +/* Link a new block with its physical neighbor, return the neighbor. */ +static block_header_t* block_link_next(block_header_t* block) +{ + block_header_t* next = block_next(block); + next->prev_phys_block = block; + return next; +} + +static void block_mark_as_free(block_header_t* block) +{ + /* Link the block to the next block, first. 
*/ + block_header_t* next = block_link_next(block); + block_set_prev_free(next); + block_set_free(block); +} + +static void block_mark_as_used(block_header_t* block) +{ + block_header_t* next = block_next(block); + block_set_prev_used(next); + block_set_used(block); +} + +static size_t align_up(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return (x + (align - 1)) & ~(align - 1); +} + +static size_t align_down(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return x - (x & (align - 1)); +} + +static void* align_ptr(const void* ptr, size_t align) +{ + const tlsfptr_t aligned = + (tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1); + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return tlsf_cast(void*, aligned); +} + +/* +** Adjust an allocation size to be aligned to word size, and no smaller +** than internal minimum. +*/ +static size_t adjust_request_size(size_t size, size_t align) +{ + size_t adjust = 0; + if (size) + { + const size_t aligned = align_up(size, align); + + /* aligned sized must not exceed block_size_max or we'll go out of bounds on sl_bitmap */ + if (aligned < block_size_max) + { + adjust = tlsf_max(aligned, block_size_min); + } + } + return adjust; +} + +/* +** TLSF utility functions. In most cases, these are direct translations of +** the documentation found in the white paper. +*/ + +static void mapping_insert(size_t size, int* fli, int* sli) +{ + int fl, sl; + if (size < SMALL_BLOCK_SIZE) + { + /* Store small blocks in first list. 
*/ + fl = 0; + sl = tlsf_cast(int, size) / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + } + else + { + fl = tlsf_fls_sizet(size); + sl = tlsf_cast(int, size >> (fl - SL_INDEX_COUNT_LOG2)) ^ (1 << SL_INDEX_COUNT_LOG2); + fl -= (FL_INDEX_SHIFT - 1); + } + *fli = fl; + *sli = sl; +} + +/* This version rounds up to the next block size (for allocations) */ +static void mapping_search(size_t size, int* fli, int* sli) +{ + if (size >= SMALL_BLOCK_SIZE) + { + const size_t round = (1 << (tlsf_fls_sizet(size) - SL_INDEX_COUNT_LOG2)) - 1; + size += round; + } + mapping_insert(size, fli, sli); +} + +static block_header_t* search_suitable_block(control_t* control, int* fli, int* sli) +{ + int fl = *fli; + int sl = *sli; + + /* + ** First, search for a block in the list associated with the given + ** fl/sl index. + */ + unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl); + if (!sl_map) + { + /* No block exists. Search in the next largest first-level list. */ + const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1)); + if (!fl_map) + { + /* No free blocks available, memory has been exhausted. */ + return 0; + } + + fl = tlsf_ffs(fl_map); + *fli = fl; + sl_map = control->sl_bitmap[fl]; + } + tlsf_assert(sl_map && "internal error - second level bitmap is null"); + sl = tlsf_ffs(sl_map); + *sli = sl; + + /* Return the first block in the free list. */ + return control->blocks[fl][sl]; +} + +/* Remove a free block from the free list.*/ +static void remove_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* prev = block->prev_free; + block_header_t* next = block->next_free; + tlsf_assert(prev && "prev_free field can not be null"); + tlsf_assert(next && "next_free field can not be null"); + next->prev_free = prev; + prev->next_free = next; + + /* If this block is the head of the free list, set new head. */ + if (control->blocks[fl][sl] == block) + { + control->blocks[fl][sl] = next; + + /* If the new head is null, clear the bitmap. 
*/ + if (next == &control->block_null) + { + control->sl_bitmap[fl] &= ~(1U << sl); + + /* If the second bitmap is now empty, clear the fl bitmap. */ + if (!control->sl_bitmap[fl]) + { + control->fl_bitmap &= ~(1U << fl); + } + } + } +} + +/* Insert a free block into the free block list. */ +static void insert_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* current = control->blocks[fl][sl]; + tlsf_assert(current && "free list cannot have a null entry"); + tlsf_assert(block && "cannot insert a null entry into the free list"); + block->next_free = current; + block->prev_free = &control->block_null; + current->prev_free = block; + + tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE) + && "block not aligned properly"); + /* + ** Insert the new block at the head of the list, and mark the first- + ** and second-level bitmaps appropriately. + */ + control->blocks[fl][sl] = block; + control->fl_bitmap |= (1U << fl); + control->sl_bitmap[fl] |= (1U << sl); +} + +/* Remove a given block from the free list. */ +static void block_remove(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* Insert a given block into the free list. */ +static void block_insert(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + insert_free_block(control, block, fl, sl); +} + +static int block_can_split(block_header_t* block, size_t size) +{ + return block_size(block) >= sizeof(block_header_t) + size; +} + +/* Split a block into two, the second of which is free. */ +static block_header_t* block_split(block_header_t* block, size_t size) +{ + /* Calculate the amount of space left in the remaining block. 
*/ + block_header_t* remaining = + offset_to_block(block_to_ptr(block), size - block_header_overhead); + + const size_t remain_size = block_size(block) - (size + block_header_overhead); + + tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE) + && "remaining block not aligned properly"); + + tlsf_assert(block_size(block) == remain_size + size + block_header_overhead); + block_set_size(remaining, remain_size); + tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size"); + + block_set_size(block, size); + block_mark_as_free(remaining); + + return remaining; +} + +/* Absorb a free block's storage into an adjacent previous free block. */ +static block_header_t* block_absorb(block_header_t* prev, block_header_t* block) +{ + tlsf_assert(!block_is_last(prev) && "previous block can't be last"); + /* Note: Leaves flags untouched. */ + prev->size += block_size(block) + block_header_overhead; + block_link_next(prev); + return prev; +} + +/* Merge a just-freed block with an adjacent previous free block. */ +static block_header_t* block_merge_prev(control_t* control, block_header_t* block) +{ + if (block_is_prev_free(block)) + { + block_header_t* prev = block_prev(block); + tlsf_assert(prev && "prev physical block can't be null"); + tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such"); + block_remove(control, prev); + block = block_absorb(prev, block); + } + + return block; +} + +/* Merge a just-freed block with an adjacent free block. 
*/ +static block_header_t* block_merge_next(control_t* control, block_header_t* block) +{ + block_header_t* next = block_next(block); + tlsf_assert(next && "next physical block can't be null"); + + if (block_is_free(next)) + { + tlsf_assert(!block_is_last(block) && "previous block can't be last"); + block_remove(control, next); + block = block_absorb(block, next); + } + + return block; +} + +/* Trim any trailing block space off the end of a block, return to pool. */ +static void block_trim_free(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(block_is_free(block) && "block must be free"); + if (block_can_split(block, size)) + { + block_header_t* remaining_block = block_split(block, size); + block_link_next(block); + block_set_prev_free(remaining_block); + block_insert(control, remaining_block); + } +} + +/* Trim any trailing block space off the end of a used block, return to pool. */ +static void block_trim_used(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(!block_is_free(block) && "block must be used"); + if (block_can_split(block, size)) + { + /* If the next block is free, we must coalesce. */ + block_header_t* remaining_block = block_split(block, size); + block_set_prev_used(remaining_block); + + remaining_block = block_merge_next(control, remaining_block); + block_insert(control, remaining_block); + } +} + +static block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size) +{ + block_header_t* remaining_block = block; + if (block_can_split(block, size)) + { + /* We want the 2nd block. 
*/ + remaining_block = block_split(block, size - block_header_overhead); + block_set_prev_free(remaining_block); + + block_link_next(block); + block_insert(control, block); + } + + return remaining_block; +} + +static block_header_t* block_locate_free(control_t* control, size_t size) +{ + int fl = 0, sl = 0; + block_header_t* block = 0; + + if (size) + { + mapping_search(size, &fl, &sl); + + /* + ** mapping_search can futz with the size, so for excessively large sizes it can sometimes wind up + ** with indices that are off the end of the block array. + ** So, we protect against that here, since this is the only callsite of mapping_search. + ** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range. + */ + if (fl < FL_INDEX_COUNT) + { + block = search_suitable_block(control, &fl, &sl); + } + } + + if (block) + { + tlsf_assert(block_size(block) >= size); + remove_free_block(control, block, fl, sl); + } + + return block; +} + +static void* block_prepare_used(control_t* control, block_header_t* block, size_t size) +{ + void* p = 0; + if (block) + { + tlsf_assert(size && "size must be non-zero"); + block_trim_free(control, block, size); + block_mark_as_used(block); + p = block_to_ptr(block); + } + return p; +} + +/* Clear structure and point all empty lists at the null block. */ +static void control_construct(control_t* control) +{ + int i, j; + + control->block_null.next_free = &control->block_null; + control->block_null.prev_free = &control->block_null; + + control->fl_bitmap = 0; + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + control->sl_bitmap[i] = 0; + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + control->blocks[i][j] = &control->block_null; + } + } +} + +/* +** Debugging utilities. 
+*/ + +typedef struct integrity_t +{ + int prev_status; + int status; +} integrity_t; + +#define tlsf_insist(x) { tlsf_assert(x); if (!(x)) { status--; } } + +static void integrity_walker(void* ptr, size_t size, int used, void* user) +{ + block_header_t* block = block_from_ptr(ptr); + integrity_t* integ = tlsf_cast(integrity_t*, user); + const int this_prev_status = block_is_prev_free(block) ? 1 : 0; + const int this_status = block_is_free(block) ? 1 : 0; + const size_t this_block_size = block_size(block); + + int status = 0; + (void)used; + tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect"); + tlsf_insist(size == this_block_size && "block size incorrect"); + + integ->prev_status = this_status; + integ->status += status; +} + +int tlsf_check(tlsf_t tlsf) +{ + int i, j; + + control_t* control = tlsf_cast(control_t*, tlsf); + int status = 0; + + /* Check that the free lists and bitmaps are accurate. */ + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + const int fl_map = control->fl_bitmap & (1U << i); + const int sl_list = control->sl_bitmap[i]; + const int sl_map = sl_list & (1U << j); + const block_header_t* block = control->blocks[i][j]; + + /* Check that first- and second-level lists agree. */ + if (!fl_map) + { + tlsf_insist(!sl_map && "second-level map must be null"); + } + + if (!sl_map) + { + tlsf_insist(block == &control->block_null && "block list must be null"); + continue; + } + + /* Check that there is at least one free block. 
*/ + tlsf_insist(sl_list && "no free blocks in second-level map"); + tlsf_insist(block != &control->block_null && "block should not be null"); + + while (block != &control->block_null) + { + int fli, sli; + tlsf_insist(block_is_free(block) && "block should be free"); + tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced"); + tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced"); + tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free"); + tlsf_insist(block_size(block) >= block_size_min && "block not minimum size"); + + mapping_insert(block_size(block), &fli, &sli); + tlsf_insist(fli == i && sli == j && "block size indexed in wrong list"); + block = block->next_free; + } + } + } + + return status; +} + +#undef tlsf_insist + +static void default_walker(void* ptr, size_t size, int used, void* user) +{ + (void)user; + printf("\t%p %s size: %x (%p)\n", ptr, used ? "used" : "free", (unsigned int)size, block_from_ptr(ptr)); +} + +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user) +{ + tlsf_walker pool_walker = walker ? walker : default_walker; + block_header_t* block = + offset_to_block(pool, -(int)block_header_overhead); + + while (block && !block_is_last(block)) + { + pool_walker( + block_to_ptr(block), + block_size(block), + !block_is_free(block), + user); + block = block_next(block); + } +} + +size_t tlsf_block_size(void* ptr) +{ + size_t size = 0; + if (ptr) + { + const block_header_t* block = block_from_ptr(ptr); + size = block_size(block); + } + return size; +} + +int tlsf_check_pool(pool_t pool) +{ + /* Check that the blocks are physically correct. 
*/ + integrity_t integ = { 0, 0 }; + tlsf_walk_pool(pool, integrity_walker, &integ); + + return integ.status; +} + +/* +** Size of the TLSF structures in a given memory block passed to +** tlsf_create, equal to the size of a control_t +*/ +size_t tlsf_size(void) +{ + return sizeof(control_t); +} + +size_t tlsf_align_size(void) +{ + return ALIGN_SIZE; +} + +size_t tlsf_block_size_min(void) +{ + return block_size_min; +} + +size_t tlsf_block_size_max(void) +{ + return block_size_max; +} + +/* +** Overhead of the TLSF structures in a given memory block passed to +** tlsf_add_pool, equal to the overhead of a free block and the +** sentinel block. +*/ +size_t tlsf_pool_overhead(void) +{ + return 2 * block_header_overhead; +} + +size_t tlsf_alloc_overhead(void) +{ + return block_header_overhead; +} + +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes) +{ + block_header_t* block; + block_header_t* next; + + const size_t pool_overhead = tlsf_pool_overhead(); + const size_t pool_bytes = align_down(bytes - pool_overhead, ALIGN_SIZE); + + if (((ptrdiff_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + if (pool_bytes < block_size_min || pool_bytes > block_size_max) + { +#if defined (TLSF_64BIT) + printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)((pool_overhead + block_size_max) / 256)); +#else + printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)(pool_overhead + block_size_max)); +#endif + return 0; + } + + /* + ** Create the main free block. Offset the start of the block slightly + ** so that the prev_phys_block field falls outside of the pool - + ** it will never be used. 
+ */ + block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead); + block_set_size(block, pool_bytes); + block_set_free(block); + block_set_prev_used(block); + block_insert(tlsf_cast(control_t*, tlsf), block); + + /* Split the block to create a zero-size sentinel block. */ + next = block_link_next(block); + block_set_size(next, 0); + block_set_used(next); + block_set_prev_free(next); + + return mem; +} + +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = offset_to_block(pool, -(int)block_header_overhead); + + int fl = 0, sl = 0; + + tlsf_assert(block_is_free(block) && "block should be free"); + tlsf_assert(!block_is_free(block_next(block)) && "next block should not be free"); + tlsf_assert(block_size(block_next(block)) == 0 && "next block size should be zero"); + + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* +** TLSF main interface. +*/ + +#if _DEBUG +int test_ffs_fls() +{ + /* Verify ffs/fls work properly. */ + int rv = 0; + rv += (tlsf_ffs(0) == -1) ? 0 : 0x1; + rv += (tlsf_fls(0) == -1) ? 0 : 0x2; + rv += (tlsf_ffs(1) == 0) ? 0 : 0x4; + rv += (tlsf_fls(1) == 0) ? 0 : 0x8; + rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10; + rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20; + rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40; + rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80; + +#if defined (TLSF_64BIT) + rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100; + rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200; + rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 
0 : 0x400; +#endif + + if (rv) + { + printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv); + } + return rv; +} +#endif + +tlsf_t tlsf_create(void* mem) +{ +#if _DEBUG + if (test_ffs_fls()) + { + return 0; + } +#endif + + if (((tlsfptr_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_create: Memory must be aligned to %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + control_construct(tlsf_cast(control_t*, mem)); + + return tlsf_cast(tlsf_t, mem); +} + +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes) +{ + tlsf_t tlsf = tlsf_create(mem); + tlsf_add_pool(tlsf, (char*)mem + tlsf_size(), bytes - tlsf_size()); + return tlsf; +} + +void tlsf_destroy(tlsf_t tlsf) +{ + /* Nothing to do. */ + (void)tlsf; +} + +pool_t tlsf_get_pool(tlsf_t tlsf) +{ + return tlsf_cast(pool_t, (char*)tlsf + tlsf_size()); +} + +void* tlsf_malloc(tlsf_t tlsf, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + block_header_t* block = block_locate_free(control, adjust); + return block_prepare_used(control, block, adjust); +} + +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + /* + ** We must allocate an additional minimum block size bytes so that if + ** our free block will leave an alignment gap which is smaller, we can + ** trim a leading free block and release it back to the pool. We must + ** do this because the previous physical block is in use, therefore + ** the prev_phys_block field is not valid, and we can't simply adjust + ** the size of that block. + */ + const size_t gap_minimum = sizeof(block_header_t); + const size_t size_with_gap = adjust_request_size(adjust + align + gap_minimum, align); + + /* + ** If alignment is less than or equals base alignment, we're done. + ** If we requested 0 bytes, return null, as tlsf_malloc(0) does. 
+ */ + const size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust; + + block_header_t* block = block_locate_free(control, aligned_size); + + /* This can't be a static assert. */ + tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead); + + if (block) + { + void* ptr = block_to_ptr(block); + void* aligned = align_ptr(ptr, align); + size_t gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + + /* If gap size is too small, offset to next aligned boundary. */ + if (gap && gap < gap_minimum) + { + const size_t gap_remain = gap_minimum - gap; + const size_t offset = tlsf_max(gap_remain, align); + const void* next_aligned = tlsf_cast(void*, + tlsf_cast(tlsfptr_t, aligned) + offset); + + aligned = align_ptr(next_aligned, align); + gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + } + + if (gap) + { + tlsf_assert(gap >= gap_minimum && "gap size too small"); + block = block_trim_free_leading(control, block, gap); + } + } + + return block_prepare_used(control, block, adjust); +} + +void tlsf_free(tlsf_t tlsf, void* ptr) +{ + /* Don't attempt to free a NULL pointer. */ + if (ptr) + { + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = block_from_ptr(ptr); + tlsf_assert(!block_is_free(block) && "block already marked as free"); + block_mark_as_free(block); + block = block_merge_prev(control, block); + block = block_merge_next(control, block); + block_insert(control, block); + } +} + +/* +** The TLSF block information provides us with enough information to +** provide a reasonably intelligent implementation of realloc, growing or +** shrinking the currently allocated block as required. 
+** +** This routine handles the somewhat esoteric edge cases of realloc: +** - a non-zero size with a null pointer will behave like malloc +** - a zero size with a non-null pointer will behave like free +** - a request that cannot be satisfied will leave the original buffer +** untouched +** - an extended buffer size will leave the newly-allocated area with +** contents undefined +*/ +void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + void* p = 0; + + /* Zero-size requests are treated as free. */ + if (ptr && size == 0) + { + tlsf_free(tlsf, ptr); + } + /* Requests with NULL pointers are treated as malloc. */ + else if (!ptr) + { + p = tlsf_malloc(tlsf, size); + } + else + { + block_header_t* block = block_from_ptr(ptr); + block_header_t* next = block_next(block); + + const size_t cursize = block_size(block); + const size_t combined = cursize + block_size(next) + block_header_overhead; + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + tlsf_assert(!block_is_free(block) && "block already marked as free"); + + /* + ** If the next block is used, or when combined with the current + ** block, does not offer enough space, we must reallocate and copy. + */ + if (adjust > cursize && (!block_is_free(next) || adjust > combined)) + { + p = tlsf_malloc(tlsf, size); + if (p) + { + const size_t minsize = tlsf_min(cursize, size); + memcpy(p, ptr, minsize); + tlsf_free(tlsf, ptr); + } + } + else + { + /* Do we need to expand to the next block? */ + if (adjust > cursize) + { + block_merge_next(control, block); + block_mark_as_used(block); + } + + /* Trim the resulting block and return the original pointer. 
*/ + block_trim_used(control, block, adjust); + p = ptr; + } + } + + return p; +} diff --git a/deps/lightrec/tlsf/tlsf.h b/deps/lightrec/tlsf/tlsf.h new file mode 100644 index 000000000..e9b5a91c0 --- /dev/null +++ b/deps/lightrec/tlsf/tlsf.h @@ -0,0 +1,90 @@ +#ifndef INCLUDED_tlsf +#define INCLUDED_tlsf + +/* +** Two Level Segregated Fit memory allocator, version 3.1. +** Written by Matthew Conte +** http://tlsf.baisoku.org +** +** Based on the original documentation by Miguel Masmano: +** http://www.gii.upv.es/tlsf/main/docs +** +** This implementation was written to the specification +** of the document, therefore no GPL restrictions apply. +** +** Copyright (c) 2006-2016, Matthew Conte +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions are met: +** * Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** * Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** * Neither the name of the copyright holder nor the +** names of its contributors may be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. 
IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY +** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */ +/* pool_t: a block of memory that TLSF can manage. */ +typedef void* tlsf_t; +typedef void* pool_t; + +/* Create/destroy a memory pool. */ +tlsf_t tlsf_create(void* mem); +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes); +void tlsf_destroy(tlsf_t tlsf); +pool_t tlsf_get_pool(tlsf_t tlsf); + +/* Add/remove memory pools. */ +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes); +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool); + +/* malloc/memalign/realloc/free replacements. */ +void* tlsf_malloc(tlsf_t tlsf, size_t bytes); +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes); +void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size); +void tlsf_free(tlsf_t tlsf, void* ptr); + +/* Returns internal block size, not original request size */ +size_t tlsf_block_size(void* ptr); + +/* Overheads/limits of internal structures. */ +size_t tlsf_size(void); +size_t tlsf_align_size(void); +size_t tlsf_block_size_min(void); +size_t tlsf_block_size_max(void); +size_t tlsf_pool_overhead(void); +size_t tlsf_alloc_overhead(void); + +/* Debugging. */ +typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user); +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user); +/* Returns nonzero if any internal consistency check fails. 
*/ +int tlsf_check(tlsf_t tlsf); +int tlsf_check_pool(pool_t pool); + +#if defined(__cplusplus) +}; +#endif + +#endif diff --git a/lightning-lightrec-include/debug.h b/include/debug.h similarity index 100% rename from lightning-lightrec-include/debug.h rename to include/debug.h diff --git a/lightning-lightrec-include/lightning.h b/include/lightning-actual.h similarity index 86% rename from lightning-lightrec-include/lightning.h rename to include/lightning-actual.h index 503eda29a..3b7b79afc 100644 --- a/lightning-lightrec-include/lightning.h +++ b/include/lightning-actual.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -24,6 +24,7 @@ #include #include #include +#include #if defined(__hpux) && defined(__hppa__) # include @@ -123,6 +124,11 @@ typedef jit_int32_t jit_bool_t; typedef jit_int32_t jit_gpr_t; typedef jit_int32_t jit_fpr_t; +#if !defined(__powerpc__) && \ + (defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__)) +#define __powerpc__ 1 +#endif + #if defined(__i386__) || defined(__x86_64__) # include #elif defined(__mips__) @@ -145,6 +151,8 @@ typedef jit_int32_t jit_fpr_t; # include #elif defined(__riscv) # include +#elif defined(__loongarch__) +# include #endif #define jit_flag_node 0x0001 /* patch node not absolute */ @@ -182,6 +190,8 @@ typedef enum { #define jit_align(u) jit_new_node_w(jit_code_align, u) jit_code_live, jit_code_align, jit_code_save, jit_code_load, +#define jit_skip(u) jit_new_node_w(jit_code_skip, u) + jit_code_skip, #define jit_name(u) _jit_name(_jit,u) jit_code_name, #define jit_note(u, v) _jit_note(_jit, u, v) @@ -202,27 +212,80 @@ typedef enum { #define jit_allocar(u, v) _jit_allocar(_jit,u,v) jit_code_allocai, jit_code_allocar, -#define jit_arg() _jit_arg(_jit) - jit_code_arg, +#define jit_arg_c() _jit_arg(_jit, jit_code_arg_c) +#define jit_arg_s() _jit_arg(_jit, jit_code_arg_s) +#define 
jit_arg_i() _jit_arg(_jit, jit_code_arg_i) +# if __WORDSIZE == 32 +# define jit_arg() jit_arg_i() +#else +# define jit_arg_l() _jit_arg(_jit, jit_code_arg_l) +# define jit_arg() jit_arg_l() +#endif + jit_code_arg_c, jit_code_arg_s, + jit_code_arg_i, jit_code_arg_l, +#if __WORDSIZE == 32 +# define jit_code_arg jit_code_arg_i +#else +# define jit_code_arg jit_code_arg_l +#endif + #define jit_getarg_c(u,v) _jit_getarg_c(_jit,u,v) #define jit_getarg_uc(u,v) _jit_getarg_uc(_jit,u,v) - jit_code_getarg_c, jit_code_getarg_uc, #define jit_getarg_s(u,v) _jit_getarg_s(_jit,u,v) #define jit_getarg_us(u,v) _jit_getarg_us(_jit,u,v) - jit_code_getarg_s, jit_code_getarg_us, #define jit_getarg_i(u,v) _jit_getarg_i(_jit,u,v) #if __WORDSIZE == 32 # define jit_getarg(u,v) jit_getarg_i(u,v) #else -# define jit_getarg(u,v) jit_getarg_l(u,v) # define jit_getarg_ui(u,v) _jit_getarg_ui(_jit,u,v) # define jit_getarg_l(u,v) _jit_getarg_l(_jit,u,v) +# define jit_getarg(u,v) jit_getarg_l(u,v) #endif + jit_code_getarg_c, jit_code_getarg_uc, + jit_code_getarg_s, jit_code_getarg_us, jit_code_getarg_i, jit_code_getarg_ui, jit_code_getarg_l, -# define jit_putargr(u,v) _jit_putargr(_jit,u,v) -# define jit_putargi(u,v) _jit_putargi(_jit,u,v) - jit_code_putargr, jit_code_putargi, +#if __WORDSIZE == 32 +# define jit_code_getarg jit_code_getarg_i +#else +# define jit_code_getarg jit_code_getarg_l +#endif + +#define jit_putargr_c(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_c) +#define jit_putargi_c(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_c) +#define jit_putargr_uc(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_uc) +#define jit_putargi_uc(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_uc) +#define jit_putargr_s(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_s) +#define jit_putargi_s(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_s) +#define jit_putargr_us(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_us) +#define jit_putargi_us(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_us) +#define jit_putargr_i(u,v) 
_jit_putargr(_jit,u,v,jit_code_putargr_i) +#define jit_putargi_i(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_i) +#if __WORDSIZE == 32 +# define jit_putargr(u,v) jit_putargr_i(u,v) +# define jit_putargi(u,v) jit_putargi_i(u,v) +#else +# define jit_putargr_ui(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_ui) +# define jit_putargi_ui(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_ui) +# define jit_putargr_l(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_l) +# define jit_putargi_l(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_l) +# define jit_putargr(u,v) jit_putargr_l(u,v) +# define jit_putargi(u,v) jit_putargi_l(u,v) +#endif + jit_code_putargr_c, jit_code_putargi_c, + jit_code_putargr_uc, jit_code_putargi_uc, + jit_code_putargr_s, jit_code_putargi_s, + jit_code_putargr_us, jit_code_putargi_us, + jit_code_putargr_i, jit_code_putargi_i, + jit_code_putargr_ui, jit_code_putargi_ui, + jit_code_putargr_l, jit_code_putargi_l, +#if __WORDSIZE == 32 +# define jit_code_putargr jit_code_putargr_i +# define jit_code_putargi jit_code_putargi_i +#else +# define jit_code_putargr jit_code_putargr_l +# define jit_code_putargi jit_code_putargi_l +#endif #define jit_va_start(u) jit_new_node_w(jit_code_va_start, u) jit_code_va_start, @@ -339,6 +402,15 @@ typedef enum { #define jit_movr(u,v) jit_new_node_ww(jit_code_movr,u,v) #define jit_movi(u,v) jit_new_node_ww(jit_code_movi,u,v) jit_code_movr, jit_code_movi, + +#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w) +#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w) + jit_code_movnr, jit_code_movzr, + + jit_code_casr, jit_code_casi, +#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x) +#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x) + #define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v) #define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v) jit_code_extr_c, jit_code_extr_uc, @@ -351,6 +423,18 @@ typedef enum { #endif jit_code_extr_i, jit_code_extr_ui, +#define 
jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v) + jit_code_bswapr_us, +#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) + jit_code_bswapr_ui, +#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) + jit_code_bswapr_ul, +#if __WORDSIZE == 32 +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) +#else +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) +#endif + #define jit_htonr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v) #define jit_ntohr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v) jit_code_htonr_us, @@ -537,33 +621,106 @@ typedef enum { #define jit_prepare() _jit_prepare(_jit) jit_code_prepare, -#define jit_pushargr(u) _jit_pushargr(_jit,u) -#define jit_pushargi(u) _jit_pushargi(_jit,u) - jit_code_pushargr, jit_code_pushargi, + +#define jit_pushargr_c(u) _jit_pushargr(_jit,u,jit_code_pushargr_c) +#define jit_pushargi_c(u) _jit_pushargi(_jit,u,jit_code_pushargi_c) +#define jit_pushargr_uc(u) _jit_pushargr(_jit,u,jit_code_pushargr_uc) +#define jit_pushargi_uc(u) _jit_pushargi(_jit,u,jit_code_pushargi_uc) +#define jit_pushargr_s(u) _jit_pushargr(_jit,u,jit_code_pushargr_s) +#define jit_pushargi_s(u) _jit_pushargi(_jit,u,jit_code_pushargi_s) +#define jit_pushargr_us(u) _jit_pushargr(_jit,u,jit_code_pushargr_us) +#define jit_pushargi_us(u) _jit_pushargi(_jit,u,jit_code_pushargi_us) +#define jit_pushargr_i(u) _jit_pushargr(_jit,u,jit_code_pushargr_i) +#define jit_pushargi_i(u) _jit_pushargi(_jit,u,jit_code_pushargi_i) +#if __WORDSIZE == 32 +# define jit_pushargr(u) jit_pushargr_i(u) +# define jit_pushargi(u) jit_pushargi_i(u) +#else +# define jit_pushargr_ui(u) _jit_pushargr(_jit,u,jit_code_pushargr_ui) +# define jit_pushargi_ui(u) _jit_pushargi(_jit,u,jit_code_pushargi_ui) +# define jit_pushargr_l(u) _jit_pushargr(_jit,u,jit_code_pushargr_l) +# define jit_pushargi_l(u) _jit_pushargi(_jit,u,jit_code_pushargi_l) +# define jit_pushargr(u) jit_pushargr_l(u) +# define jit_pushargi(u) jit_pushargi_l(u) 
+#endif + jit_code_pushargr_c, jit_code_pushargi_c, + jit_code_pushargr_uc, jit_code_pushargi_uc, + jit_code_pushargr_s, jit_code_pushargi_s, + jit_code_pushargr_us, jit_code_pushargi_us, + jit_code_pushargr_i, jit_code_pushargi_i, + jit_code_pushargr_ui, jit_code_pushargi_ui, + jit_code_pushargr_l, jit_code_pushargi_l, +#if __WORDSIZE == 32 +# define jit_code_pushargr jit_code_pushargr_i +# define jit_code_pushargi jit_code_pushargi_i +#else +# define jit_code_pushargr jit_code_pushargr_l +# define jit_code_pushargi jit_code_pushargi_l +#endif + #define jit_finishr(u) _jit_finishr(_jit,u) #define jit_finishi(u) _jit_finishi(_jit,u) jit_code_finishr, jit_code_finishi, #define jit_ret() _jit_ret(_jit) jit_code_ret, -#define jit_retr(u) _jit_retr(_jit,u) -#define jit_reti(u) _jit_reti(_jit,u) - jit_code_retr, jit_code_reti, + +#define jit_retr_c(u) _jit_retr(_jit,u,jit_code_retr_c) +#define jit_reti_c(u) _jit_reti(_jit,u,jit_code_reti_c) +#define jit_retr_uc(u) _jit_retr(_jit,u,jit_code_retr_uc) +#define jit_reti_uc(u) _jit_reti(_jit,u,jit_code_reti_uc) +#define jit_retr_s(u) _jit_retr(_jit,u,jit_code_retr_s) +#define jit_reti_s(u) _jit_reti(_jit,u,jit_code_reti_s) +#define jit_retr_us(u) _jit_retr(_jit,u,jit_code_retr_us) +#define jit_reti_us(u) _jit_reti(_jit,u,jit_code_reti_us) +#define jit_retr_i(u) _jit_retr(_jit,u,jit_code_retr_i) +#define jit_reti_i(u) _jit_reti(_jit,u,jit_code_reti_i) +#if __WORDSIZE == 32 +# define jit_retr(u) jit_retr_i(u) +# define jit_reti(u) jit_reti_i(u) +#else +# define jit_retr_ui(u) _jit_retr(_jit,u,jit_code_retr_ui) +# define jit_reti_ui(u) _jit_reti(_jit,u,jit_code_reti_ui) +# define jit_retr_l(u) _jit_retr(_jit,u,jit_code_retr_l) +# define jit_reti_l(u) _jit_reti(_jit,u,jit_code_reti_l) +# define jit_retr(u) jit_retr_l(u) +# define jit_reti(u) jit_reti_l(u) +#endif + jit_code_retr_c, jit_code_reti_c, + jit_code_retr_uc, jit_code_reti_uc, + jit_code_retr_s, jit_code_reti_s, + jit_code_retr_us, jit_code_reti_us, + jit_code_retr_i, 
jit_code_reti_i, + jit_code_retr_ui, jit_code_reti_ui, + jit_code_retr_l, jit_code_reti_l, +#if __WORDSIZE == 32 +# define jit_code_retr jit_code_retr_i +# define jit_code_reti jit_code_reti_i +#else +# define jit_code_retr jit_code_retr_l +# define jit_code_reti jit_code_reti_l +#endif + #define jit_retval_c(u) _jit_retval_c(_jit,u) #define jit_retval_uc(u) _jit_retval_uc(_jit,u) - jit_code_retval_c, jit_code_retval_uc, #define jit_retval_s(u) _jit_retval_s(_jit,u) #define jit_retval_us(u) _jit_retval_us(_jit,u) - jit_code_retval_s, jit_code_retval_us, #define jit_retval_i(u) _jit_retval_i(_jit,u) #if __WORDSIZE == 32 # define jit_retval(u) jit_retval_i(u) #else -# define jit_retval(u) jit_retval_l(u) # define jit_retval_ui(u) _jit_retval_ui(_jit,u) # define jit_retval_l(u) _jit_retval_l(_jit,u) +# define jit_retval(u) jit_retval_l(u) #endif + jit_code_retval_c, jit_code_retval_uc, + jit_code_retval_s, jit_code_retval_us, jit_code_retval_i, jit_code_retval_ui, jit_code_retval_l, +#if __WORDSIZE == 32 +# define jit_code_retval jit_code_retval_i +#else +# define jit_code_retval jit_code_retval_l +#endif #define jit_epilog() _jit_epilog(_jit) jit_code_epilog, @@ -931,7 +1088,8 @@ extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t); extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t); extern void _jit_ellipsis(jit_state_t*); -extern jit_node_t *_jit_arg(jit_state_t*); +extern jit_node_t *_jit_arg(jit_state_t*, jit_code_t); + extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*); @@ -941,19 +1099,24 @@ extern void _jit_getarg_i(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*); #endif -extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*); -extern void _jit_putargi(jit_state_t*, 
jit_word_t, jit_node_t*); + +extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*, jit_code_t); +extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*, jit_code_t); extern void _jit_prepare(jit_state_t*); extern void _jit_ellipsis(jit_state_t*); extern void _jit_va_push(jit_state_t*, jit_gpr_t); -extern void _jit_pushargr(jit_state_t*, jit_gpr_t); -extern void _jit_pushargi(jit_state_t*, jit_word_t); + +extern void _jit_pushargr(jit_state_t*, jit_gpr_t, jit_code_t); +extern void _jit_pushargi(jit_state_t*, jit_word_t, jit_code_t); + extern void _jit_finishr(jit_state_t*, jit_gpr_t); extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t); extern void _jit_ret(jit_state_t*); -extern void _jit_retr(jit_state_t*, jit_gpr_t); -extern void _jit_reti(jit_state_t*, jit_word_t); + +extern void _jit_retr(jit_state_t*, jit_gpr_t, jit_code_t); +extern void _jit_reti(jit_state_t*, jit_word_t, jit_code_t); + extern void _jit_retval_c(jit_state_t*, jit_gpr_t); extern void _jit_retval_uc(jit_state_t*, jit_gpr_t); extern void _jit_retval_s(jit_state_t*, jit_gpr_t); @@ -963,6 +1126,7 @@ extern void _jit_retval_i(jit_state_t*, jit_gpr_t); extern void _jit_retval_ui(jit_state_t*, jit_gpr_t); extern void _jit_retval_l(jit_state_t*, jit_gpr_t); #endif + extern void _jit_epilog(jit_state_t*); #define jit_patch(u) _jit_patch(_jit,u) @@ -987,6 +1151,10 @@ extern void _jit_frame(jit_state_t*, jit_int32_t); extern void _jit_tramp(jit_state_t*, jit_int32_t); #define jit_emit() _jit_emit(_jit) extern jit_pointer_t _jit_emit(jit_state_t*); +#define jit_unprotect() _jit_unprotect(_jit) +extern void _jit_unprotect(jit_state_t*); +#define jit_protect() _jit_protect(_jit) +extern void _jit_protect(jit_state_t*); #define jit_print() _jit_print(_jit) extern void _jit_print(jit_state_t*); @@ -1059,6 +1227,10 @@ extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t, extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t, jit_int32_t, jit_int32_t, jit_word_t, 
jit_word_t); +#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h) +extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t, + jit_word_t, jit_word_t, + jit_int32_t, jit_int32_t); #define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w) extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t, jit_word_t, jit_word_t, jit_float32_t); diff --git a/include/lightning.h b/include/lightning.h new file mode 100644 index 000000000..df641fb4d --- /dev/null +++ b/include/lightning.h @@ -0,0 +1,5 @@ + +#define HAVE_MMAP 1 +#include + + diff --git a/lightning-lightrec-include/config.h b/include/lightrec-config.h similarity index 91% rename from lightning-lightrec-include/config.h rename to include/lightrec-config.h index e6176611b..c465f2e84 100644 --- a/lightning-lightrec-include/config.h +++ b/include/lightrec-config.h @@ -7,7 +7,7 @@ #define __LIGHTREC_CONFIG_H__ #define ENABLE_FIRST_PASS 1 -#define ENABLE_TINYMM 0 +#define ENABLE_CODE_BUFFER 1 #define OPT_REMOVE_DIV_BY_ZERO_SEQ 1 #define OPT_REPLACE_MEMSET 1 @@ -16,6 +16,7 @@ #define OPT_LOCAL_BRANCHES 1 #define OPT_SWITCH_DELAY_SLOTS 1 #define OPT_FLAG_STORES 1 +#define OPT_FLAG_IO 1 #define OPT_FLAG_MULT_DIV 1 #define OPT_EARLY_UNLOAD 1 diff --git a/libretro.cpp b/libretro.cpp index a55f9fa43..c4c2de4d0 100644 --- a/libretro.cpp +++ b/libretro.cpp @@ -34,6 +34,7 @@ retro_input_state_t dbg_input_state_cb = 0; #define ISHEXDEC ((codeLine[cursor]>='0') && (codeLine[cursor]<='9')) || ((codeLine[cursor]>='a') && (codeLine[cursor]<='f')) || ((codeLine[cursor]>='A') && (codeLine[cursor]<='F')) #ifdef HAVE_LIGHTREC +#include #include #ifdef HAVE_ASHMEM @@ -43,6 +44,7 @@ retro_input_state_t dbg_input_state_cb = 0; #endif #if defined(HAVE_SHM) || defined(HAVE_ASHMEM) +#include #include #include #endif @@ -104,12 +106,14 @@ uint8 psx_mmap = 0; uint8 *psx_mem = NULL; uint8 *psx_bios = NULL; uint8 *psx_scratch = NULL; +uint8 *lightrec_codebuffer = NULL; #if defined(HAVE_ASHMEM) int memfd; #endif 
#endif uint32 EventCycles = 128; +uint8_t spu_samples = 1; // CPU overclock factor (or 0 if disabled) int32_t psx_overclock_factor = 0; @@ -479,7 +483,7 @@ FrontIO *PSX_FIO = NULL; MultiAccessSizeMem<512 * 1024, uint32, false> *BIOSROM = NULL; MultiAccessSizeMem<65536, uint32, false> *PIOMem = NULL; -MultiAccessSizeMem<2048 * 1024, uint32, false> *MainRAM = NULL; +MultiAccessSizeMem<2048 * 1024, uint32, false> *MainRAM = (MultiAccessSizeMem<2048 * 1024, uint32, false>*)INVALID_PTR; MultiAccessSizeMem<1024, uint32, false> *ScratchRAM = NULL; #ifdef HAVE_LIGHTREC @@ -1657,6 +1661,13 @@ static void SetDiscWrapper(const bool CD_TrayOpen) { #else #define MAP_FIXED_NOREPLACE 0 #endif +#ifndef MFD_HUGETLB +#define MFD_HUGETLB 0x0004 +#endif +#ifndef MAP_HUGETLB +/* don't try to map as hugetlb if not defined */ +#define MAP_HUGETLB 0 +#endif #endif static const uintptr_t supported_io_bases[] = { @@ -1687,31 +1698,70 @@ static const uintptr_t supported_io_bases[] = { #define RAM_SIZE 0x200000 #define BIOS_SIZE 0x80000 #define SCRATCH_SIZE 0x400 -#define SHM_SIZE RAM_SIZE+BIOS_SIZE+SCRATCH_SIZE #ifdef HAVE_WIN_SHM +/* MapViewOfFileEx requires fd and offset in all cases, as the MAP_ANONYMOUS equivalent + is mapping from INVALID_FILE_HANDLE memfd with offset */ #define MAP(addr, size, fd, offset) \ MapViewOfFileEx(fd, FILE_MAP_ALL_ACCESS, 0, offset, size, addr) +#define MAP_SHM(addr,size,fd,offset)\ + MAP(addr,size,fd,offset) +#define MAP_CODE(addr,size,fd,offset)\ + MapViewOfFileEx(fd, FILE_MAP_ALL_ACCESS|FILE_MAP_EXECUTE, 0, offset, size, addr) #define UNMAP(addr, size) UnmapViewOfFile(addr) #define MFAILED NULL #define NUM_MEM 4 #elif defined(HAVE_SHM) || defined(HAVE_ASHMEM) -#define MAP(addr, size, fd, offset) \ + +static void * mmap_huge(void *addr, size_t length, int prot, int flags, + int fd, off_t offset) +{ + void *map = MAP_FAILED; + + if (length >= 0x200000) { + map = mmap(addr, length, prot, + flags | MAP_HUGETLB | (21 << MAP_HUGE_SHIFT), + fd, offset); + if (map 
!= MAP_FAILED) + log_cb(RETRO_LOG_DEBUG, "Hugetlb mmap to address 0x%lx succeeded\n", (uintptr_t) addr); + } + + if (map == MAP_FAILED) { + map = mmap(addr, length, prot, flags, fd, offset); + if (map != MAP_FAILED) + log_cb(RETRO_LOG_DEBUG, "Regular mmap to address 0x%lx succeeded\n", (uintptr_t) addr); + } + + return map; +} + +/* mmap with MAP_ANONYMOUS can ignore fd and offset */ +#define MAP(addr,size,fd,offset)\ mmap(addr,size, PROT_READ | PROT_WRITE, \ - MAP_SHARED | MAP_FIXED_NOREPLACE, fd, offset) + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0) +#define MAP_SHM(addr, size, fd, offset) \ + mmap_huge(addr,size, PROT_READ | PROT_WRITE, \ + MAP_SHARED | MAP_FIXED_NOREPLACE, fd, offset) +#define MAP_CODE(addr, size, fd, offset) \ + mmap_huge(addr,size, PROT_EXEC | PROT_READ | PROT_WRITE, \ + MAP_PRIVATE | MAP_FIXED_NOREPLACE | MAP_ANONYMOUS, -1, 0) #define UNMAP(addr, size) munmap(addr, size) #define MFAILED MAP_FAILED #define NUM_MEM 4 #else #define MAP(addr, size, fd, offset) \ mmap(addr,size, PROT_READ | PROT_WRITE, \ - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0) + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0) +#define MAP_SHM(addr,size,fd,offset)\ + MAP(addr,size,fd,offset) +#define MAP_CODE(addr,size,fd,offset)\ + MAP(addr,size,fd,offset) #define UNMAP(addr, size) munmap(addr, size) #define MFAILED MAP_FAILED #define NUM_MEM 1 #endif -int lightrec_init_mmap() +int lightrec_init_mmap(bool hugetlb) { int r = 0, i, j; uintptr_t base; @@ -1743,7 +1793,7 @@ int lightrec_init_mmap() error2 = dlerror(); if (error1 == NULL) - memfd = (*create)("lightrec_memfd",SHM_SIZE); + memfd = (*create)("lightrec_memfd",RAM_SIZE); if (memfd < 0) { log_cb(RETRO_LOG_ERROR, "Failed to ASharedMemory_create: %s\n", @@ -1763,37 +1813,47 @@ int lightrec_init_mmap() } } else { ioctl(memfd, ASHMEM_SET_NAME, "lightrec_memfd"); - ioctl(memfd, ASHMEM_SET_SIZE, SHM_SIZE); + ioctl(memfd, ASHMEM_SET_SIZE, RAM_SIZE); } #endif #ifdef HAVE_SHM int memfd; const char *shm_name = 
"/lightrec_memfd_beetle"; - memfd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + //try HUGETLB then fallback to normal memfd + memfd = syscall(SYS_memfd_create,shm_name,hugetlb?MFD_HUGETLB:0); - if (memfd < 0 && errno == EEXIST) { - shm_unlink(shm_name); +#ifndef __ANDROID__ +/* Android can build with HAVE_SHM, but doesn't have shm_open/unlink + Support platforms with shm_open, but without memfd_create */ + if (memfd < 0) { memfd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + + if (memfd < 0 && errno == EEXIST) { + shm_unlink(shm_name); + memfd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + } + + /* unlink ASAP to prevent leaving a file in shared memory if we crash */ + shm_unlink(shm_name); } +#endif if (memfd < 0) { log_cb(RETRO_LOG_ERROR, "Failed to create SHM: %s\n", strerror(errno)); return 0; } - /* unlink ASAP to prevent leaving a file in shared memory if we crash */ - shm_unlink(shm_name); - if (ftruncate(memfd, SHM_SIZE) < 0) { - log_cb(RETRO_LOG_ERROR, "Could not truncate SHM size: %s\n", strerror(errno)); + if (ftruncate(memfd, RAM_SIZE) < 0) { + log_cb(RETRO_LOG_ERROR, "Could not truncate memfd size: %s\n", strerror(errno)); goto close_return; } #endif #ifdef HAVE_WIN_SHM HANDLE memfd; - memfd = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, SHM_SIZE, NULL); + memfd = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_EXECUTE_READWRITE, 0, RAM_SIZE+LIGHTREC_CODEBUFFER_SIZE+BIOS_SIZE+SCRATCH_SIZE, NULL); if (memfd == NULL) { log_cb(RETRO_LOG_ERROR, "Failed to create WIN_SHM: %s (%d)\n", strerror(errno), GetLastError()); @@ -1808,7 +1868,7 @@ int lightrec_init_mmap() scratch = (void *)(base + 0x1f800000); for (j = 0; j < NUM_MEM; j++) { - map = MAP((void *)(base + j * RAM_SIZE), RAM_SIZE, memfd, 0); + map = MAP_SHM((void *)(base + j * RAM_SIZE), RAM_SIZE, memfd, 0); if (map == MFAILED) break; else if (map != (void *)(base + j * RAM_SIZE)) @@ -1828,40 +1888,57 @@ int 
lightrec_init_mmap() { psx_mem = (uint8 *)base; - map = MAP(bios, BIOS_SIZE, memfd, RAM_SIZE); + if (ENABLE_CODE_BUFFER) { + /* Allocate a codebuffer after ram and mirrors, but don't reject if actual location is different */ + map = MAP_CODE((void *)(base + NUM_MEM * RAM_SIZE), LIGHTREC_CODEBUFFER_SIZE, memfd, RAM_SIZE); + + if (map == MFAILED){ + log_cb(RETRO_LOG_WARN, "Unable to mmap code buffer, dynarec may be slower\n"); + goto err_unmap; + } + + lightrec_codebuffer = (uint8_t *)map; + } + + map = MAP(bios, BIOS_SIZE, memfd, RAM_SIZE+LIGHTREC_CODEBUFFER_SIZE); if (map == MFAILED) goto err_unmap; psx_bios = (uint8 *)map; if (map != bios) - goto err_unmap_bios; + goto err_unmap; - map = MAP(scratch, SCRATCH_SIZE, memfd, RAM_SIZE+BIOS_SIZE); + map = MAP(scratch, SCRATCH_SIZE, memfd, RAM_SIZE+LIGHTREC_CODEBUFFER_SIZE+BIOS_SIZE); if (map == MFAILED) - goto err_unmap_bios; + goto err_unmap; psx_scratch = (uint8 *)map; if (map != scratch) - goto err_unmap_scratch; + goto err_unmap; r = NUM_MEM; goto close_return; } -err_unmap_scratch: +err_unmap: + if(lightrec_codebuffer){ + UNMAP(lightrec_codebuffer, BIOS_SIZE); + lightrec_codebuffer = NULL; + } + if(psx_scratch){ UNMAP(psx_scratch, SCRATCH_SIZE); psx_scratch = NULL; } -err_unmap_bios: + if(psx_bios){ UNMAP(psx_bios, BIOS_SIZE); psx_bios = NULL; } -err_unmap: + /* Clean up any mapped ram or mirrors and try again */ for (; j > 0; j--) UNMAP((void *)(base + (j - 1) * RAM_SIZE), RAM_SIZE); @@ -1869,7 +1946,7 @@ int lightrec_init_mmap() psx_mem = NULL; } - if (i == ARRAY_SIZE(supported_io_bases)) { + if (i == ARRAY_SIZE(supported_io_bases) && !hugetlb) { log_cb(RETRO_LOG_WARN, "Unable to mmap on any base address, dynarec will be slower\n"); } @@ -2036,7 +2113,11 @@ static void InitCommon(std::vector *_CDInterfaces, const bool EmulateMem SetDiscWrapper(CD_TrayOpen); #ifdef HAVE_LIGHTREC - psx_mmap = lightrec_init_mmap(); + //try hugetlb then fallback if mmap fails + psx_mmap = lightrec_init_mmap(true); + + if(psx_mmap == 
0) + psx_mmap = lightrec_init_mmap(false); if(psx_mmap > 0) { @@ -2403,15 +2484,15 @@ static void Cleanup(void) DMA_Kill(); #ifdef HAVE_LIGHTREC - MainRAM = NULL; + MainRAM = (MultiAccessSizeMem<2048 * 1024, uint32, false>*)INVALID_PTR; ScratchRAM = NULL; BIOSROM = NULL; if(psx_mmap > 0) lightrec_free_mmap(); #else - if(MainRAM) + if(MainRAM != INVALID_PTR) delete MainRAM; - MainRAM = NULL; + MainRAM = (MultiAccessSizeMem<2048 * 1024, uint32, false>*)INVALID_PTR; if(ScratchRAM) delete ScratchRAM; @@ -3170,8 +3251,6 @@ static void check_variables(bool startup) { if (strcmp(var.value, "execute") == 0) psx_dynarec = DYNAREC_EXECUTE; - else if (strcmp(var.value, "execute_one") == 0) - psx_dynarec = DYNAREC_EXECUTE_ONE; else if (strcmp(var.value, "run_interpreter") == 0) psx_dynarec = DYNAREC_RUN_INTERPRETER; else @@ -3200,6 +3279,15 @@ static void check_variables(bool startup) } else EventCycles = 128; + + var.key = BEETLE_OPT(dynarec_spu_samples); + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + spu_samples = atoi(var.value); + } + else + spu_samples = 1; #endif var.key = BEETLE_OPT(cpu_freq_scale); diff --git a/libretro_core_options.h b/libretro_core_options.h index 28654c32d..bff1698c9 100644 --- a/libretro_core_options.h +++ b/libretro_core_options.h @@ -1080,7 +1080,6 @@ struct retro_core_option_v2_definition option_defs_us[] = { { { "disabled", "Disabled (Beetle Interpreter)" }, { "execute", "Max Performance" }, - { "execute_one", "Cycle Timing Check" }, { "run_interpreter", "Lightrec Interpreter" }, { NULL, NULL }, }, @@ -1102,24 +1101,36 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, { BEETLE_OPT(dynarec_eventcycles), - "Dynarec DMA/GPU Event Cycles", - NULL, - "Max cycles run by CPU before a GPU or DMA Update is checked, higher number will be faster, has much less impact on beetle interpreter than dynarec.", + "Dynarec DMA/GPU/MDEC/Timer Event Cycles", NULL, + "Max cycles run by CPU before a GPU/DMA/MDEC/Timer 
Update is checked, higher number will be faster, has much less impact on beetle interpreter than dynarec.", NULL, + "hacks", { - { "128", NULL }, + { "128", "128 (Default)" }, { "256", NULL }, - { "384", NULL }, { "512", NULL }, - { "640", NULL }, - { "768", NULL }, - { "896", NULL }, { "1024", NULL }, + { "2048", NULL }, { NULL, NULL }, }, "128" }, + { + BEETLE_OPT(dynarec_spu_samples), + "Dynarec SPU Samples", + NULL, + "Max SPU samples to run before a SPU Update is checked, higher number will be faster, but will cause sound glitches in some games with anything other than 1.", + NULL, + "hacks", + { + { "1", "1 (Default)" }, + { "4", NULL }, + { "16", NULL }, + { NULL, NULL }, + }, + "1" + }, #endif { BEETLE_OPT(core_timing_fps), diff --git a/mednafen/mednafen-types.h b/mednafen/mednafen-types.h index 99eb5cedd..dad023d9a 100644 --- a/mednafen/mednafen-types.h +++ b/mednafen/mednafen-types.h @@ -225,6 +225,10 @@ typedef uint64_t uint64; #define TRUE 1 #endif +#ifndef INVALID_PTR +#define INVALID_PTR ((void *)-1) +#endif + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define MSB_FIRST #else diff --git a/mednafen/mempatcher.cpp b/mednafen/mempatcher.cpp index ebc3aedfa..fd176cc9e 100644 --- a/mednafen/mempatcher.cpp +++ b/mednafen/mempatcher.cpp @@ -148,7 +148,7 @@ void MDFNMP_AddRAM(uint32 size, uint32 A, uint8 *RAM) for(unsigned int x = 0; x < size; x++) { RAMPtrs[AB + x] = RAM; - if(RAM) // Don't increment the RAM pointer if we're passed a NULL pointer + if(RAM != INVALID_PTR) // Don't increment the RAM pointer if we're passed an invalid pointer RAM += PageSize; } } diff --git a/mednafen/psx/cpu.cpp b/mednafen/psx/cpu.cpp index f87141a1d..e7a34499d 100644 --- a/mednafen/psx/cpu.cpp +++ b/mednafen/psx/cpu.cpp @@ -36,16 +36,19 @@ int pgxpMode = PGXP_GetModes(); #ifdef HAVE_LIGHTREC -#include #include #include +extern enum DYNAREC psx_dynarec; enum DYNAREC prev_dynarec; bool prev_invalidate; extern bool psx_dynarec_invalidate; extern uint8 psx_mmap; +extern uint8 
*lightrec_codebuffer; static struct lightrec_state *lightrec_state; uint8 next_interpreter; +struct lightrec_registers * PS_CPU::lightrec_regs; +uint32_t cpu_timestamp; #endif extern bool psx_gte_overclock; @@ -187,15 +190,6 @@ void PS_CPU::Power(void) PGXP_Init(); -#ifdef HAVE_LIGHTREC - next_interpreter = 0; - prev_dynarec = psx_dynarec; - prev_invalidate = psx_dynarec_invalidate; - pgxpMode = PGXP_GetModes(); - if(psx_dynarec != DYNAREC_DISABLED) - lightrec_plugin_init(); -#endif - // Not quite sure about these poweron/reset values: for(unsigned i = 0; i < 1024; i++) { @@ -204,6 +198,15 @@ void PS_CPU::Power(void) } GTE_Power(); + +#ifdef HAVE_LIGHTREC + next_interpreter = 0; + prev_dynarec = psx_dynarec; + prev_invalidate = psx_dynarec_invalidate; + pgxpMode = PGXP_GetModes(); + if(psx_dynarec != DYNAREC_DISABLED) + lightrec_plugin_init(); +#endif } int PS_CPU::StateAction(StateMem *sm, const unsigned load, const bool data_only) @@ -299,6 +302,18 @@ void PS_CPU::AssertIRQ(unsigned which, bool asserted) { assert(which <= 5); +#ifdef HAVE_LIGHTREC + if(psx_dynarec != DYNAREC_DISABLED) + { + lightrec_regs->cp0[CP0REG_CAUSE] &= ~(1 << (10 + which)); + + if(asserted) + lightrec_regs->cp0[CP0REG_CAUSE] |= 1 << (10 + which); + + lightrec_set_exit_flags(lightrec_state, LIGHTREC_EXIT_CHECK_INTERRUPT); + } +#endif + CP0.CAUSE &= ~(1 << (10 + which)); if(asserted) @@ -2665,10 +2680,11 @@ pscpu_timestamp_t PS_CPU::Run(pscpu_timestamp_t timestamp_in, bool BIOSPrintMode prev_invalidate != psx_dynarec_invalidate) { //init lightrec when changing dynarec, invalidate, or PGXP option, cleans entire state if already running - if(psx_dynarec != DYNAREC_DISABLED) - { + if(psx_dynarec == DYNAREC_DISABLED) + GTE_SwitchRegisters(false,lightrec_regs->cp2d); + else lightrec_plugin_init(); - } + prev_dynarec = psx_dynarec; pgxpMode = PGXP_GetModes(); prev_invalidate = psx_dynarec_invalidate; @@ -3162,128 +3178,24 @@ void PS_CPU::print_for_big_ass_debugger(int32_t timestamp, uint32_t PC) } 
#endif /* LIGHTREC_DEBUG */ -u32 PS_CPU::cop_mfc(struct lightrec_state *state, u32 op, u8 reg) -{ - return CP0.Regs[reg]; -} - -u32 PS_CPU::cop_cfc(struct lightrec_state *state, u32 op, u8 reg) -{ - return CP0.Regs[reg]; -} - -u32 PS_CPU::cop2_mfc(struct lightrec_state *state, u32 op, u8 reg) +void PS_CPU::pgxp_cop2_notify(struct lightrec_state *state, u32 op, u32 data) { - return GTE_ReadDR(reg); -} - -u32 PS_CPU::pgxp_cop2_mfc(struct lightrec_state *state, u32 op, u8 reg) -{ - u32 r = GTE_ReadDR(reg); - - if((op >> 26) == OP_CP2) - PGXP_GTE_MFC2(op, r, r); - - return r; -} - -u32 PS_CPU::cop2_cfc(struct lightrec_state *state, u32 op, u8 reg) -{ - return GTE_ReadCR(reg); -} - -u32 PS_CPU::pgxp_cop2_cfc(struct lightrec_state *state, u32 op, u8 reg) -{ - u32 r = GTE_ReadCR(reg); - - PGXP_GTE_CFC2(op, r, r); - - return r; -} - -void PS_CPU::cop_mtc_ctc(struct lightrec_state *state, - u8 reg, u32 value) -{ - switch (reg) { - case 1: - case 4: - case 8: - case 14: - case 15: - /* Those registers are read-only */ - break; - case 12: /* Status */ - if ((CP0.SR & ~value) & (1 << 16)) { - memcpy(MainRAM->data8, cache_buf, sizeof(cache_buf)); - lightrec_invalidate_all(state); - } else if ((~CP0.SR & value) & (1 << 16)) { - memcpy(cache_buf, MainRAM->data8, sizeof(cache_buf)); - } - - CP0.SR = value & ~( (0x3 << 26) | (0x3 << 23) | (0x3 << 6)); - RecalcIPCache(); - lightrec_set_exit_flags(state, - LIGHTREC_EXIT_CHECK_INTERRUPT); - break; - case 13: /* Cause */ - CP0.CAUSE &= ~0x0300; - CP0.CAUSE |= value & 0x0300; - RecalcIPCache(); - lightrec_set_exit_flags(state, - LIGHTREC_EXIT_CHECK_INTERRUPT); - break; - default: - CP0.Regs[reg] = value; - break; + if((op >> 26) == OP_CP2) { + switch ((op >> 21) & 0x1F) { + case 0x00: PGXP_GTE_MFC2(op, data, data); break; + case 0x02: PGXP_GTE_CFC2(op, data, data); break; + case 0x04: PGXP_GTE_MTC2(op, data, data); break; + case 0x06: PGXP_GTE_CTC2(op, data, data); break; + } } } -void PS_CPU::cop_mtc(struct lightrec_state *state, u32 
op, u8 reg, u32 value) -{ - cop_mtc_ctc(state, reg, value); -} - -void PS_CPU::cop_ctc(struct lightrec_state *state, u32 op, u8 reg, u32 value) -{ - cop_mtc_ctc(state, reg, value); -} - -void PS_CPU::cop2_mtc(struct lightrec_state *state, u32 op, u8 reg, u32 value) -{ - GTE_WriteDR(reg, value); -} - -void PS_CPU::pgxp_cop2_mtc(struct lightrec_state *state, u32 op, u8 reg, u32 value) -{ - GTE_WriteDR(reg, value); - if((op >> 26) == OP_CP2) - PGXP_GTE_MTC2(op, value, value); -} - -void PS_CPU::cop2_ctc(struct lightrec_state *state, u32 op, u8 reg, u32 value) -{ - GTE_WriteCR(reg, value); -} - -void PS_CPU::pgxp_cop2_ctc(struct lightrec_state *state, u32 op, u8 reg, u32 value) -{ - GTE_WriteCR(reg, value); - PGXP_GTE_CTC2(op, value, value); -} - static bool cp2_ops[0x40] = {0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0, 1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0, 1,0,0,0,0,0,0,0,1,1,1,0,0,1,1,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1}; -static void cop_op(struct lightrec_state *state, u32 func) -{ - MDFND_DispMessage(3, RETRO_LOG_WARN, - RETRO_MESSAGE_TARGET_LOG, RETRO_MESSAGE_TYPE_NOTIFICATION_ALT, - "Access to invalid co-processor 0"); -} - -static void cop2_op(struct lightrec_state *state, u32 func) +void PS_CPU::cop2_op(struct lightrec_state *state, u32 func) { if (MDFN_UNLIKELY(!cp2_ops[func & 0x3f])) { @@ -3292,7 +3204,9 @@ static void cop2_op(struct lightrec_state *state, u32 func) "Invalid CP2 function %u\n", func); } else + { GTE_Instruction(func); + } } void PS_CPU::reset_target_cycle_count(struct lightrec_state *state, pscpu_timestamp_t timestamp){ @@ -3303,7 +3217,7 @@ void PS_CPU::reset_target_cycle_count(struct lightrec_state *state, pscpu_timest void PS_CPU::hw_write_byte(struct lightrec_state *state, u32 opcode, void *host, u32 mem, u8 val) { - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; PSX_MemWrite8(timestamp, mem, val); @@ -3323,7 +3237,7 @@ void 
PS_CPU::pgxp_nonhw_write_byte(struct lightrec_state *state, void PS_CPU::pgxp_hw_write_byte(struct lightrec_state *state, u32 opcode, void *host, u32 mem, u8 val) { - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; u32 kmem = kunseg(mem); @@ -3337,7 +3251,7 @@ void PS_CPU::pgxp_hw_write_byte(struct lightrec_state *state, void PS_CPU::hw_write_half(struct lightrec_state *state, u32 opcode, void *host, u32 mem, u16 val) { - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; PSX_MemWrite16(timestamp, mem, val); @@ -3357,7 +3271,7 @@ void PS_CPU::pgxp_nonhw_write_half(struct lightrec_state *state, void PS_CPU::pgxp_hw_write_half(struct lightrec_state *state, u32 opcode, void *host, u32 mem, u16 val) { - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; u32 kmem = kunseg(mem); @@ -3371,7 +3285,7 @@ void PS_CPU::pgxp_hw_write_half(struct lightrec_state *state, void PS_CPU::hw_write_word(struct lightrec_state *state, u32 opcode, void *host, u32 mem, u32 val) { - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; PSX_MemWrite32(timestamp, mem, val); @@ -3407,7 +3321,7 @@ void PS_CPU::pgxp_nonhw_write_word(struct lightrec_state *state, void PS_CPU::pgxp_hw_write_word(struct lightrec_state *state, u32 opcode, void *host, u32 mem, u32 val) { - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; u32 kmem = kunseg(mem); @@ -3438,13 +3352,13 @@ u8 PS_CPU::hw_read_byte(struct lightrec_state *state, { u8 val; - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + 
pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; val = PSX_MemRead8(timestamp, mem); /* Calling PSX_MemRead* might update timestamp - Make sure * here that state->current_cycle stays in sync. */ - lightrec_reset_cycle_count(lightrec_state, timestamp); + lightrec_reset_cycle_count(lightrec_state, timestamp + cpu_timestamp); reset_target_cycle_count(state, timestamp); @@ -3469,7 +3383,7 @@ u8 PS_CPU::pgxp_hw_read_byte(struct lightrec_state *state, { u8 val; - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; u32 kmem = kunseg(mem); @@ -3482,7 +3396,7 @@ u8 PS_CPU::pgxp_hw_read_byte(struct lightrec_state *state, /* Calling PSX_MemRead* might update timestamp - Make sure * here that state->current_cycle stays in sync. */ - lightrec_reset_cycle_count(lightrec_state, timestamp); + lightrec_reset_cycle_count(lightrec_state, timestamp + cpu_timestamp); reset_target_cycle_count(state, timestamp); @@ -3494,13 +3408,13 @@ u16 PS_CPU::hw_read_half(struct lightrec_state *state, { u16 val; - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; val = PSX_MemRead16(timestamp, mem); /* Calling PSX_MemRead* might update timestamp - Make sure * here that state->current_cycle stays in sync. 
*/ - lightrec_reset_cycle_count(lightrec_state, timestamp); + lightrec_reset_cycle_count(lightrec_state, timestamp + cpu_timestamp); reset_target_cycle_count(state, timestamp); @@ -3525,7 +3439,7 @@ u16 PS_CPU::pgxp_hw_read_half(struct lightrec_state *state, { u16 val; - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; u32 kmem = kunseg(mem); @@ -3538,7 +3452,7 @@ u16 PS_CPU::pgxp_hw_read_half(struct lightrec_state *state, /* Calling PSX_MemRead* might update timestamp - Make sure * here that state->current_cycle stays in sync. */ - lightrec_reset_cycle_count(lightrec_state, timestamp); + lightrec_reset_cycle_count(lightrec_state, timestamp + cpu_timestamp); reset_target_cycle_count(state, timestamp); @@ -3550,13 +3464,13 @@ u32 PS_CPU::hw_read_word(struct lightrec_state *state, { u32 val; - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; val = PSX_MemRead32(timestamp, mem); /* Calling PSX_MemRead* might update timestamp - Make sure * here that state->current_cycle stays in sync. */ - lightrec_reset_cycle_count(lightrec_state, timestamp); + lightrec_reset_cycle_count(lightrec_state, timestamp + cpu_timestamp); reset_target_cycle_count(state, timestamp); @@ -3595,7 +3509,7 @@ u32 PS_CPU::pgxp_hw_read_word(struct lightrec_state *state, { u32 val; - pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state); + pscpu_timestamp_t timestamp = lightrec_current_cycle_count(state) - cpu_timestamp; u32 kmem = kunseg(mem); @@ -3622,7 +3536,7 @@ u32 PS_CPU::pgxp_hw_read_word(struct lightrec_state *state, /* Calling PSX_MemRead* might update timestamp - Make sure * here that state->current_cycle stays in sync. 
*/ - lightrec_reset_cycle_count(lightrec_state, timestamp); + lightrec_reset_cycle_count(lightrec_state, timestamp + cpu_timestamp); reset_target_cycle_count(state, timestamp); @@ -3739,40 +3653,29 @@ struct lightrec_mem_map PS_CPU::lightrec_map[] = { .ops = NULL, .mirror_of = &lightrec_map[PSX_MAP_KERNEL_USER_RAM], }, + [PSX_MAP_CODE_BUFFER] = { + }, + }; +void PS_CPU::enable_ram(struct lightrec_state *state, _Bool enable) +{ + if (enable) { + memcpy(MainRAM->data8, cache_buf, sizeof(cache_buf)); + } else { + memcpy(cache_buf, MainRAM->data8, sizeof(cache_buf)); + } +} + struct lightrec_ops PS_CPU::ops = { - .cop0_ops = { - .mfc = cop_mfc, - .cfc = cop_cfc, - .mtc = cop_mtc, - .ctc = cop_ctc, - .op = cop_op, - }, - .cop2_ops = { - .mfc = cop2_mfc, - .cfc = cop2_cfc, - .mtc = cop2_mtc, - .ctc = cop2_ctc, - .op = cop2_op, - }, + .cop2_op = cop2_op, + .enable_ram = enable_ram, }; struct lightrec_ops PS_CPU::pgxp_ops = { - .cop0_ops = { - .mfc = cop_mfc, - .cfc = cop_cfc, - .mtc = cop_mtc, - .ctc = cop_ctc, - .op = cop_op, - }, - .cop2_ops = { - .mfc = pgxp_cop2_mfc, - .cfc = pgxp_cop2_cfc, - .mtc = pgxp_cop2_mtc, - .ctc = pgxp_cop2_ctc, - .op = cop2_op, - }, + .cop2_notify = pgxp_cop2_notify, + .cop2_op = cop2_op, + .enable_ram = enable_ram, }; int PS_CPU::lightrec_plugin_init() @@ -3783,9 +3686,10 @@ int PS_CPU::lightrec_plugin_init() uint8_t *psxH = (uint8_t *) ScratchRAM->data8; uint8_t *psxP = (uint8_t *) PSX_LoadExpansion1(); - if(lightrec_state) + if(lightrec_state){ + GTE_SwitchRegisters(false,lightrec_regs->cp2d); lightrec_destroy(lightrec_state); - else{ + }else{ log_cb(RETRO_LOG_INFO, "Lightrec map addresses: M=0x%lx, P=0x%lx, R=0x%lx, H=0x%lx\n", (uintptr_t) psxM, (uintptr_t) psxP, @@ -3805,6 +3709,12 @@ int PS_CPU::lightrec_plugin_init() lightrec_map[PSX_MAP_SCRATCH_PAD].address = psxH; lightrec_map[PSX_MAP_PARALLEL_PORT].address = psxP; + if(lightrec_codebuffer){ + lightrec_map[PSX_MAP_CODE_BUFFER].address = lightrec_codebuffer; + 
lightrec_map[PSX_MAP_CODE_BUFFER].length = LIGHTREC_CODEBUFFER_SIZE, + log_cb(RETRO_LOG_INFO, "Lightrec codebuffer address: 0x%lx, size: %uMB (0x%08x)\n", lightrec_codebuffer, LIGHTREC_CODEBUFFER_SIZE/(1024*1024),lightrec_map[PSX_MAP_CODE_BUFFER].length); + } + if (PGXP_GetModes() & (PGXP_MODE_MEMORY | PGXP_MODE_GTE)){ lightrec_map[PSX_MAP_HW_REGISTERS].ops = &pgxp_hw_regs_ops; lightrec_map[PSX_MAP_KERNEL_USER_RAM].ops = &pgxp_nonhw_regs_ops; @@ -3826,13 +3736,17 @@ int PS_CPU::lightrec_plugin_init() lightrec_set_invalidate_mode(lightrec_state, psx_dynarec_invalidate); + lightrec_regs = lightrec_get_registers(lightrec_state); + + GTE_SwitchRegisters(true,lightrec_regs->cp2d); + + cpu_timestamp = 0; + return 0; } int32_t PS_CPU::lightrec_plugin_execute(int32_t timestamp) { - uint32_t GPRL[34]; - uint32_t PC; uint32_t new_PC; uint32_t new_PC_mask; @@ -3841,42 +3755,43 @@ int32_t PS_CPU::lightrec_plugin_execute(int32_t timestamp) BACKING_TO_ACTIVE; + memcpy(lightrec_regs->gpr,&GPR,32*sizeof(uint32_t)); + lightrec_regs->gpr[32] = LO; + lightrec_regs->gpr[33] = HI; + u32 flags; do { #ifdef LIGHTREC_DEBUG u32 oldpc = PC; #endif - memcpy(&GPRL,&GPR,32*sizeof(uint32_t)); - GPRL[32] = LO; - GPRL[33] = HI; - lightrec_restore_registers(lightrec_state, GPRL); - lightrec_reset_cycle_count(lightrec_state, timestamp); + lightrec_regs->cp0[CP0REG_SR] = CP0.SR; + lightrec_regs->cp0[CP0REG_CAUSE] = CP0.CAUSE; + lightrec_regs->cp0[CP0REG_EPC] = CP0.EPC; + + lightrec_reset_cycle_count(lightrec_state, timestamp + cpu_timestamp); if (next_interpreter > 0 || psx_dynarec == DYNAREC_RUN_INTERPRETER) - PC = lightrec_run_interpreter(lightrec_state,PC); + PC = lightrec_run_interpreter(lightrec_state, PC, next_event_ts + cpu_timestamp); else if (psx_dynarec == DYNAREC_EXECUTE) - PC = lightrec_execute(lightrec_state, PC, next_event_ts); - else if (psx_dynarec == DYNAREC_EXECUTE_ONE) - PC = lightrec_execute_one(lightrec_state,PC); + PC = lightrec_execute(lightrec_state, PC, next_event_ts + 
cpu_timestamp); - timestamp = lightrec_current_cycle_count( - lightrec_state); + timestamp = lightrec_current_cycle_count(lightrec_state) - cpu_timestamp; - lightrec_dump_registers(lightrec_state, GPRL); - memcpy(&GPR,&GPRL,32*sizeof(uint32_t)); - LO = GPRL[32]; - HI = GPRL[33]; + CP0.SR = lightrec_regs->cp0[CP0REG_SR]; + CP0.CAUSE = lightrec_regs->cp0[CP0REG_CAUSE]; flags = lightrec_exit_flags(lightrec_state); - if (flags & LIGHTREC_EXIT_SEGFAULT) { - log_cb(RETRO_LOG_ERROR, "Exiting at cycle 0x%08x\n", - timestamp); + if (flags & (LIGHTREC_EXIT_SEGFAULT|LIGHTREC_EXIT_NOMEM)) { + if (flags & LIGHTREC_EXIT_NOMEM) + log_cb(RETRO_LOG_ERROR, "Out of memory at cycle 0x%08x\n", timestamp); + else + log_cb(RETRO_LOG_ERROR, "Segfault at cycle 0x%08x\n", timestamp); + exit(1); } - - if (flags & LIGHTREC_EXIT_SYSCALL) + else if (flags & LIGHTREC_EXIT_SYSCALL) PC = Exception(EXCEPTION_SYSCALL, PC, PC, 0); #ifdef LIGHTREC_DEBUG @@ -3890,8 +3805,18 @@ int32_t PS_CPU::lightrec_plugin_execute(int32_t timestamp) } } while(MDFN_LIKELY(PSX_EventHandler(timestamp))); + memcpy(&GPR,lightrec_regs->gpr,32*sizeof(uint32_t)); + LO = lightrec_regs->gpr[32]; + HI = lightrec_regs->gpr[33]; + ACTIVE_TO_BACKING; + cpu_timestamp += timestamp; + + /* wrap slightly earlier to avoid issues with target < current timestamp */ + if(cpu_timestamp>0xFE000000) + cpu_timestamp &= 0x01FFFFFF; + return timestamp; } diff --git a/mednafen/psx/cpu.h b/mednafen/psx/cpu.h index dbba978e4..475d90688 100644 --- a/mednafen/psx/cpu.h +++ b/mednafen/psx/cpu.h @@ -61,7 +61,13 @@ #include "gte.h" #ifdef HAVE_LIGHTREC + #include #include + + /* 8MB should rarely fill up (4 IPI average for entire 2MB ram), 0 will disable, 1 will fill and clean the buffer quickly, good for finding issues with codebuffer cleanup */ + #define LIGHTREC_CODEBUFFER_SIZE 8*1024*1024 + + enum DYNAREC {DYNAREC_DISABLED, DYNAREC_EXECUTE, DYNAREC_RUN_INTERPRETER}; #endif #if NOT_LIBRETRO @@ -241,23 +247,14 @@ class PS_CPU uint32 
ReadInstruction(pscpu_timestamp_t ×tamp, uint32 address); #ifdef HAVE_LIGHTREC + static struct lightrec_registers *lightrec_regs; + static void enable_ram(struct lightrec_state *state, bool enable); + static void cop2_op(struct lightrec_state *state, uint32 op); void print_for_big_ass_debugger(int32 timestamp, uint32 PC); int lightrec_plugin_init(); void lightrec_plugin_shutdown(); int32 lightrec_plugin_execute(int32 timestamp); - static uint32 cop_cfc(lightrec_state *state, uint32 op, uint8 reg); - static uint32 cop_mfc(lightrec_state *state, uint32 op, uint8); - static uint32 cop2_cfc(lightrec_state *state, uint32 op, uint8); - static uint32 cop2_mfc(lightrec_state *state, uint32 op, uint8); - static void cop_mtc_ctc(struct lightrec_state *state, uint8 reg, uint32 value); - static void cop_ctc(lightrec_state *state, uint32 op, uint8 reg, uint32 value); - static void cop_mtc(lightrec_state *state, uint32 op, uint8 reg, uint32 value); - static void cop2_ctc(lightrec_state *state, uint32 op, uint8 reg, uint32 value); - static void cop2_mtc(lightrec_state *state, uint32 op, uint8 reg, uint32 value); - static uint32 pgxp_cop2_cfc(lightrec_state *state, uint32 op, uint8); - static uint32 pgxp_cop2_mfc(lightrec_state *state, uint32 op, uint8); - static void pgxp_cop2_ctc(lightrec_state *state, uint32 op, uint8 reg, uint32 value); - static void pgxp_cop2_mtc(lightrec_state *state, uint32 op, uint8 reg, uint32 value); + static void pgxp_cop2_notify(lightrec_state *state, uint32 op, uint32 data); static struct lightrec_ops ops; static struct lightrec_ops pgxp_ops; static struct lightrec_mem_map_ops pgxp_hw_regs_ops; diff --git a/mednafen/psx/gte.cpp b/mednafen/psx/gte.cpp index 18a7bdea0..7597791f8 100644 --- a/mednafen/psx/gte.cpp +++ b/mednafen/psx/gte.cpp @@ -50,15 +50,12 @@ extern bool psx_gte_overclock; */ -typedef struct +/* For compatibility only */ +typedef union { - int16_t MX[3][3]; - int16_t dummy; -} -#ifndef _MSC_VER -__attribute__((__packed__)) -#endif 
-gtematrix; + int32_t Raw[4][5]; + int16_t Raw16[4][10]; +} Matrices_t; typedef struct { @@ -81,88 +78,111 @@ typedef struct int16_t Y; } gtexy; -static uint32_t CR[32]; +static uint32_t *CR; +static uint32_t *DR; -typedef union -{ - gtematrix All[4]; - int32_t Raw[4][5]; // Don't read from this(Raw[][]), only write(and when writing, if running on a big-endian platform, swap the upper 16-bits with the lower 16-bits) - int16_t Raw16[4][10]; +static uint32_t REG[64]; +#define CR_OFFSET 32 - struct - { - gtematrix Rot; - gtematrix Light; - gtematrix Color; - gtematrix AbbyNormal; - }; -} Matrices_t; +/* Three 3x3 signed 4.12 matrices: rotation (CR[0]), light (CR[8]), and color (CR[16]) */ +/* interval between Matrices start location in CR registers (0,8,16) */ +#define M_NEXT 8 -static union -{ - int32_t All[4][4]; // Really only [4][3], but [4] to ease address calculation. - - struct - { - int32_t T[4]; - int32_t B[4]; - int32_t FC[4]; - int32_t Null[4]; - }; -} CRVectors; - -/* Control registers */ -static int32_t OFX; /* Screen offset X: signed 16.16 */ -static int32_t OFY; /* Screen offset Y: signed 16.16 */ -static uint16_t H; /* Projection plane distance */ -static int16_t DQA; /* Depth queing coefficient: signed 8.8 */ -static int32_t DQB; /* Depth queing offset: signed 8.24 */ - -static int16_t ZSF3; /* Scale factor when computing the average of 3 Z values - * (triangle): signed 4.12 */ -static int16_t ZSF4; /* Scale factor when computing the average of 4 Z values - * (quad): signed 4.12 */ - -static Matrices_t Matrices; /* Three 3x3 signed 4.12 matrices: rotation, light, and color */ - -// Begin DR -static int16_t Vectors[3][4]; /* Five 3x signed words control vectors: translation, - BackgroundColor, FarColor and Zero (which is always equal to - [0, 0, 0]. 
*/ +/* enum for MVMVA Matrix bits 17-18 (mx in DECODE_FIELDS) */ +enum {Matrices_Rot,Matrices_Light,Matrices_Color,Matrices_AbbyNormal}; -static uint32_t FLAGS; /* Overflow flags generated by the GTE commands */ +/* enum for MVMVA Translation Vector bits 13-14 (cv in DECODE_FIELDS) */ +enum {CRVectors_T,CRVectors_B,CRVectors_FC,CRVectors_Null}; -static int32_t MAC[4]; /* Accumulators for intermediate results, 4 x signed word */ +/* T = CR[5]-CR[7] */ +#define CRV_T 5 +/* B = CR[13]-CR[15] */ +#define CRV_B 13 +/* FC = CR[21]-CR[23] */ +#define CRV_FC 21 +/* interval between CRVs */ +#define CRV_NEXT 8 -static uint16_t OTZ; /* Z average value */ +/* Control register aliases */ +#define OFX (int32_t)CR[24] /* Screen offset X: signed 16.16 */ +#define OFY (int32_t)CR[25] /* Screen offset Y: signed 16.16 */ +#define H (uint16_t)CR[26] /* Projection plane distance */ +#define DQA (int16_t)CR[27] /* Depth queing coefficient: signed 8.8 */ +#define DQB (int32_t)CR[28] /* Depth queing offset: signed 8.24 */ -static gtergb RGB; /* RGB color. High byte is passed around but not used in - computations, it often contains a GPU GP0 command byte. */ +#define ZSF3 (int16_t)CR[29] /* Scale factor when computing the average of 3 Z values + * (triangle): signed 4.12 */ +#define ZSF4 (int16_t)CR[30] /* Scale factor when computing the average of 4 Z values + * (quad): signed 4.12 */ -static int16_t IR[4]; /* Accumulators for intermediate results, 4 x signed halfwords */ +static uint32_t FLAGS; /* Overflow flags generated by the GTE commands */ -static gtexy XY_FIFO[4]; /* XY FIFO : 4 x 2 x signed half words */ +#define Vectors(vec,elm) (int16_t)(DR[(vec*2)+(elm==2?1:0)]>>(elm==1?16:0)) + /* Five 3x signed words control vectors: translation, + BackgroundColor, FarColor and Zero (which is always equal to + [0, 0, 0]. 
*/ -static uint16_t Z_FIFO[4]; /* Z FIFO : 4 x unsigned half words */ +#define VEC3IR 3 /* Vector 3 [IR1,IR2,IR3] */ -static gtergb RGB_FIFO[3]; /* RGB color FIFO */ +#define RGB_R (uint8_t)DR[6] /* RGB color. High byte is passed around but not used in */ +#define RGB_G (uint8_t)(DR[6]>>8) /* computations, it often contains a GPU GP0 command byte. */ +#define RGB_B (uint8_t)(DR[6]>>16) +#define RGB_CD (uint8_t)(DR[6]>>24) -static uint32_t LZCS; /* Input value used to compute the 'lzcr' value below */ +#define SET_OTZ(val) DR[7]=(uint16_t)(val) +#define OTZ (uint16_t)DR[7] /* Z average value */ -static uint32_t LZCR; /* Contains the numbers of leading zeros in LZSC if it's possible - (lzcs[31] is 0) or leading ones if it's negative (lzcs[31] is 1) */ +#define SET_IR(i,val) DR[8+i]=(int16_t)(val) +#define IR(i) (int16_t)DR[8+i] /* Accumulators for intermediate results, 4 x signed halfwords */ +#define IR0 IR(0) +#define IR1 IR(1) +#define IR2 IR(2) +#define IR3 IR(3) + +#define XY_FIFO(i) DR[12+i] /* XY FIFO : 4 x 2 x signed half words */ +#define XY_FIFO_X(i) (int16_t)DR[12+i] +#define XY_FIFO_Y(i) (int16_t)(DR[12+i]>>16) + +#define SET_Z_FIFO(i,val) DR[16+i]=(uint16_t)(val) +#define Z_FIFO(i) (uint16_t)DR[16+i] /* Z FIFO : 4 x unsigned half words */ -static uint32_t Reg23; /* Register 23: 32bit read/write but not used for anything */ +#define RGB_FIFO(i) DR[20+i] /* RGB color FIFO */ +#define RGB_FIFO_R(i) (uint8_t)DR[20+i] +#define RGB_FIFO_G(i) (uint8_t)(DR[20+i]>>8) +#define RGB_FIFO_B(i) (uint8_t)(DR[20+i]>>16) +#define RGB_FIFO_CD(i) (uint8_t)(DR[20+i]>>24) -#define IR0 IR[0] -#define IR1 IR[1] -#define IR2 IR[2] -#define IR3 IR[3] +#define Reg23 DR[23] /* Register 23: 32bit read/write but not used for anything */ +#define SET_MAC(i,val) DR[24+i]=(int32_t)(val) +#define MAC(i) (int32_t)DR[24+i] /* Accumulators for intermediate results, 4 x signed word */ +#define LZCS DR[30] /* Input value used to compute the 'lzcr' value below */ + +#define LZCR DR[31] /* 
Contains the numbers of leading zeros in LZSC if it's possible + (lzcs[31] is 0) or leading ones if it's negative (lzcs[31] is 1) */ // end DR +/* regs pointer must contain 64*32bits, with CR directly after DR, use_regs + will use provided regs instead of internal REG[64] for DR/CR location + data will be copied between REG[64] and regs pointer to sync them */ +void GTE_SwitchRegisters(bool use_regs, uint32_t *regs) +{ + if(use_regs) + { + memcpy(regs,REG,sizeof(REG)); + DR = regs; + CR = regs+CR_OFFSET; + } + else + { + memcpy(REG,regs,sizeof(REG)); + DR = REG; + CR = REG+CR_OFFSET; + } +} + extern "C" unsigned char widescreen_hack; extern "C" unsigned char widescreen_hack_aspect_ratio_setting; @@ -211,61 +231,170 @@ void GTE_Init(void) void GTE_Power(void) { - memset(CR, 0, sizeof(CR)); - //memset(DR, 0, sizeof(DR)); - - memset(Matrices.All, 0, sizeof(Matrices.All)); - memset(CRVectors.All, 0, sizeof(CRVectors.All)); - OFX = 0; - OFY = 0; - H = 0; - DQA = 0; - DQB = 0; - ZSF3 = 0; - ZSF4 = 0; - - - memset(Vectors, 0, sizeof(Vectors)); - memset(&RGB, 0, sizeof(RGB)); - OTZ = 0; - IR0 = 0; - IR1 = 0; - IR2 = 0; - IR3 = 0; - - memset(XY_FIFO, 0, sizeof(XY_FIFO)); - memset(Z_FIFO, 0, sizeof(Z_FIFO)); - memset(RGB_FIFO, 0, sizeof(RGB_FIFO)); - memset(MAC, 0, sizeof(MAC)); - LZCS = 0; - LZCR = 0; - - Reg23 = 0; + DR = REG; + CR = REG+CR_OFFSET; + + memset(REG, 0, 64*sizeof(uint32_t)); } -// TODO: Don't save redundant state, regarding CR cache variables int GTE_StateAction(StateMem *sm, int load, int data_only) { + /* Duplicates of CR values, save into savestate for compatibility, don't load */ + static Matrices_t Matrices; + static union + { + int32_t All[4][4]; + } CRVectors; + int32_t _OFX = OFX; + int32_t _OFY = OFY; + uint16_t _H = H; + int16_t _DQA = DQA; + int32_t _DQB = DQB; + int16_t _ZSF3 = ZSF3; + int16_t _ZSF4 = ZSF4; + + /* compatibility variables, transfer to/from CR/DR[32] during save states */ + int16_t Vectors[3][4]; + int32_t MAC[4]; + uint16_t _OTZ; + 
gtergb RGB; + int16_t IR[4]; + gtexy XY_FIFO[4]; + uint16_t Z_FIFO[4]; + gtergb RGB_FIFO[3]; + uint32_t _LZCS; + uint32_t _LZCR; + uint32_t _Reg23; + + if(!load) + { + for(int i = 0; i < 24; i++) + { + int which = i; + uint32_t value = CR[which]; + int we = which >> 3; + which &= 0x7; + + if(which >= 5) + CRVectors.All[we][which - 5] = value; + else + { +#ifdef MSB_FIRST + Matrices.Raw[we][which] = (value << 16) | (value >> 16); +#else + Matrices.Raw[we][which] = value; +#endif + } + } + + Vectors[0][0] = DR[0]; + Vectors[0][1] = DR[0] >> 16; + + Vectors[0][2] = DR[1]; + + Vectors[1][0] = DR[2]; + Vectors[1][1] = DR[2] >> 16; + + Vectors[1][2] = DR[3]; + + Vectors[2][0] = DR[4]; + Vectors[2][1] = DR[4] >> 16; + + Vectors[2][2] = DR[5]; + + RGB.R = DR[6] >> 0; + RGB.G = DR[6] >> 8; + RGB.B = DR[6] >> 16; + RGB.CD = DR[6] >> 24; + + _OTZ = DR[7]; + + IR[0] = DR[8]; + + IR[1] = DR[9]; + + IR[2] = DR[10]; + + IR[3] = DR[11]; + + XY_FIFO[0].X = DR[12]; + XY_FIFO[0].Y = DR[12] >> 16; + + XY_FIFO[1].X = DR[13]; + XY_FIFO[1].Y = DR[13] >> 16; + + XY_FIFO[2].X = DR[14]; + XY_FIFO[2].Y = DR[14] >> 16; + + //15 is a mirror of 14 + XY_FIFO[3].X = DR[14]; + XY_FIFO[3].Y = DR[14] >> 16; + + Z_FIFO[0] = DR[16]; + + Z_FIFO[1] = DR[17]; + + Z_FIFO[2] = DR[18]; + + Z_FIFO[3] = DR[19]; + + RGB_FIFO[0].R = DR[20]; + RGB_FIFO[0].G = DR[20] >> 8; + RGB_FIFO[0].B = DR[20] >> 16; + RGB_FIFO[0].CD = DR[20] >> 24; + + RGB_FIFO[1].R = DR[21]; + RGB_FIFO[1].G = DR[21] >> 8; + RGB_FIFO[1].B = DR[21] >> 16; + RGB_FIFO[1].CD = DR[21] >> 24; + + RGB_FIFO[2].R = DR[22]; + RGB_FIFO[2].G = DR[22] >> 8; + RGB_FIFO[2].B = DR[22] >> 16; + RGB_FIFO[2].CD = DR[22] >> 24; + + _Reg23 = DR[23]; + + MAC[0] = DR[24]; + + MAC[1] = DR[25]; + + MAC[2] = DR[26]; + + MAC[3] = DR[27]; + + //28: Overwrites value 9,10,11 + + //29: Read-only + + _LZCS = DR[30]; + _LZCR = MDFN_lzcount32(DR[30] ^ ((int32)DR[30] >> 31)); + + //31: Read-only + } + SFORMAT StateRegs[] = { { CR, (uint32_t)(32 * sizeof(uint32_t)), 
MDFNSTATE_RLSB32 | 0, "CR" }, { &FLAGS, sizeof(FLAGS), MDFNSTATE_RLSB | 0, "FLAGS" }, + SFARRAY16(&Matrices.Raw16[0][0], 4 * 10), SFARRAY32(&CRVectors.All[0][0], 4 * 4), - SFVARN(OFX, "OFX"), - SFVARN(OFY, "OFY"), - SFVARN(H, "H"), - SFVARN(DQA, "DQA"), - SFVARN(DQB, "DQB"), + SFVARN(_OFX, "OFX"), + SFVARN(_OFY, "OFY"), + SFVARN(_H, "H"), + SFVARN(_DQA, "DQA"), + SFVARN(_DQB, "DQB"), + + SFVARN(_ZSF3, "ZSF3"), + SFVARN(_ZSF4, "ZSF4"), - SFVARN(ZSF3, "ZSF3"), - SFVARN(ZSF4, "ZSF4"), SFARRAY16(&Vectors[0][0], 3 * 4), SFARRAY(RGB.Raw8, 4), - SFVARN(OTZ, "OTZ"), + SFVARN(_OTZ, "OTZ"), SFARRAY16(IR, 4), SFVAR(XY_FIFO[0].X), @@ -285,20 +414,49 @@ int GTE_StateAction(StateMem *sm, int load, int data_only) SFARRAY32(MAC, 4), - SFVARN(LZCS, "LZCS"), - SFVARN(LZCR, "LZCR"), - SFVARN(Reg23, "Reg23"), + SFVARN(_LZCS, "LZCS"), + SFVARN(_LZCR, "LZCR"), + SFVARN(_Reg23, "Reg23"), SFEND }; int ret = MDFNSS_StateAction(sm, load, data_only, StateRegs, "GTE"); -#if 0 if(load) { - + DR[0] = (uint16_t)Vectors[0][0] | ((uint16_t)Vectors[0][1] << 16); + DR[1] = (int16_t)Vectors[0][2]; + DR[2] = (uint16_t)Vectors[1][0] | ((uint16_t)Vectors[1][1] << 16); + DR[3] = (int16_t)Vectors[1][2]; + DR[4] = (uint16_t)Vectors[2][0] | ((uint16_t)Vectors[2][1] << 16); + DR[5] = (int16_t)Vectors[2][2]; + DR[6] = RGB.R | (RGB.G << 8) | (RGB.B << 16) | (RGB.CD << 24); + DR[7] = (uint16_t)OTZ; + DR[8] = (int16_t)IR[0]; + DR[9] = (int16_t)IR[1]; + DR[10] = (int16_t)IR[2]; + DR[11] = (int16_t)IR[3]; + DR[12] = (uint16_t)XY_FIFO[0].X | ((uint16_t)XY_FIFO[0].Y << 16); + DR[13] = (uint16_t)XY_FIFO[1].X | ((uint16_t)XY_FIFO[1].Y << 16); + DR[14] = (uint16_t)XY_FIFO[2].X | ((uint16_t)XY_FIFO[2].Y << 16); + DR[15] = (uint16_t)XY_FIFO[3].X | ((uint16_t)XY_FIFO[3].Y << 16); + DR[16] = (uint16_t)Z_FIFO(0); + DR[17] = (uint16_t)Z_FIFO(1); + DR[18] = (uint16_t)Z_FIFO(2); + DR[19] = (uint16_t)Z_FIFO(3); + DR[20] = RGB_FIFO[0].R | (RGB_FIFO[0].G << 8) | (RGB_FIFO[0].B << 16) | (RGB_FIFO[0].CD << 24); + DR[21] = 
RGB_FIFO[1].R | (RGB_FIFO[1].G << 8) | (RGB_FIFO[1].B << 16) | (RGB_FIFO[1].CD << 24); + DR[22] = RGB_FIFO[2].R | (RGB_FIFO[2].G << 8) | (RGB_FIFO[2].B << 16) | (RGB_FIFO[2].CD << 24); + DR[23] = Reg23; + DR[24] = MAC[0]; + DR[25] = MAC[1]; + DR[26] = MAC[2]; + DR[27] = MAC[3]; + DR[28] = Sat5(IR[1] >> 7) | (Sat5(IR[2] >> 7) << 5) | (Sat5(IR[3] >> 7) << 10); + DR[29] = Sat5(IR[1] >> 7) | (Sat5(IR[2] >> 7) << 5) | (Sat5(IR[3] >> 7) << 10); + DR[30] = LZCS; + DR[31] = LZCR; } -#endif return(ret); } @@ -326,104 +484,17 @@ void GTE_WriteCR(unsigned int which, uint32_t value) CR[which] = value | (CR[which] & ~mask_table[which]); - if(which < 24) - { - int we = which >> 3; - which &= 0x7; - - if(which >= 5) - CRVectors.All[we][which - 5] = value; - else - { -#ifdef MSB_FIRST - Matrices.Raw[we][which] = (value << 16) | (value >> 16); -#else - Matrices.Raw[we][which] = value; -#endif - } - return; - } - - switch(which) - { - case 24: - OFX = value; - break; - - case 25: - OFY = value; - break; - - case 26: - H = value; - break; - - case 27: - DQA = value; - break; - - case 28: - DQB = value; - break; - - case 29: - ZSF3 = value; - break; - - case 30: - ZSF4 = value; - break; - - case 31: + if (which == 31) CR[31] = (value & 0x7ffff000) | ((value & 0x7f87e000) ? 
(1 << 31) : 0); - break; - } } uint32_t GTE_ReadCR(unsigned int which) { uint32_t ret = 0; - switch(which) - { - default: - ret = CR[which]; - if(which == 4 || which == 12 || which == 20) - ret = (int16)ret; - break; - - case 24: - ret = OFX; - break; - - case 25: - ret = OFY; - break; - - case 26: - ret = (int16)H; - break; - - case 27: - ret = (int16)DQA; - break; - - case 28: - ret = DQB; - break; - - case 29: - ret = (int16)ZSF3; - break; - - case 30: - ret = (int16)ZSF4; - break; - - case 31: - ret = CR[31]; - break; - } + ret = CR[which]; + if(which == 4 || which == 12 || which == 20 || which == 26 || which == 27 || which == 29 || which ==30) + ret = (int16)ret; return(ret); } @@ -433,120 +504,100 @@ void GTE_WriteDR(unsigned int which, uint32_t value) switch(which & 0x1F) { case 0: - Vectors[0][0] = value; - Vectors[0][1] = value >> 16; + DR[0] = value; break; case 1: - Vectors[0][2] = value; + DR[1] = (int16_t)value; break; case 2: - Vectors[1][0] = value; - Vectors[1][1] = value >> 16; + DR[2] = value; break; case 3: - Vectors[1][2] = value; + DR[3] = (int16_t)value; break; case 4: - Vectors[2][0] = value; - Vectors[2][1] = value >> 16; + DR[4] = value; break; case 5: - Vectors[2][2] = value; + DR[5] = (int16_t)value; break; case 6: - RGB.R = value >> 0; - RGB.G = value >> 8; - RGB.B = value >> 16; - RGB.CD = value >> 24; + DR[6] = value; break; case 7: - OTZ = value; + SET_OTZ(value); break; case 8: - IR0 = value; + SET_IR(0, value); break; case 9: - IR1 = value; + SET_IR(1, value); break; case 10: - IR2 = value; + SET_IR(2, value); break; case 11: - IR3 = value; + SET_IR(3, value); break; case 12: - XY_FIFO[0].X = value; - XY_FIFO[0].Y = value >> 16; + DR[12] = value; break; case 13: - XY_FIFO[1].X = value; - XY_FIFO[1].Y = value >> 16; + DR[13] = value; break; case 14: - XY_FIFO[2].X = value; - XY_FIFO[2].Y = value >> 16; - XY_FIFO[3].X = value; - XY_FIFO[3].Y = value >> 16; + DR[14] = value; + DR[15] = value; break; case 15: - XY_FIFO[3].X = value; - 
XY_FIFO[3].Y = value >> 16; + DR[15] = value; - XY_FIFO[0] = XY_FIFO[1]; - XY_FIFO[1] = XY_FIFO[2]; - XY_FIFO[2] = XY_FIFO[3]; + DR[12] = DR[13]; + DR[13] = DR[14]; + DR[14] = DR[15]; break; case 16: - Z_FIFO[0] = value; + SET_Z_FIFO(0, value); break; case 17: - Z_FIFO[1] = value; + SET_Z_FIFO(1, value); break; case 18: - Z_FIFO[2] = value; + SET_Z_FIFO(2, value); break; case 19: - Z_FIFO[3] = value; + SET_Z_FIFO(3, value); break; case 20: - RGB_FIFO[0].R = value; - RGB_FIFO[0].G = value >> 8; - RGB_FIFO[0].B = value >> 16; - RGB_FIFO[0].CD = value >> 24; + RGB_FIFO(0) = value; break; case 21: - RGB_FIFO[1].R = value; - RGB_FIFO[1].G = value >> 8; - RGB_FIFO[1].B = value >> 16; - RGB_FIFO[1].CD = value >> 24; + RGB_FIFO(1) = value; break; case 22: - RGB_FIFO[2].R = value; - RGB_FIFO[2].G = value >> 8; - RGB_FIFO[2].B = value >> 16; - RGB_FIFO[2].CD = value >> 24; + RGB_FIFO(2) = value; break; case 23: @@ -554,34 +605,33 @@ void GTE_WriteDR(unsigned int which, uint32_t value) break; case 24: - MAC[0] = value; + SET_MAC(0, value); break; case 25: - MAC[1] = value; + SET_MAC(1, value); break; case 26: - MAC[2] = value; + SET_MAC(2, value); break; case 27: - MAC[3] = value; + SET_MAC(3, value); break; case 28: - IR1 = ((value >> 0) & 0x1F) << 7; - IR2 = ((value >> 5) & 0x1F) << 7; - IR3 = ((value >> 10) & 0x1F) << 7; + SET_IR(1, ((value >> 0) & 0x1F) << 7); + SET_IR(2, ((value >> 5) & 0x1F) << 7); + SET_IR(3, ((value >> 10) & 0x1F) << 7); break; case 29: // Read-only break; case 30: - LZCS = value; - LZCR = MDFN_lzcount32(value ^ ((int32)value >> 31)); + LZCR = MDFN_lzcount32(value ^ ((int32)value >> 31)); break; case 31: // Read-only @@ -596,31 +646,31 @@ uint32_t GTE_ReadDR(unsigned int which) switch(which & 0x1F) { case 0: - ret = (uint16_t)Vectors[0][0] | ((uint16_t)Vectors[0][1] << 16); + ret = (uint16_t)Vectors(0, 0) | ((uint16_t)Vectors(0, 1) << 16); break; case 1: - ret = (int16_t)Vectors[0][2]; + ret = (int16_t)Vectors(0, 2); break; case 2: - ret = 
(uint16_t)Vectors[1][0] | ((uint16_t)Vectors[1][1] << 16); + ret = (uint16_t)Vectors(1, 0) | ((uint16_t)Vectors(1, 1) << 16); break; case 3: - ret = (int16_t)Vectors[1][2]; + ret = (int16_t)Vectors(1, 2); break; case 4: - ret = (uint16_t)Vectors[2][0] | ((uint16_t)Vectors[2][1] << 16); + ret = (uint16_t)Vectors(2, 0) | ((uint16_t)Vectors(2, 1) << 16); break; case 5: - ret = (int16_t)Vectors[2][2]; + ret = (int16_t)Vectors(2, 2); break; case 6: - ret = RGB.R | (RGB.G << 8) | (RGB.B << 16) | (RGB.CD << 24); + ret = RGB_R | (RGB_G << 8) | (RGB_B << 16) | (RGB_CD << 24); break; case 7: @@ -644,47 +694,47 @@ uint32_t GTE_ReadDR(unsigned int which) break; case 12: - ret = (uint16_t)XY_FIFO[0].X | ((uint16_t)XY_FIFO[0].Y << 16); + ret = (uint16_t)XY_FIFO_X(0) | ((uint16_t)XY_FIFO_Y(0) << 16); break; case 13: - ret = (uint16_t)XY_FIFO[1].X | ((uint16_t)XY_FIFO[1].Y << 16); + ret = (uint16_t)XY_FIFO_X(1) | ((uint16_t)XY_FIFO_Y(1) << 16); break; case 14: - ret = (uint16_t)XY_FIFO[2].X | ((uint16_t)XY_FIFO[2].Y << 16); + ret = (uint16_t)XY_FIFO_X(2) | ((uint16_t)XY_FIFO_Y(2) << 16); break; case 15: - ret = (uint16_t)XY_FIFO[3].X | ((uint16_t)XY_FIFO[3].Y << 16); + ret = (uint16_t)XY_FIFO_X(3) | ((uint16_t)XY_FIFO_Y(3) << 16); break; case 16: - ret = (uint16_t)Z_FIFO[0]; + ret = (uint16_t)Z_FIFO(0); break; case 17: - ret = (uint16_t)Z_FIFO[1]; + ret = (uint16_t)Z_FIFO(1); break; case 18: - ret = (uint16_t)Z_FIFO[2]; + ret = (uint16_t)Z_FIFO(2); break; case 19: - ret = (uint16_t)Z_FIFO[3]; + ret = (uint16_t)Z_FIFO(3); break; case 20: - ret = RGB_FIFO[0].R | (RGB_FIFO[0].G << 8) | (RGB_FIFO[0].B << 16) | (RGB_FIFO[0].CD << 24); + ret = RGB_FIFO(0); break; case 21: - ret = RGB_FIFO[1].R | (RGB_FIFO[1].G << 8) | (RGB_FIFO[1].B << 16) | (RGB_FIFO[1].CD << 24); + ret = RGB_FIFO(1); break; case 22: - ret = RGB_FIFO[2].R | (RGB_FIFO[2].G << 8) | (RGB_FIFO[2].B << 16) | (RGB_FIFO[2].CD << 24); + ret = RGB_FIFO(2); break; case 23: @@ -692,19 +742,19 @@ uint32_t GTE_ReadDR(unsigned int 
which) break; case 24: - ret = MAC[0]; + ret = MAC(0); break; case 25: - ret = MAC[1]; + ret = MAC(1); break; case 26: - ret = MAC[2]; + ret = MAC(2); break; case 27: - ret = MAC[3]; + ret = MAC(3); break; case 28: @@ -985,12 +1035,9 @@ static INLINE uint8_t MAC_to_COLOR(uint8_t flag, int32_t mac) static INLINE void MAC_to_RGB_FIFO(void) { - RGB_FIFO[0] = RGB_FIFO[1]; - RGB_FIFO[1] = RGB_FIFO[2]; - RGB_FIFO[2].R = Lm_C(0, MAC[1] >> 4); - RGB_FIFO[2].G = Lm_C(1, MAC[2] >> 4); - RGB_FIFO[2].B = Lm_C(2, MAC[3] >> 4); - RGB_FIFO[2].CD = RGB.CD; + RGB_FIFO(0) = RGB_FIFO(1); + RGB_FIFO(1) = RGB_FIFO(2); + RGB_FIFO(2) = Lm_C(0, MAC(1) >> 4) | ((Lm_C(1, MAC(2) >> 4)) << 8) | ((Lm_C(2, MAC(3) >> 4)) << 16) | (RGB_CD << 24); } static INLINE int16_t Lm_B(unsigned int which, int32_t value, int lm) @@ -1016,28 +1063,28 @@ static INLINE int16_t Lm_B(unsigned int which, int32_t value, int lm) static INLINE void MAC_to_IR(int lm) { - IR1 = i32_to_i16_saturate(0, MAC[1], lm); - IR2 = i32_to_i16_saturate(1, MAC[2], lm); - IR3 = i32_to_i16_saturate(2, MAC[3], lm); + SET_IR(1, i32_to_i16_saturate(0, MAC(1), lm)); + SET_IR(2, i32_to_i16_saturate(1, MAC(2), lm)); + SET_IR(3, i32_to_i16_saturate(2, MAC(3), lm)); } -static INLINE void MultiplyMatrixByVector(const gtematrix *matrix, const int16_t *v, const int32_t *crv, uint32_t sf, int lm) +static INLINE void MultiplyMatrixByVector(uint32_t mx, uint32_t v, uint32_t cv, uint32_t sf, int lm) { - unsigned i; + unsigned i,m; for(i = 0; i < 3; i++) { int64_t tmp; int32_t mulr[3]; - tmp = (uint64_t)(int64_t)crv[i] << 12; + tmp = (cv == CRVectors_Null) ? 
0 : ((uint64_t)(int64_t)(int32_t)CR[(cv*CRV_NEXT)+CRV_T+i] << 12); - if(matrix == &Matrices.AbbyNormal) + if(mx == Matrices_AbbyNormal) { if(i == 0) { - mulr[0] = -(RGB.R << 4); - mulr[1] = (RGB.R << 4); + mulr[0] = -(RGB_R << 4); + mulr[1] = (RGB_R << 4); mulr[2] = IR0; } else @@ -1049,16 +1096,43 @@ static INLINE void MultiplyMatrixByVector(const gtematrix *matrix, const int16_t } else { - mulr[0] = matrix->MX[i][0]; - mulr[1] = matrix->MX[i][1]; - mulr[2] = matrix->MX[i][2]; + m = mx*M_NEXT; + + if(i == 0) + { + mulr[0] = (int16_t)CR[m]; + mulr[1] = (int16_t)(CR[m] >> 16); + mulr[2] = (int16_t)CR[m + 1]; + } + else if(i == 1) + { + mulr[0] = (int16_t)(CR[m + 1] >> 16); + mulr[1] = (int16_t)CR[m + 2]; + mulr[2] = (int16_t)(CR[m + 2] >> 16); + } + else + { + mulr[0] = (int16_t)CR[m + 3]; + mulr[1] = (int16_t)(CR[m + 3] >> 16); + mulr[2] = (int16_t)CR[m + 4]; + } + } + + if(v == VEC3IR) + { + mulr[0] *= IR1; + mulr[1] *= IR2; + mulr[2] *= IR3; + } + else + { + mulr[0] *= Vectors(v, 0); + mulr[1] *= Vectors(v, 1); + mulr[2] *= Vectors(v, 2); } - mulr[0] *= v[0]; - mulr[1] *= v[1]; - mulr[2] *= v[2]; tmp = A_MV(i, tmp + mulr[0]); - if(crv == CRVectors.FC) + if(cv == CRVectors_FC) { Lm_B(i, tmp >> sf, false); tmp = 0; @@ -1067,64 +1141,71 @@ static INLINE void MultiplyMatrixByVector(const gtematrix *matrix, const int16_t tmp = A_MV(i, tmp + mulr[1]); tmp = A_MV(i, tmp + mulr[2]); - MAC[1 + i] = tmp >> sf; + SET_MAC(1 + i, tmp >> sf); } MAC_to_IR(lm); } -static INLINE void MultiplyMatrixByVector_PT(const gtematrix *matrix, const int16_t *v, const int32_t *crv, uint32_t sf, int lm) +static INLINE void MultiplyMatrixByVector_PT(uint32_t mx, uint32_t v, uint32_t cv, uint32_t sf, int lm) { int64_t tmp[3]; - unsigned i; + unsigned i,m; for(i = 0; i < 3; i++) { int32_t mulr[3]; - tmp[i] = (uint64_t)(int64_t)crv[i] << 12; + tmp[i] = (cv == CRVectors_Null) ? 
0 : (uint64_t)(int64_t)(int32_t)CR[(cv*CRV_NEXT)+CRV_T+i] << 12; + m = mx*M_NEXT; - mulr[0] = matrix->MX[i][0] * v[0]; - mulr[1] = matrix->MX[i][1] * v[1]; - mulr[2] = matrix->MX[i][2] * v[2]; + if(i == 0) + { + mulr[0] = (int16_t)CR[m]; + mulr[1] = (int16_t)(CR[m] >> 16); + mulr[2] = (int16_t)CR[m + 1]; + } + else if(i == 1) + { + mulr[0] = (int16_t)(CR[m + 1] >> 16); + mulr[1] = (int16_t)CR[m + 2]; + mulr[2] = (int16_t)(CR[m + 2] >> 16); + } + else + { + mulr[0] = (int16_t)CR[m + 3]; + mulr[1] = (int16_t)(CR[m + 3] >> 16); + mulr[2] = (int16_t)CR[m + 4]; + } + + mulr[0] *= Vectors(v, 0); + mulr[1] *= Vectors(v, 1); + mulr[2] *= Vectors(v, 2); tmp[i] = A_MV(i, tmp[i] + mulr[0]); tmp[i] = A_MV(i, tmp[i] + mulr[1]); tmp[i] = A_MV(i, tmp[i] + mulr[2]); - MAC[1 + i] = tmp[i] >> sf; + SET_MAC(1 + i, tmp[i] >> sf); } - IR1 = Lm_B(0, MAC[1], lm); - IR2 = Lm_B(1, MAC[2], lm); - //printf("FTV: %08x %08x\n", crv[2], (uint32)(tmp[2] >> 12)); - IR3 = Lm_B_PTZ(2, MAC[3], tmp[2] >> 12, lm); + SET_IR(1, Lm_B(0, MAC(1), lm)); + SET_IR(2, Lm_B(1, MAC(2), lm)); + //printf("FTV: %08x %08x\n", CR[cv*CRV_NEXT+CRV_T+2], (uint32)(tmp[2] >> 12)); + SET_IR(3, Lm_B_PTZ(2, MAC(3), tmp[2] >> 12, lm)); - Z_FIFO[0] = Z_FIFO[1]; - Z_FIFO[1] = Z_FIFO[2]; - Z_FIFO[2] = Z_FIFO[3]; - Z_FIFO[3] = Lm_D(tmp[2] >> 12, true); + SET_Z_FIFO(0, Z_FIFO(1)); + SET_Z_FIFO(1, Z_FIFO(2)); + SET_Z_FIFO(2, Z_FIFO(3)); + SET_Z_FIFO(3, Lm_D(tmp[2] >> 12, true)); } #define DECODE_FIELDS \ const uint32 sf MDFN_NOWARN_UNUSED = (instr & (1 << 19)) ? 
12 : 0; \ const uint32 mx MDFN_NOWARN_UNUSED = (instr >> 17) & 0x3; \ - const uint32 v_i = (instr >> 15) & 0x3; \ - const int32* cv MDFN_NOWARN_UNUSED = CRVectors.All[(instr >> 13) & 0x3]; \ - const int lm MDFN_NOWARN_UNUSED = (instr >> 10) & 1; \ - int16 v[3] MDFN_NOWARN_UNUSED; \ - if(v_i == 3) \ - { \ - v[0] = IR1; \ - v[1] = IR2; \ - v[2] = IR3; \ - } \ - else \ - { \ - v[0] = Vectors[v_i][0]; \ - v[1] = Vectors[v_i][1]; \ - v[2] = Vectors[v_i][2]; \ - } + const uint32 v MDFN_NOWARN_UNUSED = (instr >> 15) & 0x3; \ + const uint32 cv MDFN_NOWARN_UNUSED = (instr >> 13) & 0x3; \ + const int lm MDFN_NOWARN_UNUSED = (instr >> 10) & 1; /* SQR - Square Vector */ static int32_t SQR(uint32_t instr) @@ -1136,8 +1217,8 @@ static int32_t SQR(uint32_t instr) for (i = 1; i < 4; i++) { - int32_t ir = IR[i]; - MAC[i] = (ir * ir) >> sf; + int32_t ir = IR(i); + SET_MAC(i, (ir * ir) >> sf); } MAC_to_IR(lm); @@ -1150,7 +1231,7 @@ static int32_t MVMVA(uint32_t instr) { DECODE_FIELDS; - MultiplyMatrixByVector(&Matrices.All[mx], v, cv, sf, lm); + MultiplyMatrixByVector(mx, v, cv, sf, lm); return(8); } @@ -1213,15 +1294,16 @@ static INLINE void TransformXY(int64_t h_div_sz, float precise_h_div_sz, float p break; } - MAC[0] = F((int64_t)OFX + IR1 * h_div_sz * ((widescreen_hack) ? widescreen_hack_aspect_ratio : 1.00)) >> 16; - XY_FIFO[3].X = Lm_G(0, MAC[0]); + SET_MAC(0, F((int64_t)OFX + IR1 * h_div_sz * ((widescreen_hack) ? 
widescreen_hack_aspect_ratio : 1.00)) >> 16); + int16_t tmp = Lm_G(0, MAC(0)); + + SET_MAC(0, F((int64_t)OFY + IR2 * h_div_sz) >> 16); - MAC[0] = F((int64_t)OFY + IR2 * h_div_sz) >> 16; - XY_FIFO[3].Y = Lm_G(1, MAC[0]); + XY_FIFO(3) = (uint16_t)tmp | ((uint16_t)Lm_G(1, MAC(0)) << 16); - XY_FIFO[0] = XY_FIFO[1]; - XY_FIFO[1] = XY_FIFO[2]; - XY_FIFO[2] = XY_FIFO[3]; + XY_FIFO(0) = XY_FIFO(1); + XY_FIFO(1) = XY_FIFO(2); + XY_FIFO(2) = XY_FIFO(3); /* * PGXP hack to add subpixel precision as well @@ -1237,7 +1319,7 @@ static INLINE void TransformXY(int64_t h_div_sz, float precise_h_div_sz, float p float precise_x = fofx + ((float)IR1 * precise_h_div_sz) * ((widescreen_hack) ? widescreen_hack_aspect_ratio : 1.00); float precise_y = fofy + ((float)IR2 * precise_h_div_sz); - uint32 value = *((uint32*)&XY_FIFO[3]); + uint32 value = *((uint32*)&XY_FIFO(3)); /* Clamp precision values to valid range */ precise_x = float_max(-0x400, float_min(precise_x, 0x3ff)); @@ -1249,8 +1331,8 @@ static INLINE void TransformXY(int64_t h_div_sz, float precise_h_div_sz, float p static INLINE void TransformDQ(int64_t h_div_sz) { - MAC[0] = F((int64_t)DQB + DQA * h_div_sz); - IR0 = Lm_H(((int64_t)DQB + DQA * h_div_sz) >> 12); + SET_MAC(0, F((int64_t)DQB + DQA * h_div_sz)); + SET_IR(0, Lm_H(((int64_t)DQB + DQA * h_div_sz) >> 12)); } static INLINE int32 RTPS(uint32 instr) @@ -1260,10 +1342,10 @@ static INLINE int32 RTPS(uint32 instr) float precise_z; float precise_h_div_sz; - MultiplyMatrixByVector_PT(&Matrices.Rot, Vectors[0], CRVectors.T, sf, lm); - h_div_sz = Divide(H, Z_FIFO[3]); + MultiplyMatrixByVector_PT(Matrices_Rot, 0, CRVectors_T, sf, lm); + h_div_sz = Divide(H, Z_FIFO(3)); - precise_z = float_max(H/2.f, (float)Z_FIFO[3]); + precise_z = float_max(H/2.f, (float)Z_FIFO(3)); precise_h_div_sz = (float)H / precise_z; TransformXY(h_div_sz, precise_h_div_sz, precise_z); @@ -1283,10 +1365,10 @@ static INLINE int32 RTPT(uint32 instr) float precise_z; float precise_h_div_sz; - 
MultiplyMatrixByVector_PT(&Matrices.Rot, Vectors[i], CRVectors.T, sf, lm); - h_div_sz = Divide(H, Z_FIFO[3]); + MultiplyMatrixByVector_PT(Matrices_Rot, i, CRVectors_T, sf, lm); + h_div_sz = Divide(H, Z_FIFO(3)); - precise_z = float_max(H/2.f, (float)Z_FIFO[3]); + precise_z = float_max(H/2.f, (float)Z_FIFO(3)); precise_h_div_sz = (float)H / precise_z; TransformXY(h_div_sz, precise_h_div_sz, precise_z); @@ -1300,12 +1382,9 @@ static INLINE int32 RTPT(uint32 instr) static INLINE void NormColor(uint32_t sf, int lm, uint32_t v) { - int16_t tmp_vector[3]; - - MultiplyMatrixByVector(&Matrices.Light, Vectors[v], CRVectors.Null, sf, lm); + MultiplyMatrixByVector(Matrices_Light, v, CRVectors_Null, sf, lm); - tmp_vector[0] = IR1; tmp_vector[1] = IR2; tmp_vector[2] = IR3; - MultiplyMatrixByVector(&Matrices.Color, tmp_vector, CRVectors.B, sf, lm); + MultiplyMatrixByVector(Matrices_Color, VEC3IR, CRVectors_B, sf, lm); MAC_to_RGB_FIFO(); } @@ -1336,16 +1415,12 @@ static int32_t NCT(uint32_t instr) /* NCC - Normal Color Color */ static INLINE void NCC(uint32_t vector_index, uint32_t sf, int lm) { - int16_t tmp_vector[3]; - - MultiplyMatrixByVector(&Matrices.Light, Vectors[vector_index], CRVectors.Null, sf, lm); - - tmp_vector[0] = IR1; tmp_vector[1] = IR2; tmp_vector[2] = IR3; - MultiplyMatrixByVector(&Matrices.Color, tmp_vector, CRVectors.B, sf, lm); + MultiplyMatrixByVector(Matrices_Light, vector_index, CRVectors_Null, sf, lm); + MultiplyMatrixByVector(Matrices_Color, VEC3IR, CRVectors_B, sf, lm); - MAC[1] = ((RGB.R << 4) * IR1) >> sf; - MAC[2] = ((RGB.G << 4) * IR2) >> sf; - MAC[3] = ((RGB.B << 4) * IR3) >> sf; + SET_MAC(1, ((RGB_R << 4) * IR1) >> sf); + SET_MAC(2, ((RGB_G << 4) * IR2) >> sf); + SET_MAC(3, ((RGB_B << 4) * IR3) >> sf); MAC_to_IR(lm); MAC_to_RGB_FIFO(); @@ -1380,14 +1455,14 @@ static INLINE void DPC(uint32_t instr) const uint32_t sf = (instr & (1 << 19)) ? 
12 : 0; const int lm = (instr >> 10) & 1; - RGB_temp[0] = RGB_FIFO[0].R << 4; - RGB_temp[1] = RGB_FIFO[0].G << 4; - RGB_temp[2] = RGB_FIFO[0].B << 4; + RGB_temp[0] = RGB_FIFO_R(0) << 4; + RGB_temp[1] = RGB_FIFO_G(0) << 4; + RGB_temp[2] = RGB_FIFO_B(0) << 4; for(i = 0; i < 3; i++) { - MAC[1 + i] = i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)CRVectors.FC[i] << 12) - (int32)((uint32)RGB_temp[i] << 12))) >> sf; - MAC[1 + i] = i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)RGB_temp[i] << 12) + IR0 * i32_to_i16_saturate(i, MAC[1 + i], false))) >> sf; + SET_MAC(1 + i, i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)(int32_t)CR[CRV_FC+i] << 12) - (int32)((uint32)RGB_temp[i] << 12))) >> sf); + SET_MAC(1 + i, i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)RGB_temp[i] << 12) + IR0 * i32_to_i16_saturate(i, MAC(1 + i), false))) >> sf); } MAC_to_IR(lm); @@ -1405,14 +1480,14 @@ static int32_t DCPL(uint32_t instr) const uint32_t sf = (instr & (1 << 19)) ? 12 : 0; const int lm = (instr >> 10) & 1; - RGB_temp[0] = RGB.R << 4; - RGB_temp[1] = RGB.G << 4; - RGB_temp[2] = RGB.B << 4; + RGB_temp[0] = RGB_R << 4; + RGB_temp[1] = RGB_G << 4; + RGB_temp[2] = RGB_B << 4; for(i = 0; i < 3; i++) { - MAC[1 + i] = i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)CRVectors.FC[i] << 12) - RGB_temp[i] * IR_temp[i])) >> sf; - MAC[1 + i] = i64_to_i44(i, (RGB_temp[i] * IR_temp[i] + IR0 * i32_to_i16_saturate(i, MAC[1 + i], false))) >> sf; + SET_MAC(1 + i, i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)(int32_t)CR[CRV_FC+i] << 12) - RGB_temp[i] * IR_temp[i])) >> sf); + SET_MAC(1 + i, i64_to_i44(i, (RGB_temp[i] * IR_temp[i] + IR0 * i32_to_i16_saturate(i, MAC(1 + i), false))) >> sf); } MAC_to_IR(lm); @@ -1432,14 +1507,14 @@ static int32_t DPCS(uint32_t instr) const int lm = (instr >> 10) & 1; //assert(sf); - RGB_temp[0] = RGB.R << 4; - RGB_temp[1] = RGB.G << 4; - RGB_temp[2] = RGB.B << 4; + RGB_temp[0] = RGB_R << 4; + RGB_temp[1] = RGB_G << 4; + RGB_temp[2] = RGB_B << 4; for(i = 0; i < 3; i++) { - MAC[1 + i] = 
i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)CRVectors.FC[i] << 12) - (int32)((uint32)RGB_temp[i] << 12))) >> sf; - MAC[1 + i] = i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)RGB_temp[i] << 12) + IR0 * i32_to_i16_saturate(i, MAC[1 + i], false))) >> sf; + SET_MAC(1 + i, i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)(int32_t)CR[CRV_FC+i] << 12) - (int32)((uint32)RGB_temp[i] << 12))) >> sf); + SET_MAC(1 + i, i64_to_i44(i, ((int64_t)((uint64_t)(int64_t)RGB_temp[i] << 12) + IR0 * i32_to_i16_saturate(i, MAC(1 + i), false))) >> sf); } MAC_to_IR(lm); @@ -1468,13 +1543,13 @@ static int32_t INTPL(uint32_t instr) const uint32_t sf = (instr & (1 << 19)) ? 12 : 0; const int lm = (instr >> 10) & 1; - MAC[1] = i64_to_i44(0, ((int64_t)((uint64_t)(int64_t)CRVectors.FC[0] << 12) - (int32)((uint32)(int32)IR1 << 12))) >> sf; - MAC[2] = i64_to_i44(1, ((int64_t)((uint64_t)(int64_t)CRVectors.FC[1] << 12) - (int32)((uint32)(int32)IR2 << 12))) >> sf; - MAC[3] = i64_to_i44(2, ((int64_t)((uint64_t)(int64_t)CRVectors.FC[2] << 12) - (int32)((uint32)(int32)IR3 << 12))) >> sf; + SET_MAC(1, i64_to_i44(0, ((int64_t)((uint64_t)(int64_t)(int32_t)CR[CRV_FC] << 12) - (int32)((uint32)(int32)IR1 << 12))) >> sf); + SET_MAC(2, i64_to_i44(1, ((int64_t)((uint64_t)(int64_t)(int32_t)CR[CRV_FC+1] << 12) - (int32)((uint32)(int32)IR2 << 12))) >> sf); + SET_MAC(3, i64_to_i44(2, ((int64_t)((uint64_t)(int64_t)(int32_t)CR[CRV_FC+2] << 12) - (int32)((uint32)(int32)IR3 << 12))) >> sf); - MAC[1] = i64_to_i44(0, ((int64_t)((uint64_t)(int64_t)IR1 << 12) + IR0 * i32_to_i16_saturate(0, MAC[1], false)) >> sf); - MAC[2] = i64_to_i44(1, ((int64_t)((uint64_t)(int64_t)IR2 << 12) + IR0 * i32_to_i16_saturate(1, MAC[2], false)) >> sf); - MAC[3] = i64_to_i44(2, ((int64_t)((uint64_t)(int64_t)IR3 << 12) + IR0 * i32_to_i16_saturate(2, MAC[3], false)) >> sf); + SET_MAC(1, i64_to_i44(0, ((int64_t)((uint64_t)(int64_t)IR1 << 12) + IR0 * i32_to_i16_saturate(0, MAC(1), false)) >> sf)); + SET_MAC(2, i64_to_i44(1, ((int64_t)((uint64_t)(int64_t)IR2 
<< 12) + IR0 * i32_to_i16_saturate(1, MAC(2), false)) >> sf)); + SET_MAC(3, i64_to_i44(2, ((int64_t)((uint64_t)(int64_t)IR3 << 12) + IR0 * i32_to_i16_saturate(2, MAC(3), false)) >> sf)); MAC_to_IR(lm); MAC_to_RGB_FIFO(); @@ -1485,20 +1560,16 @@ static int32_t INTPL(uint32_t instr) static INLINE void NormColorDepthCue(uint32_t instr, uint32_t v) { - int16_t tmp_vector[3]; const uint32_t sf = (instr & (1 << 19)) ? 12 : 0; const int lm = (instr >> 10) & 1; - MultiplyMatrixByVector(&Matrices.Light, Vectors[v], CRVectors.Null, sf, lm); + MultiplyMatrixByVector(Matrices_Light, v, CRVectors_Null, sf, lm); /* Use the custom 4th vector to store the intermediate * values. This vector does not exist in the real hardware * (at least not in the registers), it's just a hack to make * the code simpler. */ - tmp_vector[0] = IR1; - tmp_vector[1] = IR2; - tmp_vector[2] = IR3; - MultiplyMatrixByVector(&Matrices.Color, tmp_vector, CRVectors.B, sf, lm); + MultiplyMatrixByVector(Matrices_Color, VEC3IR, CRVectors_B, sf, lm); DCPL(instr); } @@ -1526,13 +1597,12 @@ static int32_t CC(uint32_t instr) { const uint32_t sf = (instr & (1 << 19)) ? 12 : 0; const int lm = (instr >> 10) & 1; - int16_t tmp_vector[3] = {IR1, IR2, IR3 }; - MultiplyMatrixByVector(&Matrices.Color, tmp_vector, CRVectors.B, sf, lm); + MultiplyMatrixByVector(Matrices_Color, VEC3IR, CRVectors_B, sf, lm); - MAC[1] = ((RGB.R << 4) * IR1) >> sf; - MAC[2] = ((RGB.G << 4) * IR2) >> sf; - MAC[3] = ((RGB.B << 4) * IR3) >> sf; + SET_MAC(1, ((RGB_R << 4) * IR1) >> sf); + SET_MAC(2, ((RGB_G << 4) * IR2) >> sf); + SET_MAC(3, ((RGB_B << 4) * IR3) >> sf); MAC_to_IR(lm); MAC_to_RGB_FIFO(); @@ -1542,14 +1612,10 @@ static int32_t CC(uint32_t instr) static int32_t CDP(uint32_t instr) { - int16_t tmp_vector[3]; const uint32_t sf = (instr & (1 << 19)) ? 
12 : 0; const int lm = (instr >> 10) & 1; - tmp_vector[0] = IR1; - tmp_vector[1] = IR2; - tmp_vector[2] = IR3; - MultiplyMatrixByVector(&Matrices.Color, tmp_vector, CRVectors.B, sf, lm); + MultiplyMatrixByVector(Matrices_Color, VEC3IR, CRVectors_B, sf, lm); DCPL(instr); @@ -1559,27 +1625,26 @@ static int32_t CDP(uint32_t instr) /* Normal Clipping */ static int32_t NCLIP(uint32_t instr) { - int16_t x0 = XY_FIFO[0].X; - int16_t y0 = XY_FIFO[0].Y; - int16_t x1 = XY_FIFO[1].X; - int16_t y1 = XY_FIFO[1].Y; - int16_t x2 = XY_FIFO[2].X; - int16_t y2 = XY_FIFO[2].Y; + int16_t x0 = XY_FIFO_X(0); + int16_t y0 = XY_FIFO_Y(0); + int16_t x1 = XY_FIFO_X(1); + int16_t y1 = XY_FIFO_Y(1); + int16_t x2 = XY_FIFO_X(2); + int16_t y2 = XY_FIFO_Y(2); int64_t a = x0 * (y1 - y2); int64_t b = x1 * (y2 - y0); int64_t c = x2 * (y0 - y1); int32_t sum = a + b + c; if ((PGXP_GetModes() & PGXP_NCLIP_IMPL) && - PGXP_NCLIP_valid(*((uint32*)&XY_FIFO[0]), *((uint32*)&XY_FIFO[1]), *((uint32*)&XY_FIFO[2]))) { + PGXP_NCLIP_valid(*((uint32*)&XY_FIFO(0)), *((uint32*)&XY_FIFO(1)), *((uint32*)&XY_FIFO(2)))) { sum = PGXP_NCLIP(); } else { - sum = F( (int64_t)(XY_FIFO[0].X * (XY_FIFO[1].Y - XY_FIFO[2].Y)) + (XY_FIFO[1].X * (XY_FIFO[2].Y - XY_FIFO[0].Y)) + (XY_FIFO[2].X * (XY_FIFO[0].Y - XY_FIFO[1].Y)) - ); + sum = F( (int64_t)(XY_FIFO_X(0) * (XY_FIFO_Y(1) - XY_FIFO_Y(2))) + (XY_FIFO_X(1) * (XY_FIFO_Y(2) - XY_FIFO_Y(0))) + (XY_FIFO_X(2) * (XY_FIFO_Y(0) - XY_FIFO_Y(1)))); check_mac_overflow(sum); } - MAC[0] = sum; + SET_MAC(0, sum); return(8); } @@ -1587,9 +1652,9 @@ static int32_t NCLIP(uint32_t instr) /* Average three Z Values */ static int32_t AVSZ3(uint32_t instr) { - uint32_t z1 = Z_FIFO[1]; - uint32_t z2 = Z_FIFO[2]; - uint32_t z3 = Z_FIFO[3]; + uint32_t z1 = Z_FIFO(1); + uint32_t z2 = Z_FIFO(2); + uint32_t z3 = Z_FIFO(3); uint64_t sum = z1 + z2 + z3; /* The average factor should generally be set to 1/3th of * the ordering table size. 
So for instance, for a table of @@ -1600,8 +1665,8 @@ static int32_t AVSZ3(uint32_t instr) check_mac_overflow(average); - MAC[0] = (int32_t)average; - OTZ = i64_to_otz(MAC[0], false); + SET_MAC(0, (int32_t)average); + SET_OTZ(i64_to_otz(MAC(0), false)); return(5); } @@ -1609,12 +1674,12 @@ static int32_t AVSZ3(uint32_t instr) /* Average four Z values */ static int32_t AVSZ4(uint32_t instr) { - uint32_t z0 = Z_FIFO[0]; - uint32_t z1 = Z_FIFO[1]; - uint32_t z2 = Z_FIFO[2]; - uint32_t z3 = Z_FIFO[3]; + uint32_t z0 = Z_FIFO(0); + uint32_t z1 = Z_FIFO(1); + uint32_t z2 = Z_FIFO(2); + uint32_t z3 = Z_FIFO(3); uint64_t sum = z0 + z1 + z2 + z3; - /* The average factor should generally be set to 1/4th of + /* The average factor should generally be set to 1/4th of * the ordering table size. So for instance, for a table of * 1024 entries, it should be set at 256 to use the full * table granularity. */ @@ -1623,8 +1688,8 @@ static int32_t AVSZ4(uint32_t instr) check_mac_overflow(average); - MAC[0] = (int32_t)average; - OTZ = i64_to_otz(MAC[0], false); + SET_MAC(0, (int32_t)average); + SET_OTZ(i64_to_otz(MAC(0), false)); return(5); } @@ -1639,13 +1704,13 @@ static int32_t OP(uint32_t instr) int32_t ir1 = IR1; int32_t ir2 = IR2; int32_t ir3 = IR3; - int32_t r0 = Matrices.Rot.MX[0][0]; - int32_t r1 = Matrices.Rot.MX[1][1]; - int32_t r2 = Matrices.Rot.MX[2][2]; + int32_t r0 = (int16_t)CR[0]; + int32_t r1 = (int16_t)CR[2]; + int32_t r2 = (int16_t)CR[4]; - MAC[1] = (r1 * ir3 - r2 * ir2) >> sf; - MAC[2] = (r2 * ir1 - r0 * ir3) >> sf; - MAC[3] = (r0 * ir2 - r1 * ir1) >> sf; + SET_MAC(1, (r1 * ir3 - r2 * ir2) >> sf); + SET_MAC(2, (r2 * ir1 - r0 * ir3) >> sf); + SET_MAC(3, (r0 * ir2 - r1 * ir1) >> sf); MAC_to_IR(lm); @@ -1657,9 +1722,9 @@ static int32_t GPF(uint32_t instr) const uint32_t sf = (instr & (1 << 19)) ? 
12 : 0; const int lm = (instr >> 10) & 1; - MAC[1] = (IR0 * IR1) >> sf; - MAC[2] = (IR0 * IR2) >> sf; - MAC[3] = (IR0 * IR3) >> sf; + SET_MAC(1, (IR0 * IR1) >> sf); + SET_MAC(2, (IR0 * IR2) >> sf); + SET_MAC(3, (IR0 * IR3) >> sf); MAC_to_IR(lm); @@ -1673,9 +1738,9 @@ static int32_t GPL(uint32_t instr) const uint32_t sf = (instr & (1 << 19)) ? 12 : 0; const int lm = (instr >> 10) & 1; - MAC[1] = i64_to_i44(0, (int64_t)((uint64_t)(int64_t)MAC[1] << sf) + (IR0 * IR1)) >> sf; - MAC[2] = i64_to_i44(1, (int64_t)((uint64_t)(int64_t)MAC[2] << sf) + (IR0 * IR2)) >> sf; - MAC[3] = i64_to_i44(2, (int64_t)((uint64_t)(int64_t)MAC[3] << sf) + (IR0 * IR3)) >> sf; + SET_MAC(1, i64_to_i44(0, (int64_t)((uint64_t)(int64_t)MAC(1) << sf) + (IR0 * IR1)) >> sf); + SET_MAC(2, i64_to_i44(1, (int64_t)((uint64_t)(int64_t)MAC(2) << sf) + (IR0 * IR2)) >> sf); + SET_MAC(3, i64_to_i44(2, (int64_t)((uint64_t)(int64_t)MAC(3) << sf) + (IR0 * IR3)) >> sf); MAC_to_IR(lm); diff --git a/mednafen/psx/gte.h b/mednafen/psx/gte.h index 410723385..aca3fae37 100644 --- a/mednafen/psx/gte.h +++ b/mednafen/psx/gte.h @@ -3,6 +3,8 @@ #include "../state.h" +void GTE_SwitchRegisters(bool use_regs, uint32_t *regs); + void GTE_Init(void); void GTE_Power(void); int GTE_StateAction(StateMem *sm, int load, int data_only); diff --git a/mednafen/psx/mdec.cpp b/mednafen/psx/mdec.cpp index 27264d59e..829dd446d 100644 --- a/mednafen/psx/mdec.cpp +++ b/mednafen/psx/mdec.cpp @@ -124,6 +124,8 @@ static const uint8 ZigZag[64] = 0x2e, 0x27, 0x2f, 0x36, 0x3d, 0x3e, 0x37, 0x3f, }; +extern int32 EventCycles; + void MDEC_Power(void) { ClockCounter = 0; @@ -527,11 +529,11 @@ void MDEC_Run(int32 clocks) ClockCounter += clocks; - if(ClockCounter > 128) + if(ClockCounter > EventCycles) { //if(MDRPhase != 0) // printf("SNORT: %d\n", ClockCounter); - ClockCounter = 128; + ClockCounter = EventCycles; } switch(MDRPhase + MDRPhaseBias) diff --git a/mednafen/psx/psx.h b/mednafen/psx/psx.h index e5ba9be66..289023431 100644 --- 
a/mednafen/psx/psx.h +++ b/mednafen/psx/psx.h @@ -100,11 +100,6 @@ extern MultiAccessSizeMem<512 * 1024, uint32, false> *BIOSROM; extern MultiAccessSizeMem<2048 * 1024, uint32_t, false> *MainRAM; extern MultiAccessSizeMem<1024, uint32_t, false> *ScratchRAM; -#ifdef HAVE_LIGHTREC -enum DYNAREC {DYNAREC_DISABLED, DYNAREC_EXECUTE, DYNAREC_EXECUTE_ONE, DYNAREC_RUN_INTERPRETER}; -extern enum DYNAREC psx_dynarec; -#endif - #define OVERCLOCK_SHIFT 8 extern int32_t psx_overclock_factor; diff --git a/mednafen/psx/spu.cpp b/mednafen/psx/spu.cpp index 1f6afcc52..513628aef 100644 --- a/mednafen/psx/spu.cpp +++ b/mednafen/psx/spu.cpp @@ -76,6 +76,7 @@ uint32_t IntermediateBufferPos; int16_t IntermediateBuffer[4096][2]; +extern uint8_t spu_samples; static const int16 FIR_Table[256][4] = { @@ -746,8 +747,8 @@ int32 PS_SPU::UpdateFromCDC(int32 clocks) while(clock_divider <= 0) { - clock_divider += 768; - sample_clocks++; + clock_divider += spu_samples*768; + sample_clocks += spu_samples; } while(sample_clocks > 0) @@ -1451,8 +1452,8 @@ int PS_SPU::StateAction(StateMem *sm, int load, int data_only) Voices[i].LoopAddr &= 0x3FFFF; } - if(clock_divider <= 0 || clock_divider > 768) - clock_divider = 768; + if(clock_divider <= 0 || clock_divider > spu_samples*768) + clock_divider = spu_samples*768; RWAddr &= 0x3FFFF; CWA &= 0x1FF; diff --git a/mednafen/psx/timer.cpp b/mednafen/psx/timer.cpp index b7cac7031..e0a691a62 100644 --- a/mednafen/psx/timer.cpp +++ b/mednafen/psx/timer.cpp @@ -111,9 +111,11 @@ static bool hretrace; static Timer Timers[3]; static int32_t lastts; +extern int32 EventCycles; + static uint32_t CalcNextEvent(void) { - int32_t next_event = 1024; /**/ + int32_t next_event = 8*EventCycles; /**/ unsigned i; for(i = 0; i < 3; i++) diff --git a/mednafen/state.c b/mednafen/state.c index 7339a3cb7..567ab0c13 100644 --- a/mednafen/state.c +++ b/mednafen/state.c @@ -137,7 +137,7 @@ static bool SubWrite(StateMem *st, SFORMAT *sf) { while(sf->size || sf->name) /* Size can 
sometimes be zero, so also check for the text name. These two should both be zero only at the end of a struct. */ { - if(!sf->size || !sf->v) + if(!sf->size || sf->v == INVALID_PTR || (!strcmp(sf->name,"MainRAM->data8") && !sf->v)) { sf++; continue; @@ -242,9 +242,9 @@ static int WriteStateChunk(StateMem *st, const char *sname, SFORMAT *sf) static SFORMAT *FindSF(const char *name, SFORMAT *sf) { /* Size can sometimes be zero, so also check for the text name. These two should both be zero only at the end of a struct. */ - while(sf->size || sf->name) + while(sf->size || sf->name) { - if(!sf->size || !sf->v) + if(!sf->size || sf->v == INVALID_PTR || (!strcmp(sf->name,"MainRAM->data8") && !sf->v)) { sf++; continue; @@ -379,7 +379,7 @@ static int MDFNSS_StateAction_internal(void *st_p, int load, int data_only, stru return(0); found = 1; break; - } + } else { if(smem_seek(st, tmp_size, SSEEK_CUR) < 0) diff --git a/mednafen/state.h b/mednafen/state.h index cc0420fe2..e38bd92cc 100644 --- a/mednafen/state.h +++ b/mednafen/state.h @@ -4,6 +4,8 @@ #include #include +#include "mednafen-types.h" + typedef struct { uint8_t *data;